0

Merge branch 'main' into mdp/dotenv_jest

This commit is contained in:
Nicolas 2024-04-25 17:59:20 -07:00 committed by GitHub
commit f368e94cee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 230 additions and 54 deletions

View File

@ -180,6 +180,15 @@ url = 'https://example.com'
scraped_data = app.scrape_url(url) scraped_data = app.scrape_url(url)
``` ```
### Search for a query
Performs a web search, retrieves the top results, extracts data from each page, and returns their markdown.
```python
query = 'what is mendable?'
search_result = app.search(query)
```
## Contributing ## Contributing
We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.

View File

@ -373,6 +373,8 @@
"type": "boolean" "type": "boolean"
}, },
"data": { "data": {
"type": "array",
"items": {
"type": "object", "type": "object",
"properties": { "properties": {
"url": { "url": {
@ -406,6 +408,7 @@
} }
} }
} }
}
}, },
"CrawlResponse": { "CrawlResponse": {
"type": "object", "type": "object",

View File

@ -2,6 +2,9 @@ import { SearchResult } from "../../src/lib/entities";
import { google_search } from "./googlesearch"; import { google_search } from "./googlesearch";
import { serper_search } from "./serper"; import { serper_search } from "./serper";
export async function search({ export async function search({
query, query,
advanced = false, advanced = false,

View File

@ -61,6 +61,43 @@ export default class FirecrawlApp {
return { success: false, error: 'Internal server error.' }; return { success: false, error: 'Internal server error.' };
}); });
} }
/**
* Searches for a query using the Firecrawl API.
* @param {string} query - The query to search for.
* @param {Params | null} params - Additional parameters for the search request.
* @returns {Promise<SearchResponse>} The response from the search operation.
*/
search(query_1) {
// Downleveled (tsc-generated) async method: __awaiter drives the generator,
// so `yield` below behaves like `await`. NOTE(review): this is build output —
// change the TypeScript source, not this file.
return __awaiter(this, arguments, void 0, function* (query, params = null) {
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
};
// Request body starts with the mandatory query; caller-supplied params are
// merged on top (and may therefore override it).
let jsonData = { query };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
}
try {
const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return responseData;
}
else {
throw new Error(`Failed to search. Error: ${responseData.error}`);
}
}
else {
// Non-200 status: delegate to the shared error handler (may throw).
this.handleError(response, 'search');
}
}
catch (error) {
// Re-wrap so callers always receive an Error carrying a message.
throw new Error(error.message);
}
// Reached only if handleError returned without throwing.
return { success: false, error: 'Internal server error.' };
});
}
/** /**
* Initiates a crawl job for a URL using the Firecrawl API. * Initiates a crawl job for a URL using the Firecrawl API.
* @param {string} url - The URL to crawl. * @param {string} url - The URL to crawl.

View File

@ -1,6 +1,6 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.14", "version": "0.0.15",
"description": "JavaScript SDK for Firecrawl API", "description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js", "main": "build/index.js",
"types": "types/index.d.ts", "types": "types/index.d.ts",

View File

@ -23,6 +23,14 @@ export interface ScrapeResponse {
error?: string; error?: string;
} }
/**
* Response interface for searching operations.
*/
export interface SearchResponse {
success: boolean;
data?: any;
error?: string;
}
/** /**
* Response interface for crawling operations. * Response interface for crawling operations.
*/ */
@ -94,6 +102,39 @@ export default class FirecrawlApp {
return { success: false, error: 'Internal server error.' }; return { success: false, error: 'Internal server error.' };
} }
/**
 * Searches for a query using the Firecrawl API.
 * @param {string} query - The query to search for.
 * @param {Params | null} params - Additional parameters for the search request,
 *   merged into the JSON body (may override the query).
 * @returns {Promise<SearchResponse>} The response from the search operation.
 * @throws {Error} If the request fails or the API reports an unsuccessful search.
 */
async search(query: string, params: Params | null = null): Promise<SearchResponse> {
  const headers: AxiosRequestHeaders = {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${this.apiKey}`,
  } as AxiosRequestHeaders;
  // Start from the mandatory query; caller-supplied params are merged on top.
  let jsonData: Params = { query };
  if (params) {
    jsonData = { ...jsonData, ...params };
  }
  try {
    const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
    if (response.status === 200) {
      const responseData = response.data;
      if (responseData.success) {
        return responseData;
      } else {
        throw new Error(`Failed to search. Error: ${responseData.error}`);
      }
    } else {
      // Non-200 status: delegate to the shared error handler (may throw).
      this.handleError(response, 'search');
    }
  } catch (error: unknown) {
    // `unknown` instead of `any`: a non-Error throwable previously produced
    // `new Error(undefined)`. Narrow first, then preserve the rethrow contract.
    throw new Error(error instanceof Error ? error.message : String(error));
  }
  // Reached only if handleError returned without throwing.
  return { success: false, error: 'Internal server error.' };
}
/** /**
* Initiates a crawl job for a URL using the Firecrawl API. * Initiates a crawl job for a URL using the Firecrawl API.
* @param {string} url - The URL to crawl. * @param {string} url - The URL to crawl.

View File

@ -19,6 +19,14 @@ export interface ScrapeResponse {
data?: any; data?: any;
error?: string; error?: string;
} }
/**
* Response interface for searching operations.
*/
export interface SearchResponse {
success: boolean;
data?: any;
error?: string;
}
/** /**
* Response interface for crawling operations. * Response interface for crawling operations.
*/ */
@ -55,6 +63,13 @@ export default class FirecrawlApp {
* @returns {Promise<ScrapeResponse>} The response from the scrape operation. * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
*/ */
scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>; scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
/**
* Searches for a query using the Firecrawl API.
* @param {string} query - The query to search for.
* @param {Params | null} params - Additional parameters for the search request.
* @returns {Promise<SearchResponse>} The response from the search operation.
*/
search(query: string, params?: Params | null): Promise<SearchResponse>;
/** /**
* Initiates a crawl job for a URL using the Firecrawl API. * Initiates a crawl job for a URL using the Firecrawl API.
* @param {string} url - The URL to crawl. * @param {string} url - The URL to crawl.

View File

@ -9,14 +9,14 @@
"version": "1.0.0", "version": "1.0.0",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@mendable/firecrawl-js": "^0.0.8", "@mendable/firecrawl-js": "^0.0.15",
"axios": "^1.6.8" "axios": "^1.6.8"
} }
}, },
"node_modules/@mendable/firecrawl-js": { "node_modules/@mendable/firecrawl-js": {
"version": "0.0.8", "version": "0.0.15",
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.8.tgz", "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.15.tgz",
"integrity": "sha512-dD7eA5X6UT8CM3z7qCqHgA4YbCsdwmmlaT/L0/ozM6gGvb0PnJMoB+e51+n4lAW8mxXOvHGbq9nrgBT1wEhhhw==", "integrity": "sha512-e3iCCrLIiEh+jEDerGV9Uhdkn8ymo+sG+k3osCwPg51xW1xUdAnmlcHrcJoR43RvKXdvD/lqoxg8odUEsqyH+w==",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",
"dotenv": "^16.4.5" "dotenv": "^16.4.5"

View File

@ -11,7 +11,7 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"@mendable/firecrawl-js": "^0.0.8", "@mendable/firecrawl-js": "^0.0.15",
"axios": "^1.6.8" "axios": "^1.6.8"
} }
} }

View File

@ -1,29 +1,36 @@
from fastapi import FastAPI, Response from fastapi import FastAPI
from playwright.async_api import async_playwright from playwright.async_api import async_playwright, Browser
import os
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from pydantic import BaseModel from pydantic import BaseModel
app = FastAPI() app = FastAPI()
from pydantic import BaseModel
class UrlModel(BaseModel): class UrlModel(BaseModel):
url: str url: str
@app.post("/html") # Kept as POST to accept body parameters
async def root(body: UrlModel): # Using Pydantic model for request body
async with async_playwright() as p:
browser = await p.chromium.launch()
context = await browser.new_context() browser: Browser = None
page = await context.new_page()
await page.goto(body.url) # Adjusted to use the url from the request body model
page_content = await page.content() # Get the HTML content of the page
await context.close() @app.on_event("startup")
async def startup_event():
global browser
playwright = await async_playwright().start()
browser = await playwright.chromium.launch()
@app.on_event("shutdown")
async def shutdown_event():
await browser.close() await browser.close()
@app.post("/html")
async def root(body: UrlModel):
context = await browser.new_context()
page = await context.new_page()
await page.goto(body.url)
page_content = await page.content()
await context.close()
json_compatible_item_data = {"content": page_content} json_compatible_item_data = {"content": page_content}
return JSONResponse(content=json_compatible_item_data) return JSONResponse(content=json_compatible_item_data)

View File

@ -47,6 +47,15 @@ url = 'https://example.com'
scraped_data = app.scrape_url(url) scraped_data = app.scrape_url(url)
``` ```
### Search for a query
Used to search the web, get the most relevant results, scrape each page, and return the markdown.
```python
query = 'what is mendable?'
search_result = app.search(query)
```
### Crawling a Website ### Crawling a Website
To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format. To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.

View File

@ -33,6 +33,32 @@ class FirecrawlApp:
else: else:
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}') raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
def search(self, query, params=None):
    """Search the web via the Firecrawl API and return the scraped results.

    Args:
        query: The search query string.
        params: Optional dict of extra request parameters, merged into the
            JSON body (may override the query).

    Returns:
        The ``data`` payload from a successful API response.

    Raises:
        Exception: If the HTTP request fails or the API reports an error.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {self.api_key}'
    }
    json_data = {'query': query}
    if params:
        json_data.update(params)
    response = requests.post(
        'https://api.firecrawl.dev/v0/search',
        headers=headers,
        json=json_data
    )
    if response.status_code == 200:
        # Parse into a new name instead of rebinding `response`, so the HTTP
        # response object (and its status_code) stays available.
        body = response.json()
        if body.get('success'):
            return body['data']
        # .get avoids a KeyError masking the real API error on malformed bodies.
        raise Exception(f'Failed to search. Error: {body.get("error", "Unknown error occurred")}')
    elif response.status_code in [402, 409, 500]:
        error_message = response.json().get('error', 'Unknown error occurred')
        raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
    else:
        raise Exception(f'Failed to search. Status code: {response.status_code}')
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2): def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
headers = self._prepare_headers() headers = self._prepare_headers()
json_data = {'url': url} json_data = {'url': url}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -33,6 +33,32 @@ class FirecrawlApp:
else: else:
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}') raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
def search(self, query, params=None):
    """Search the web via the Firecrawl API and return the scraped results.

    Args:
        query: The search query string.
        params: Optional dict of extra request parameters, merged into the
            JSON body (may override the query).

    Returns:
        The ``data`` payload from a successful API response.

    Raises:
        Exception: If the HTTP request fails or the API reports an error.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {self.api_key}'
    }
    json_data = {'query': query}
    if params:
        json_data.update(params)
    response = requests.post(
        'https://api.firecrawl.dev/v0/search',
        headers=headers,
        json=json_data
    )
    if response.status_code == 200:
        # Parse into a new name instead of rebinding `response`, so the HTTP
        # response object (and its status_code) stays available.
        body = response.json()
        if body.get('success'):
            return body['data']
        # .get avoids a KeyError masking the real API error on malformed bodies.
        raise Exception(f'Failed to search. Error: {body.get("error", "Unknown error occurred")}')
    elif response.status_code in [402, 409, 500]:
        error_message = response.json().get('error', 'Unknown error occurred')
        raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
    else:
        raise Exception(f'Failed to search. Status code: {response.status_code}')
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2): def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
headers = self._prepare_headers() headers = self._prepare_headers()
json_data = {'url': url} json_data = {'url': url}

View File

@ -1,7 +1,7 @@
Metadata-Version: 2.1 Metadata-Version: 2.1
Name: firecrawl-py Name: firecrawl-py
Version: 0.0.5 Version: 0.0.6
Summary: Python SDK for Firecrawl API Summary: Python SDK for Firecrawl API
Home-page: https://github.com/mendableai/firecrawl-py Home-page: https://github.com/mendableai/firecrawl
Author: Mendable.ai Author: Mendable.ai
Author-email: nick@mendable.ai Author-email: nick@mendable.ai

View File

@ -2,8 +2,8 @@ from setuptools import setup, find_packages
setup( setup(
name='firecrawl-py', name='firecrawl-py',
version='0.0.5', version='0.0.6',
url='https://github.com/mendableai/firecrawl-py', url='https://github.com/mendableai/firecrawl',
author='Mendable.ai', author='Mendable.ai',
author_email='nick@mendable.ai', author_email='nick@mendable.ai',
description='Python SDK for Firecrawl API', description='Python SDK for Firecrawl API',