From e5ffda1eeca2715774596b021fe5a8cdab862917 Mon Sep 17 00:00:00 2001 From: neev jewalkar Date: Tue, 18 Jun 2024 05:42:25 +0530 Subject: [PATCH 1/3] Added local host support for the javascript SDK --- apps/js-sdk/firecrawl/README.md | 5 + apps/js-sdk/firecrawl/build/index.js | 4 +- apps/js-sdk/firecrawl/package-lock.json | 4 +- apps/js-sdk/firecrawl/src/index.ts | 577 ++++++++++++------------ apps/js-sdk/firecrawl/types/index.d.ts | 2 +- apps/js-sdk/package-lock.json | 40 -- 6 files changed, 299 insertions(+), 333 deletions(-) diff --git a/apps/js-sdk/firecrawl/README.md b/apps/js-sdk/firecrawl/README.md index 085e865..d916bf7 100644 --- a/apps/js-sdk/firecrawl/README.md +++ b/apps/js-sdk/firecrawl/README.md @@ -176,6 +176,11 @@ async function checkStatusExample(jobId) { checkStatusExample('your_job_id_here'); ``` +## Running Locally +To use the SDK when running Firecrawl locally, you can change the initial Firecrawl app instance to: +```js +const app = new FirecrawlApp({ apiKey: "YOUR_API_KEY", apiUrl: "http://localhost:3002" }); +``` ## Error Handling diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index b418513..e54e532 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -18,9 +18,9 @@ export default class FirecrawlApp { * Initializes a new instance of the FirecrawlApp class. * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. */ - constructor({ apiKey = null }) { - this.apiUrl = "https://api.firecrawl.dev"; + constructor({ apiKey = null, apiUrl = null }) { this.apiKey = apiKey || ""; + this.apiUrl = apiUrl || "https://api.firecrawl.dev"; if (!this.apiKey) { throw new Error("No API key provided"); } diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index b1cebde..7094cc9 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,12 +1,12 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.22", + "version": "0.0.26", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.22", + "version": "0.0.26", "license": "MIT", "dependencies": { "axios": "^1.6.8", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index f884125..fd4f2ca 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -5,346 +5,347 @@ import { zodToJsonSchema } from "zod-to-json-schema"; * Configuration interface for FirecrawlApp. */ export interface FirecrawlAppConfig { - apiKey?: string | null; - apiUrl?: string | null; + apiKey?: string | null; + apiUrl?: string | null; } /** * Generic parameter interface. */ export interface Params { - [key: string]: any; - extractorOptions?: { - extractionSchema: z.ZodSchema | any; - mode?: "llm-extraction"; - extractionPrompt?: string; - }; + [key: string]: any; + extractorOptions?: { + extractionSchema: z.ZodSchema | any; + mode?: "llm-extraction"; + extractionPrompt?: string; + }; } /** * Response interface for scraping operations. */ export interface ScrapeResponse { - success: boolean; - data?: any; - error?: string; + success: boolean; + data?: any; + error?: string; } /** * Response interface for searching operations. */ export interface SearchResponse { - success: boolean; - data?: any; - error?: string; + success: boolean; + data?: any; + error?: string; } /** * Response interface for crawling operations. */ export interface CrawlResponse { - success: boolean; - jobId?: string; - data?: any; - error?: string; + success: boolean; + jobId?: string; + data?: any; + error?: string; } /** * Response interface for job status checks. */ export interface JobStatusResponse { - success: boolean; - status: string; - jobId?: string; - data?: any; - partial_data?: any, - error?: string; + success: boolean; + status: string; + jobId?: string; + data?: any; + partial_data?: any, + error?: string; } /** * Main class for interacting with the Firecrawl API. */ export default class FirecrawlApp { - private apiKey: string; - private apiUrl: string = "https://api.firecrawl.dev"; + private apiKey: string; + private apiUrl: string; - /** - * Initializes a new instance of the FirecrawlApp class. - * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey = null }: FirecrawlAppConfig) { - this.apiKey = apiKey || ""; - if (!this.apiKey) { - throw new Error("No API key provided"); - } - } - - /** - * Scrapes a URL using the Firecrawl API. - * @param {string} url - The URL to scrape. - * @param {Params | null} params - Additional parameters for the scrape request. - * @returns {Promise} The response from the scrape operation. - */ - async scrapeUrl( - url: string, - params: Params | null = null - ): Promise { - const headers: AxiosRequestHeaders = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - } as AxiosRequestHeaders; - let jsonData: Params = { url, ...params }; - if (params?.extractorOptions?.extractionSchema) { - let schema = params.extractorOptions.extractionSchema; - // Check if schema is an instance of ZodSchema to correctly identify Zod schemas - if (schema instanceof z.ZodSchema) { - schema = zodToJsonSchema(schema); - } - jsonData = { - ...jsonData, - extractorOptions: { - ...params.extractorOptions, - extractionSchema: schema, - mode: params.extractorOptions.mode || "llm-extraction", - }, - }; - } - try { - const response: AxiosResponse = await axios.post( - this.apiUrl + "/v0/scrape", - jsonData, - { headers }, - ); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return responseData; - } else { - throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); + /** + * Initializes a new instance of the FirecrawlApp class. + * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. + */ + constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) { + this.apiKey = apiKey || ""; + this.apiUrl = apiUrl || "https://api.firecrawl.dev" + if (!this.apiKey) { + throw new Error("No API key provided"); } - } else { - this.handleError(response, "scrape URL"); - } - } catch (error: any) { - throw new Error(error.message); } - return { success: false, error: "Internal server error." }; - } - /** - * Searches for a query using the Firecrawl API. - * @param {string} query - The query to search for. - * @param {Params | null} params - Additional parameters for the search request. - * @returns {Promise} The response from the search operation. - */ - async search( - query: string, - params: Params | null = null - ): Promise { - const headers: AxiosRequestHeaders = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - } as AxiosRequestHeaders; - let jsonData: Params = { query }; - if (params) { - jsonData = { ...jsonData, ...params }; - } - try { - const response: AxiosResponse = await axios.post( - this.apiUrl + "/v0/search", - jsonData, - { headers } - ); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return responseData; - } else { - throw new Error(`Failed to search. Error: ${responseData.error}`); + /** + * Scrapes a URL using the Firecrawl API. + * @param {string} url - The URL to scrape. + * @param {Params | null} params - Additional parameters for the scrape request. + * @returns {Promise} The response from the scrape operation. + */ + async scrapeUrl( + url: string, + params: Params | null = null + ): Promise { + const headers: AxiosRequestHeaders = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + } as AxiosRequestHeaders; + let jsonData: Params = { url, ...params }; + if (params?.extractorOptions?.extractionSchema) { + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof z.ZodSchema) { + schema = zodToJsonSchema(schema); + } + jsonData = { + ...jsonData, + extractorOptions: { + ...params.extractorOptions, + extractionSchema: schema, + mode: params.extractorOptions.mode || "llm-extraction", + }, + }; } - } else { - this.handleError(response, "search"); - } - } catch (error: any) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param {string} url - The URL to crawl. - * @param {Params | null} params - Additional parameters for the crawl request. - * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. - * @param {number} pollInterval - Time in seconds for job status checks. - * @param {string} idempotencyKey - Optional idempotency key for the request. - * @returns {Promise} The response from the crawl operation. - */ - async crawlUrl( - url: string, - params: Params | null = null, - waitUntilDone: boolean = true, - pollInterval: number = 2, - idempotencyKey?: string - ): Promise { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData: Params = { url }; - if (params) { - jsonData = { ...jsonData, ...params }; - } - try { - const response: AxiosResponse = await this.postRequest( - this.apiUrl + "/v0/crawl", - jsonData, - headers - ); - if (response.status === 200) { - const jobId: string = response.data.jobId; - if (waitUntilDone) { - return this.monitorJobStatus(jobId, headers, pollInterval); - } else { - return { success: true, jobId }; + try { + const response: AxiosResponse = await axios.post( + this.apiUrl + "/v0/scrape", + jsonData, + { headers }, + ); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } else { + throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); + } + } else { + this.handleError(response, "scrape URL"); + } + } catch (error: any) { + throw new Error(error.message); } - } else { - this.handleError(response, "start crawl job"); - } - } catch (error: any) { - console.log(error); - throw new Error(error.message); + return { success: false, error: "Internal server error." }; } - return { success: false, error: "Internal server error." }; - } - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param {string} jobId - The job ID of the crawl operation. - * @returns {Promise} The response containing the job status. - */ - async checkCrawlStatus(jobId: string): Promise { - const headers: AxiosRequestHeaders = this.prepareHeaders(); - try { - const response: AxiosResponse = await this.getRequest( - this.apiUrl + `/v0/crawl/status/${jobId}`, - headers - ); - if (response.status === 200) { + /** + * Searches for a query using the Firecrawl API. + * @param {string} query - The query to search for. + * @param {Params | null} params - Additional parameters for the search request. + * @returns {Promise} The response from the search operation. + */ + async search( + query: string, + params: Params | null = null + ): Promise { + const headers: AxiosRequestHeaders = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + } as AxiosRequestHeaders; + let jsonData: Params = { query }; + if (params) { + jsonData = { ...jsonData, ...params }; + } + try { + const response: AxiosResponse = await axios.post( + this.apiUrl + "/v0/search", + jsonData, + { headers } + ); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } else { + throw new Error(`Failed to search. Error: ${responseData.error}`); + } + } else { + this.handleError(response, "search"); + } + } catch (error: any) { + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + } + + /** + * Initiates a crawl job for a URL using the Firecrawl API. + * @param {string} url - The URL to crawl. + * @param {Params | null} params - Additional parameters for the crawl request. + * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. + * @param {number} pollInterval - Time in seconds for job status checks. + * @param {string} idempotencyKey - Optional idempotency key for the request. + * @returns {Promise} The response from the crawl operation. + */ + async crawlUrl( + url: string, + params: Params | null = null, + waitUntilDone: boolean = true, + pollInterval: number = 2, + idempotencyKey?: string + ): Promise { + const headers = this.prepareHeaders(idempotencyKey); + let jsonData: Params = { url }; + if (params) { + jsonData = { ...jsonData, ...params }; + } + try { + const response: AxiosResponse = await this.postRequest( + this.apiUrl + "/v0/crawl", + jsonData, + headers + ); + if (response.status === 200) { + const jobId: string = response.data.jobId; + if (waitUntilDone) { + return this.monitorJobStatus(jobId, headers, pollInterval); + } else { + return { success: true, jobId }; + } + } else { + this.handleError(response, "start crawl job"); + } + } catch (error: any) { + console.log(error); + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + } + + /** + * Checks the status of a crawl job using the Firecrawl API. + * @param {string} jobId - The job ID of the crawl operation. + * @returns {Promise} The response containing the job status. + */ + async checkCrawlStatus(jobId: string): Promise { + const headers: AxiosRequestHeaders = this.prepareHeaders(); + try { + const response: AxiosResponse = await this.getRequest( + this.apiUrl + `/v0/crawl/status/${jobId}`, + headers + ); + if (response.status === 200) { + return { + success: true, + status: response.data.status, + data: response.data.data, + partial_data: !response.data.data ? response.data.partial_data : undefined, + }; + } else { + this.handleError(response, "check crawl status"); + } + } catch (error: any) { + throw new Error(error.message); + } return { - success: true, - status: response.data.status, - data: response.data.data, - partial_data: !response.data.data ? response.data.partial_data : undefined, + success: false, + status: "unknown", + error: "Internal server error.", }; - } else { - this.handleError(response, "check crawl status"); - } - } catch (error: any) { - throw new Error(error.message); } - return { - success: false, - status: "unknown", - error: "Internal server error.", - }; - } - /** - * Prepares the headers for an API request. - * @returns {AxiosRequestHeaders} The prepared headers. - */ - prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders { - return { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, - ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}), - } as AxiosRequestHeaders & { 'x-idempotency-key'?: string }; - } + /** + * Prepares the headers for an API request. + * @returns {AxiosRequestHeaders} The prepared headers. + */ + prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders { + return { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${this.apiKey}`, + ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}), + } as AxiosRequestHeaders & { 'x-idempotency-key'?: string }; + } - /** - * Sends a POST request to the specified URL. - * @param {string} url - The URL to send the request to. - * @param {Params} data - The data to send in the request. - * @param {AxiosRequestHeaders} headers - The headers for the request. - * @returns {Promise} The response from the POST request. - */ - postRequest( - url: string, - data: Params, - headers: AxiosRequestHeaders - ): Promise { - return axios.post(url, data, { headers }); - } + /** + * Sends a POST request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {Params} data - The data to send in the request. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the POST request. + */ + postRequest( + url: string, + data: Params, + headers: AxiosRequestHeaders + ): Promise { + return axios.post(url, data, { headers }); + } - /** - * Sends a GET request to the specified URL. - * @param {string} url - The URL to send the request to. - * @param {AxiosRequestHeaders} headers - The headers for the request. - * @returns {Promise} The response from the GET request. - */ - getRequest( - url: string, - headers: AxiosRequestHeaders - ): Promise { - return axios.get(url, { headers }); - } + /** + * Sends a GET request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the GET request. + */ + getRequest( + url: string, + headers: AxiosRequestHeaders + ): Promise { + return axios.get(url, { headers }); + } - /** - * Monitors the status of a crawl job until completion or failure. - * @param {string} jobId - The job ID of the crawl operation. - * @param {AxiosRequestHeaders} headers - The headers for the request. - * @param {number} timeout - Timeout in seconds for job status checks. - * @returns {Promise} The final job status or data. - */ - async monitorJobStatus( - jobId: string, - headers: AxiosRequestHeaders, - checkInterval: number - ): Promise { - while (true) { - const statusResponse: AxiosResponse = await this.getRequest( - this.apiUrl + `/v0/crawl/status/${jobId}`, - headers - ); - if (statusResponse.status === 200) { - const statusData = statusResponse.data; - if (statusData.status === "completed") { - if ("data" in statusData) { - return statusData.data; - } else { - throw new Error("Crawl job completed but no data was returned"); - } - } else if ( - ["active", "paused", "pending", "queued"].includes(statusData.status) - ) { - if (checkInterval < 2) { - checkInterval = 2; - } - await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again - } else { - throw new Error( - `Crawl job failed or was stopped. Status: ${statusData.status}` - ); + /** + * Monitors the status of a crawl job until completion or failure. + * @param {string} jobId - The job ID of the crawl operation. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @param {number} timeout - Timeout in seconds for job status checks. + * @returns {Promise} The final job status or data. + */ + async monitorJobStatus( + jobId: string, + headers: AxiosRequestHeaders, + checkInterval: number + ): Promise { + while (true) { + const statusResponse: AxiosResponse = await this.getRequest( + this.apiUrl + `/v0/crawl/status/${jobId}`, + headers + ); + if (statusResponse.status === 200) { + const statusData = statusResponse.data; + if (statusData.status === "completed") { + if ("data" in statusData) { + return statusData.data; + } else { + throw new Error("Crawl job completed but no data was returned"); + } + } else if ( + ["active", "paused", "pending", "queued"].includes(statusData.status) + ) { + if (checkInterval < 2) { + checkInterval = 2; + } + await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again + } else { + throw new Error( + `Crawl job failed or was stopped. Status: ${statusData.status}` + ); + } + } else { + this.handleError(statusResponse, "check crawl status"); + } } - } else { - this.handleError(statusResponse, "check crawl status"); - } } - } - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response: AxiosResponse, action: string): void { - if ([402, 408, 409, 500].includes(response.status)) { - const errorMessage: string = - response.data.error || "Unknown error occurred"; - throw new Error( - `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}` - ); - } else { - throw new Error( - `Unexpected error occurred while trying to ${action}. Status code: ${response.status}` - ); + /** + * Handles errors from API responses. + * @param {AxiosResponse} response - The response from the API. + * @param {string} action - The action being performed when the error occurred. + */ + handleError(response: AxiosResponse, action: string): void { + if ([402, 408, 409, 500].includes(response.status)) { + const errorMessage: string = + response.data.error || "Unknown error occurred"; + throw new Error( + `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}` + ); + } else { + throw new Error( + `Unexpected error occurred while trying to ${action}. Status code: ${response.status}` + ); + } } - } } diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index 52a7d1e..bdf698e 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -64,7 +64,7 @@ export default class FirecrawlApp { * Initializes a new instance of the FirecrawlApp class. * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. */ - constructor({ apiKey }: FirecrawlAppConfig); + constructor({ apiKey, apiUrl }: FirecrawlAppConfig); /** * Scrapes a URL using the Firecrawl API. * @param {string} url - The URL to scrape. diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index c59a371..2bf3f00 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -11,10 +11,8 @@ "dependencies": { "@mendable/firecrawl-js": "^0.0.19", "axios": "^1.6.8", - "dotenv": "^16.4.5", "ts-node": "^10.9.2", "typescript": "^5.4.5", - "uuid": "^9.0.1", "zod": "^3.23.8" }, "devDependencies": { @@ -452,15 +450,6 @@ "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==" }, - "node_modules/@types/node": { - "version": "20.12.11", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.11.tgz", - "integrity": "sha512-vDg9PZ/zi+Nqp6boSOT7plNuthRugEKixDv5sFTIpkE89MmNtEArAShI4mxuX2+UrLEe9pxC1vm2cjm9YlWbJw==", - "peer": true, - "dependencies": { - "undici-types": "~5.26.4" - } - }, "node_modules/acorn": { "version": "8.11.3", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz", @@ -532,17 +521,6 @@ "node": ">=0.3.1" } }, - "node_modules/dotenv": { - "version": "16.4.5", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", - "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://dotenvx.com" - } - }, "node_modules/esbuild": { "version": "0.20.2", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz", @@ -750,24 +728,6 @@ "node": ">=14.17" } }, - "node_modules/undici-types": { - "version": "5.26.5", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "peer": true - }, - "node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "bin": { - "uuid": "dist/bin/uuid" - } - }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", From 90a807c54701b5ae062d3ff220d75b6509da5be1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 18 Jun 2024 12:56:13 -0400 Subject: [PATCH 2/3] Update index.ts --- apps/js-sdk/firecrawl/src/index.ts | 618 +++++++++++++++-------------- 1 file changed, 311 insertions(+), 307 deletions(-) diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 4cb9d57..5ee9043 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -73,335 +73,339 @@ export interface FirecrawlDocument { * Response interface for scraping operations. */ export interface ScrapeResponse { - success: boolean; - data?: FirecrawlDocument; - error?: string; + success: boolean; + data?: FirecrawlDocument; + error?: string; } /** -* Response interface for searching operations. -*/ + * Response interface for searching operations. + */ export interface SearchResponse { - success: boolean; - data?: FirecrawlDocument[]; - error?: string; + success: boolean; + data?: FirecrawlDocument[]; + error?: string; } /** -* Response interface for crawling operations. -*/ + * Response interface for crawling operations. + */ export interface CrawlResponse { - success: boolean; - jobId?: string; - data?: FirecrawlDocument[]; - error?: string; + success: boolean; + jobId?: string; + data?: FirecrawlDocument[]; + error?: string; } /** -* Response interface for job status checks. -*/ + * Response interface for job status checks. + */ export interface JobStatusResponse { - success: boolean; - status: string; - jobId?: string; - data?: FirecrawlDocument[]; - partial_data?: FirecrawlDocument[]; - error?: string; + success: boolean; + status: string; + jobId?: string; + data?: FirecrawlDocument[]; + partial_data?: FirecrawlDocument[]; + error?: string; } /** - * Generic parameter interface. - */ + * Generic parameter interface. + */ export interface Params { - [key: string]: any; - extractorOptions?: { - extractionSchema: z.ZodSchema | any; - mode?: "llm-extraction"; - extractionPrompt?: string; - }; + [key: string]: any; + extractorOptions?: { + extractionSchema: z.ZodSchema | any; + mode?: "llm-extraction"; + extractionPrompt?: string; + }; } /** * Main class for interacting with the Firecrawl API. */ export default class FirecrawlApp { - private apiKey: string; - private apiUrl: string; + private apiKey: string; + private apiUrl: string; - /** - * Initializes a new instance of the FirecrawlApp class. - * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) { - this.apiKey = apiKey || ""; - this.apiUrl = apiUrl || "https://api.firecrawl.dev" - if (!this.apiKey) { - throw new Error("No API key provided"); - } + /** + * Initializes a new instance of the FirecrawlApp class. + * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. + */ + constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) { + this.apiKey = apiKey || ""; + this.apiUrl = apiUrl || "https://api.firecrawl.dev"; + if (!this.apiKey) { + throw new Error("No API key provided"); } + } - /** - * Scrapes a URL using the Firecrawl API. - * @param {string} url - The URL to scrape. - * @param {Params | null} params - Additional parameters for the scrape request. - * @returns {Promise} The response from the scrape operation. - */ - async scrapeUrl( - url: string, - params: Params | null = null - ): Promise { - const headers: AxiosRequestHeaders = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - } as AxiosRequestHeaders; - let jsonData: Params = { url, ...params }; - if (params?.extractorOptions?.extractionSchema) { - let schema = params.extractorOptions.extractionSchema; - // Check if schema is an instance of ZodSchema to correctly identify Zod schemas - if (schema instanceof z.ZodSchema) { - schema = zodToJsonSchema(schema); - } - jsonData = { - ...jsonData, - extractorOptions: { - ...params.extractorOptions, - extractionSchema: schema, - mode: params.extractorOptions.mode || "llm-extraction", - }, - }; - } - try { - const response: AxiosResponse = await axios.post( - this.apiUrl + "/v0/scrape", - jsonData, - { headers }, - ); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return responseData; - } else { - throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); - } - } else { - this.handleError(response, "scrape URL"); - } - } catch (error: any) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; + /** + * Scrapes a URL using the Firecrawl API. + * @param {string} url - The URL to scrape. + * @param {Params | null} params - Additional parameters for the scrape request. + * @returns {Promise} The response from the scrape operation. + */ + async scrapeUrl( + url: string, + params: Params | null = null + ): Promise { + const headers: AxiosRequestHeaders = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + } as AxiosRequestHeaders; + let jsonData: Params = { url, ...params }; + if (params?.extractorOptions?.extractionSchema) { + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof z.ZodSchema) { + schema = zodToJsonSchema(schema); + } + jsonData = { + ...jsonData, + extractorOptions: { + ...params.extractorOptions, + extractionSchema: schema, + mode: params.extractorOptions.mode || "llm-extraction", + }, + }; } - - /** - * Searches for a query using the Firecrawl API. - * @param {string} query - The query to search for. - * @param {Params | null} params - Additional parameters for the search request. - * @returns {Promise} The response from the search operation. - */ - async search( - query: string, - params: Params | null = null - ): Promise { - const headers: AxiosRequestHeaders = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - } as AxiosRequestHeaders; - let jsonData: Params = { query }; - if (params) { - jsonData = { ...jsonData, ...params }; - } - try { - const response: AxiosResponse = await axios.post( - this.apiUrl + "/v0/search", - jsonData, - { headers } - ); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return responseData; - } else { - throw new Error(`Failed to search. Error: ${responseData.error}`); - } - } else { - this.handleError(response, "search"); - } - } catch (error: any) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param {string} url - The URL to crawl. - * @param {Params | null} params - Additional parameters for the crawl request. - * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. - * @param {number} pollInterval - Time in seconds for job status checks. - * @param {string} idempotencyKey - Optional idempotency key for the request. - * @returns {Promise} The response from the crawl operation. - */ - async crawlUrl( - url: string, - params: Params | null = null, - waitUntilDone: boolean = true, - pollInterval: number = 2, - idempotencyKey?: string - ): Promise { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData: Params = { url }; - if (params) { - jsonData = { ...jsonData, ...params }; - } - try { - const response: AxiosResponse = await this.postRequest( - this.apiUrl + "/v0/crawl", - jsonData, - headers - ); - if (response.status === 200) { - const jobId: string = response.data.jobId; - if (waitUntilDone) { - return this.monitorJobStatus(jobId, headers, pollInterval); - } else { - return { success: true, jobId }; - } - } else { - this.handleError(response, "start crawl job"); - } - } catch (error: any) { - console.log(error); - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param {string} jobId - The job ID of the crawl operation. - * @returns {Promise} The response containing the job status. - */ - async checkCrawlStatus(jobId: string): Promise { - const headers: AxiosRequestHeaders = this.prepareHeaders(); - try { - const response: AxiosResponse = await this.getRequest( - this.apiUrl + `/v0/crawl/status/${jobId}`, - headers - ); - if (response.status === 200) { - return { - success: true, - status: response.data.status, - data: response.data.data, - partial_data: !response.data.data ? response.data.partial_data : undefined, - }; - } else { - this.handleError(response, "check crawl status"); - } - } catch (error: any) { - throw new Error(error.message); - } - return { - success: false, - status: "unknown", - error: "Internal server error.", - }; - } - - /** - * Prepares the headers for an API request. - * @returns {AxiosRequestHeaders} The prepared headers. - */ - prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders { - return { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, - ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}), - } as AxiosRequestHeaders & { 'x-idempotency-key'?: string }; - } - - /** - * Sends a POST request to the specified URL. - * @param {string} url - The URL to send the request to. - * @param {Params} data - The data to send in the request. - * @param {AxiosRequestHeaders} headers - The headers for the request. - * @returns {Promise} The response from the POST request. - */ - postRequest( - url: string, - data: Params, - headers: AxiosRequestHeaders - ): Promise { - return axios.post(url, data, { headers }); - } - - /** - * Sends a GET request to the specified URL. - * @param {string} url - The URL to send the request to. - * @param {AxiosRequestHeaders} headers - The headers for the request. - * @returns {Promise} The response from the GET request. - */ - getRequest( - url: string, - headers: AxiosRequestHeaders - ): Promise { - return axios.get(url, { headers }); - } - - /** - * Monitors the status of a crawl job until completion or failure. - * @param {string} jobId - The job ID of the crawl operation. - * @param {AxiosRequestHeaders} headers - The headers for the request. - * @param {number} timeout - Timeout in seconds for job status checks. - * @returns {Promise} The final job status or data. - */ - async monitorJobStatus( - jobId: string, - headers: AxiosRequestHeaders, - checkInterval: number - ): Promise { - while (true) { - const statusResponse: AxiosResponse = await this.getRequest( - this.apiUrl + `/v0/crawl/status/${jobId}`, - headers - ); - if (statusResponse.status === 200) { - const statusData = statusResponse.data; - if (statusData.status === "completed") { - if ("data" in statusData) { - return statusData.data; - } else { - throw new Error("Crawl job completed but no data was returned"); - } - } else if ( - ["active", "paused", "pending", "queued"].includes(statusData.status) - ) { - if (checkInterval < 2) { - checkInterval = 2; - } - await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again - } else { - throw new Error( - `Crawl job failed or was stopped. Status: ${statusData.status}` - ); - } - } else { - this.handleError(statusResponse, "check crawl status"); - } - } - } - - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response: AxiosResponse, action: string): void { - if ([402, 408, 409, 500].includes(response.status)) { - const errorMessage: string = - response.data.error || "Unknown error occurred"; - throw new Error( - `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}` - ); + try { + const response: AxiosResponse = await axios.post( + this.apiUrl + "/v0/scrape", + jsonData, + { headers } + ); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; } else { - throw new Error( - `Unexpected error occurred while trying to ${action}. Status code: ${response.status}` - ); + throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); } + } else { + this.handleError(response, "scrape URL"); + } + } catch (error: any) { + throw new Error(error.message); } + return { success: false, error: "Internal server error." }; + } + + /** + * Searches for a query using the Firecrawl API. + * @param {string} query - The query to search for. + * @param {Params | null} params - Additional parameters for the search request. + * @returns {Promise} The response from the search operation. + */ + async search( + query: string, + params: Params | null = null + ): Promise { + const headers: AxiosRequestHeaders = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + } as AxiosRequestHeaders; + let jsonData: Params = { query }; + if (params) { + jsonData = { ...jsonData, ...params }; + } + try { + const response: AxiosResponse = await axios.post( + this.apiUrl + "/v0/search", + jsonData, + { headers } + ); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } else { + throw new Error(`Failed to search. Error: ${responseData.error}`); + } + } else { + this.handleError(response, "search"); + } + } catch (error: any) { + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + } + + /** + * Initiates a crawl job for a URL using the Firecrawl API. + * @param {string} url - The URL to crawl. + * @param {Params | null} params - Additional parameters for the crawl request. + * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. + * @param {number} pollInterval - Time in seconds for job status checks. + * @param {string} idempotencyKey - Optional idempotency key for the request. + * @returns {Promise} The response from the crawl operation. + */ + async crawlUrl( + url: string, + params: Params | null = null, + waitUntilDone: boolean = true, + pollInterval: number = 2, + idempotencyKey?: string + ): Promise { + const headers = this.prepareHeaders(idempotencyKey); + let jsonData: Params = { url }; + if (params) { + jsonData = { ...jsonData, ...params }; + } + try { + const response: AxiosResponse = await this.postRequest( + this.apiUrl + "/v0/crawl", + jsonData, + headers + ); + if (response.status === 200) { + const jobId: string = response.data.jobId; + if (waitUntilDone) { + return this.monitorJobStatus(jobId, headers, pollInterval); + } else { + return { success: true, jobId }; + } + } else { + this.handleError(response, "start crawl job"); + } + } catch (error: any) { + console.log(error); + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + } + + /** + * Checks the status of a crawl job using the Firecrawl API. + * @param {string} jobId - The job ID of the crawl operation. + * @returns {Promise} The response containing the job status. + */ + async checkCrawlStatus(jobId: string): Promise { + const headers: AxiosRequestHeaders = this.prepareHeaders(); + try { + const response: AxiosResponse = await this.getRequest( + this.apiUrl + `/v0/crawl/status/${jobId}`, + headers + ); + if (response.status === 200) { + return { + success: true, + status: response.data.status, + data: response.data.data, + partial_data: !response.data.data + ? response.data.partial_data + : undefined, + }; + } else { + this.handleError(response, "check crawl status"); + } + } catch (error: any) { + throw new Error(error.message); + } + return { + success: false, + status: "unknown", + error: "Internal server error.", + }; + } + + /** + * Prepares the headers for an API request. + * @returns {AxiosRequestHeaders} The prepared headers. + */ + prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders { + return { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}), + } as AxiosRequestHeaders & { "x-idempotency-key"?: string }; + } + + /** + * Sends a POST request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {Params} data - The data to send in the request. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the POST request. + */ + postRequest( + url: string, + data: Params, + headers: AxiosRequestHeaders + ): Promise { + return axios.post(url, data, { headers }); + } + + /** + * Sends a GET request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the GET request. + */ + getRequest( + url: string, + headers: AxiosRequestHeaders + ): Promise { + return axios.get(url, { headers }); + } + + /** + * Monitors the status of a crawl job until completion or failure. + * @param {string} jobId - The job ID of the crawl operation. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @param {number} timeout - Timeout in seconds for job status checks. + * @returns {Promise} The final job status or data. + */ + async monitorJobStatus( + jobId: string, + headers: AxiosRequestHeaders, + checkInterval: number + ): Promise { + while (true) { + const statusResponse: AxiosResponse = await this.getRequest( + this.apiUrl + `/v0/crawl/status/${jobId}`, + headers + ); + if (statusResponse.status === 200) { + const statusData = statusResponse.data; + if (statusData.status === "completed") { + if ("data" in statusData) { + return statusData.data; + } else { + throw new Error("Crawl job completed but no data was returned"); + } + } else if ( + ["active", "paused", "pending", "queued"].includes(statusData.status) + ) { + if (checkInterval < 2) { + checkInterval = 2; + } + await new Promise((resolve) => + setTimeout(resolve, checkInterval * 1000) + ); // Wait for the specified timeout before checking again + } else { + throw new Error( + `Crawl job failed or was stopped. Status: ${statusData.status}` + ); + } + } else { + this.handleError(statusResponse, "check crawl status"); + } + } + } + + /** + * Handles errors from API responses. + * @param {AxiosResponse} response - The response from the API. + * @param {string} action - The action being performed when the error occurred. + */ + handleError(response: AxiosResponse, action: string): void { + if ([402, 408, 409, 500].includes(response.status)) { + const errorMessage: string = + response.data.error || "Unknown error occurred"; + throw new Error( + `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}` + ); + } else { + throw new Error( + `Unexpected error occurred while trying to ${action}. Status code: ${response.status}` + ); + } + } } From 754c9fa08d19398d026fe901b8ef0dbc65842126 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 18 Jun 2024 12:58:57 -0400 Subject: [PATCH 3/3] Update package.json --- apps/js-sdk/firecrawl/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 15d8034..b162882 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.27", + "version": "0.0.28", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts",