From c89964b2303f576b0dd419c44427442917d3ab63 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 16:38:49 -0700 Subject: [PATCH 1/8] Nick: --- apps/js-sdk/firecrawl/build/index.js | 69 +-- apps/js-sdk/firecrawl/package-lock.json | 24 +- apps/js-sdk/firecrawl/package.json | 7 +- apps/js-sdk/firecrawl/src/index.ts | 159 ++++-- apps/js-sdk/firecrawl/types/index.d.ts | 8 +- apps/js-sdk/package-lock.json | 674 +++++++++++++++++++++++- apps/js-sdk/package.json | 10 +- apps/js-sdk/test.ts | 28 + apps/js-sdk/tsconfig.json | 72 +++ 9 files changed, 954 insertions(+), 97 deletions(-) create mode 100644 apps/js-sdk/test.ts create mode 100644 apps/js-sdk/tsconfig.json diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index 9d8237b..b945b88 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -7,9 +7,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; -import axios from 'axios'; -import dotenv from 'dotenv'; -dotenv.config(); +import axios from "axios"; +import { zodToJsonSchema } from "zod-to-json-schema"; /** * Main class for interacting with the Firecrawl API. */ @@ -19,9 +18,9 @@ export default class FirecrawlApp { * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. 
*/ constructor({ apiKey = null }) { - this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || ''; + this.apiKey = apiKey || ""; if (!this.apiKey) { - throw new Error('No API key provided'); + throw new Error("No API key provided"); } } /** @@ -32,16 +31,18 @@ export default class FirecrawlApp { */ scrapeUrl(url_1) { return __awaiter(this, arguments, void 0, function* (url, params = null) { + var _a; const headers = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, }; - let jsonData = { url }; - if (params) { - jsonData = Object.assign(Object.assign({}, jsonData), params); + let jsonData = Object.assign({ url }, params); + if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) { + const schema = zodToJsonSchema(params.extractorOptions.extractionSchema); + jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) }); } try { - const response = yield axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers }); + const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers }); if (response.status === 200) { const responseData = response.data; if (responseData.success) { @@ -52,13 +53,13 @@ export default class FirecrawlApp { } } else { - this.handleError(response, 'scrape URL'); + this.handleError(response, "scrape URL"); } } catch (error) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." 
}; }); } /** @@ -70,15 +71,15 @@ export default class FirecrawlApp { search(query_1) { return __awaiter(this, arguments, void 0, function* (query, params = null) { const headers = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, }; let jsonData = { query }; if (params) { jsonData = Object.assign(Object.assign({}, jsonData), params); } try { - const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers }); + const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers }); if (response.status === 200) { const responseData = response.data; if (responseData.success) { @@ -89,13 +90,13 @@ export default class FirecrawlApp { } } else { - this.handleError(response, 'search'); + this.handleError(response, "search"); } } catch (error) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; }); } /** @@ -114,7 +115,7 @@ export default class FirecrawlApp { jsonData = Object.assign(Object.assign({}, jsonData), params); } try { - const response = yield this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers); + const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers); if (response.status === 200) { const jobId = response.data.jobId; if (waitUntilDone) { @@ -125,14 +126,14 @@ export default class FirecrawlApp { } } else { - this.handleError(response, 'start crawl job'); + this.handleError(response, "start crawl job"); } } catch (error) { console.log(error); throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." 
}; }); } /** @@ -149,13 +150,17 @@ export default class FirecrawlApp { return response.data; } else { - this.handleError(response, 'check crawl status'); + this.handleError(response, "check crawl status"); } } catch (error) { throw new Error(error.message); } - return { success: false, status: 'unknown', error: 'Internal server error.' }; + return { + success: false, + status: "unknown", + error: "Internal server error.", + }; }); } /** @@ -164,8 +169,8 @@ export default class FirecrawlApp { */ prepareHeaders() { return { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, }; } /** @@ -200,26 +205,26 @@ export default class FirecrawlApp { const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers); if (statusResponse.status === 200) { const statusData = statusResponse.data; - if (statusData.status === 'completed') { - if ('data' in statusData) { + if (statusData.status === "completed") { + if ("data" in statusData) { return statusData.data; } else { - throw new Error('Crawl job completed but no data was returned'); + throw new Error("Crawl job completed but no data was returned"); } } - else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) { + else if (["active", "paused", "pending", "queued"].includes(statusData.status)) { if (timeout < 2) { timeout = 2; } - yield new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again + yield new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again } else { throw new Error(`Crawl job failed or was stopped. 
Status: ${statusData.status}`); } } else { - this.handleError(statusResponse, 'check crawl status'); + this.handleError(statusResponse, "check crawl status"); } } }); @@ -231,7 +236,7 @@ export default class FirecrawlApp { */ handleError(response, action) { if ([402, 409, 500].includes(response.status)) { - const errorMessage = response.data.error || 'Unknown error occurred'; + const errorMessage = response.data.error || "Unknown error occurred"; throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); } else { diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 9811597..6b085be 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,15 +1,17 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.13", + "version": "0.0.17-beta.8", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.13", + "version": "0.0.17-beta.8", "license": "MIT", "dependencies": { - "axios": "^1.6.8" + "axios": "^1.6.8", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" }, "devDependencies": { "@jest/globals": "^29.7.0", @@ -3766,6 +3768,22 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zod": { + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", + "peerDependencies": { + "zod": "^3.23.3" + } } } } diff --git a/apps/js-sdk/firecrawl/package.json 
b/apps/js-sdk/firecrawl/package.json index a8275f7..3634730 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.16", + "version": "0.0.17", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", @@ -8,6 +8,7 @@ "scripts": { "build": "tsc", "publish": "npm run build && npm publish --access public", + "publish-beta": "npm run build && npm publish --access public --tag beta", "test": "jest src/**/*.test.ts" }, "repository": { @@ -17,7 +18,9 @@ "author": "Mendable.ai", "license": "MIT", "dependencies": { - "axios": "^1.6.8" + "axios": "^1.6.8", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" }, "bugs": { "url": "https://github.com/mendableai/firecrawl/issues" diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index aea15f8..85253d8 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -1,5 +1,6 @@ -import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios'; - +import axios, { AxiosResponse, AxiosRequestHeaders } from "axios"; +import { z } from "zod"; +import { zodToJsonSchema } from "zod-to-json-schema"; /** * Configuration interface for FirecrawlApp. */ @@ -12,6 +13,11 @@ export interface FirecrawlAppConfig { */ export interface Params { [key: string]: any; + extractorOptions?: { + extractionSchema: z.ZodSchema | any; + mode?: "llm-extraction"; + extractionPrompt?: string; + }; } /** @@ -63,9 +69,9 @@ export default class FirecrawlApp { * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. 
*/ constructor({ apiKey = null }: FirecrawlAppConfig) { - this.apiKey = apiKey || ''; + this.apiKey = apiKey || ""; if (!this.apiKey) { - throw new Error('No API key provided'); + throw new Error("No API key provided"); } } @@ -75,31 +81,48 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the scrape request. * @returns {Promise} The response from the scrape operation. */ - async scrapeUrl(url: string, params: Params | null = null): Promise { + async scrapeUrl( + url: string, + params: Params | null = null + ): Promise { const headers: AxiosRequestHeaders = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; - let jsonData: Params = { url }; - if (params) { - jsonData = { ...jsonData, ...params }; + let jsonData: Params = { url, ...params }; + if (params?.extractorOptions?.extractionSchema) { + const schema = zodToJsonSchema( + params.extractorOptions.extractionSchema as z.ZodSchema + ); + jsonData = { + ...jsonData, + extractorOptions: { + ...params.extractorOptions, + extractionSchema: schema, + mode: params.extractorOptions.mode || "llm-extraction", + }, + }; } try { - const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers }); + const response: AxiosResponse = await axios.post( + "https://api.firecrawl.dev/v0/scrape", + jsonData, + { headers } + ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { - return responseData; + return responseData; } else { throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); } } else { - this.handleError(response, 'scrape URL'); + this.handleError(response, "scrape URL"); } } catch (error: any) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' 
}; + return { success: false, error: "Internal server error." }; } /** @@ -108,31 +131,38 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the search request. * @returns {Promise} The response from the search operation. */ - async search(query: string, params: Params | null = null): Promise { + async search( + query: string, + params: Params | null = null + ): Promise { const headers: AxiosRequestHeaders = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; let jsonData: Params = { query }; if (params) { jsonData = { ...jsonData, ...params }; } try { - const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers }); + const response: AxiosResponse = await axios.post( + "https://api.firecrawl.dev/v0/search", + jsonData, + { headers } + ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { - return responseData; + return responseData; } else { throw new Error(`Failed to search. Error: ${responseData.error}`); } } else { - this.handleError(response, 'search'); + this.handleError(response, "search"); } } catch (error: any) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; } /** @@ -143,14 +173,23 @@ export default class FirecrawlApp { * @param {number} timeout - Timeout in seconds for job status checks. * @returns {Promise} The response from the crawl operation. 
*/ - async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise { + async crawlUrl( + url: string, + params: Params | null = null, + waitUntilDone: boolean = true, + timeout: number = 2 + ): Promise { const headers = this.prepareHeaders(); let jsonData: Params = { url }; if (params) { jsonData = { ...jsonData, ...params }; } try { - const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers); + const response: AxiosResponse = await this.postRequest( + "https://api.firecrawl.dev/v0/crawl", + jsonData, + headers + ); if (response.status === 200) { const jobId: string = response.data.jobId; if (waitUntilDone) { @@ -159,13 +198,13 @@ export default class FirecrawlApp { return { success: true, jobId }; } } else { - this.handleError(response, 'start crawl job'); + this.handleError(response, "start crawl job"); } } catch (error: any) { - console.log(error) + console.log(error); throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; } /** @@ -176,16 +215,23 @@ export default class FirecrawlApp { async checkCrawlStatus(jobId: string): Promise { const headers: AxiosRequestHeaders = this.prepareHeaders(); try { - const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers); + const response: AxiosResponse = await this.getRequest( + `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + headers + ); if (response.status === 200) { return response.data; } else { - this.handleError(response, 'check crawl status'); + this.handleError(response, "check crawl status"); } } catch (error: any) { throw new Error(error.message); } - return { success: false, status: 'unknown', error: 'Internal server error.' 
}; + return { + success: false, + status: "unknown", + error: "Internal server error.", + }; } /** @@ -194,8 +240,8 @@ export default class FirecrawlApp { */ prepareHeaders(): AxiosRequestHeaders { return { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; } @@ -206,7 +252,11 @@ export default class FirecrawlApp { * @param {AxiosRequestHeaders} headers - The headers for the request. * @returns {Promise} The response from the POST request. */ - postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise { + postRequest( + url: string, + data: Params, + headers: AxiosRequestHeaders + ): Promise { return axios.post(url, data, { headers }); } @@ -216,7 +266,10 @@ export default class FirecrawlApp { * @param {AxiosRequestHeaders} headers - The headers for the request. * @returns {Promise} The response from the GET request. */ - getRequest(url: string, headers: AxiosRequestHeaders): Promise { + getRequest( + url: string, + headers: AxiosRequestHeaders + ): Promise { return axios.get(url, { headers }); } @@ -227,27 +280,38 @@ export default class FirecrawlApp { * @param {number} timeout - Timeout in seconds for job status checks. * @returns {Promise} The final job status or data. 
*/ - async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise { + async monitorJobStatus( + jobId: string, + headers: AxiosRequestHeaders, + timeout: number + ): Promise { while (true) { - const statusResponse: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers); + const statusResponse: AxiosResponse = await this.getRequest( + `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + headers + ); if (statusResponse.status === 200) { const statusData = statusResponse.data; - if (statusData.status === 'completed') { - if ('data' in statusData) { + if (statusData.status === "completed") { + if ("data" in statusData) { return statusData.data; } else { - throw new Error('Crawl job completed but no data was returned'); + throw new Error("Crawl job completed but no data was returned"); } - } else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) { + } else if ( + ["active", "paused", "pending", "queued"].includes(statusData.status) + ) { if (timeout < 2) { timeout = 2; } - await new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again + await new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again } else { - throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); + throw new Error( + `Crawl job failed or was stopped. Status: ${statusData.status}` + ); } } else { - this.handleError(statusResponse, 'check crawl status'); + this.handleError(statusResponse, "check crawl status"); } } } @@ -259,10 +323,15 @@ export default class FirecrawlApp { */ handleError(response: AxiosResponse, action: string): void { if ([402, 409, 500].includes(response.status)) { - const errorMessage: string = response.data.error || 'Unknown error occurred'; - throw new Error(`Failed to ${action}. Status code: ${response.status}. 
Error: ${errorMessage}`); + const errorMessage: string = + response.data.error || "Unknown error occurred"; + throw new Error( + `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}` + ); } else { - throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); + throw new Error( + `Unexpected error occurred while trying to ${action}. Status code: ${response.status}` + ); } } } diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index 7f79d64..40d95c4 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -1,4 +1,5 @@ -import { AxiosResponse, AxiosRequestHeaders } from 'axios'; +import { AxiosResponse, AxiosRequestHeaders } from "axios"; +import { z } from "zod"; /** * Configuration interface for FirecrawlApp. */ @@ -10,6 +11,11 @@ export interface FirecrawlAppConfig { */ export interface Params { [key: string]: any; + extractorOptions?: { + extractionSchema: z.ZodSchema | any; + mode?: "llm-extraction"; + extractionPrompt?: string; + }; } /** * Response interface for scraping operations. 
diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 363f301..337972f 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -9,19 +9,481 @@ "version": "1.0.0", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.15", - "axios": "^1.6.8" + "@mendable/firecrawl-js": "^0.0.17-beta.8", + "axios": "^1.6.8", + "ts-node": "^10.9.2", + "typescript": "^5.4.5", + "zod": "^3.23.8" + }, + "devDependencies": { + "tsx": "^4.9.3" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.20.2.tgz", + "integrity": "sha512-D+EBOJHXdNZcLJRBkhENNG8Wji2kgc9AZ9KiPr1JuZjsNtyHzrsfLRrY0tk2H2aoFu6RANO1y1iPPUCDYWkb5g==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.20.2.tgz", + "integrity": "sha512-t98Ra6pw2VaDhqNWO2Oph2LXbz/EJcnLmKLGBJwEwXX/JAN83Fym1rU8l0JUWK6HkIbWONCSSatf4sf2NBRx/w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.20.2.tgz", + "integrity": "sha512-mRzjLacRtl/tWU0SvD8lUEwb61yP9cqQo6noDZP/O8VkwafSYwZ4yWy24kan8jE/IMERpYncRt2dw438LP3Xmg==", + "cpu": [ + "arm64" + ], + "dev": true, + 
"optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.20.2.tgz", + "integrity": "sha512-btzExgV+/lMGDDa194CcUQm53ncxzeBrWJcncOBxuC6ndBkKxnHdFJn86mCIgTELsooUmwUm9FkhSp5HYu00Rg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.20.2.tgz", + "integrity": "sha512-4J6IRT+10J3aJH3l1yzEg9y3wkTDgDk7TSDFX+wKFiWjqWp/iCfLIYzGyasx9l0SAFPT1HwSCR+0w/h1ES/MjA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.20.2.tgz", + "integrity": "sha512-tBcXp9KNphnNH0dfhv8KYkZhjc+H3XBkF5DKtswJblV7KlT9EI2+jeA8DgBjp908WEuYll6pF+UStUCfEpdysA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.2.tgz", + "integrity": "sha512-d3qI41G4SuLiCGCFGUrKsSeTXyWG6yem1KcGZVS+3FYlYhtNoNgYrWcvkOoaqMhwXSMrZRl69ArHsGJ9mYdbbw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.20.2.tgz", + "integrity": "sha512-d+DipyvHRuqEeM5zDivKV1KuXn9WeRX6vqSqIDgwIfPQtwMP4jaDsQsDncjTDDsExT4lR/91OLjRo8bmC1e+Cw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": 
true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.20.2.tgz", + "integrity": "sha512-VhLPeR8HTMPccbuWWcEUD1Az68TqaTYyj6nfE4QByZIQEQVWBB8vup8PpR7y1QHL3CpcF6xd5WVBU/+SBEvGTg==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.20.2.tgz", + "integrity": "sha512-9pb6rBjGvTFNira2FLIWqDk/uaf42sSyLE8j1rnUpuzsODBq7FvpwHYZxQ/It/8b+QOS1RYfqgGFNLRI+qlq2A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.20.2.tgz", + "integrity": "sha512-o10utieEkNPFDZFQm9CoP7Tvb33UutoJqg3qKf1PWVeeJhJw0Q347PxMvBgVVFgouYLGIhFYG0UGdBumROyiig==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.20.2.tgz", + "integrity": "sha512-PR7sp6R/UC4CFVomVINKJ80pMFlfDfMQMYynX7t1tNTeivQ6XdX5r2XovMmha/VjR1YN/HgHWsVcTRIMkymrgQ==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.20.2.tgz", + "integrity": "sha512-4BlTqeutE/KnOiTG5Y6Sb/Hw6hsBOZapOVF6njAESHInhlQAghVVZL1ZpIctBOoTFbQyGW+LsVYZ8lSSB3wkjA==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": 
[ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.20.2.tgz", + "integrity": "sha512-rD3KsaDprDcfajSKdn25ooz5J5/fWBylaaXkuotBDGnMnDP1Uv5DLAN/45qfnf3JDYyJv/ytGHQaziHUdyzaAg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.20.2.tgz", + "integrity": "sha512-snwmBKacKmwTMmhLlz/3aH1Q9T8v45bKYGE3j26TsaOVtjIag4wLfWSiZykXzXuE1kbCE+zJRmwp+ZbIHinnVg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.20.2.tgz", + "integrity": "sha512-wcWISOobRWNm3cezm5HOZcYz1sKoHLd8VL1dl309DiixxVFoFe/o8HnwuIwn6sXre88Nwj+VwZUvJf4AFxkyrQ==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.20.2.tgz", + "integrity": "sha512-1MdwI6OOTsfQfek8sLwgyjOXAu+wKhLEoaOLTjbijk6E2WONYpH9ZU2mNtR+lZ2B4uwr+usqGuVfFT9tMtGvGw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.20.2.tgz", + "integrity": "sha512-K8/DhBxcVQkzYc43yJXDSyjlFeHQJBiowJ0uVL6Tor3jGQfSGHNNJcWxNbOI8v5k82prYqzPuwkzHt3J1T1iZQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + 
"engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.20.2.tgz", + "integrity": "sha512-eMpKlV0SThJmmJgiVyN9jTPJ2VBPquf6Kt/nAoo6DgHAoN57K15ZghiHaMvqjCye/uU4X5u3YSMgVBI1h3vKrQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.20.2.tgz", + "integrity": "sha512-2UyFtRC6cXLyejf/YEld4Hajo7UHILetzE1vsRcGL3earZEW77JxrFjH4Ez2qaTiEfMgAXxfAZCm1fvM/G/o8w==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.20.2.tgz", + "integrity": "sha512-GRibxoawM9ZCnDxnP3usoUDO9vUkpAxIIZ6GQI+IlVmr5kP3zUq+l17xELTHMWTWzjxa2guPNyrpq1GWmPvcGQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.20.2.tgz", + "integrity": "sha512-HfLOfn9YWmkSKRQqovpnITazdtquEW8/SoHW7pWpuEeguaZI4QnCRW6b+oZTztdBnZOS2hqJ6im/D5cPzBTTlQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.20.2.tgz", + "integrity": "sha512-N49X4lJX27+l9jbLKSqZ6bKNjzQvHaT8IIFUy+YIqmXQdjYCToGWwOItDrfby14c78aDd5NHQl29xingXfCdLQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.4.15", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", + "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" } }, "node_modules/@mendable/firecrawl-js": { - "version": "0.0.15", - "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.15.tgz", - "integrity": "sha512-e3iCCrLIiEh+jEDerGV9Uhdkn8ymo+sG+k3osCwPg51xW1xUdAnmlcHrcJoR43RvKXdvD/lqoxg8odUEsqyH+w==", + "version": "0.0.17-beta.8", + "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.17-beta.8.tgz", + "integrity": "sha512-d65AW+y4YUQ9oU4Jy8dqiuKBPr+QkAyOKYEwFev/GOpGbNfU6lBUGJlAujVXaVY6fDbUGkHoaEzUbuTsqZV+Ng==", "dependencies": { + "@mendable/firecrawl-js": "^0.0.17-beta.5", "axios": "^1.6.8", - "dotenv": "^16.4.5" + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" } }, + "node_modules/@tsconfig/node10": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", + "integrity": "sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + 
"resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==" + }, + "node_modules/@types/node": { + "version": "20.12.11", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.11.tgz", + "integrity": "sha512-vDg9PZ/zi+Nqp6boSOT7plNuthRugEKixDv5sFTIpkE89MmNtEArAShI4mxuX2+UrLEe9pxC1vm2cjm9YlWbJw==", + "peer": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/acorn": { + "version": "8.11.3", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz", + "integrity": "sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.2.tgz", + "integrity": "sha512-cjkyv4OtNCIeqhHrfS81QWXoCBPExR/J62oyEqepVw8WaQeSqpW2uhuLPh1m9eWhDuOo/jUXVTlifvesOWp/4A==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==" + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -48,6 +510,11 
@@ "node": ">= 0.8" } }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==" + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -56,15 +523,50 @@ "node": ">=0.4.0" } }, - "node_modules/dotenv": { - "version": "16.4.5", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", - "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/esbuild": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz", + "integrity": "sha512-WdOOppmUNU+IbZ0PaDiTst80zjnrOkyJNHoKupIcVyU8Lvla3Ugx94VzkQ32Ijqd7UhHJy75gNWDMUekcrSJ6g==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, "engines": { "node": ">=12" }, - "funding": { - "url": "https://dotenvx.com" + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.20.2", + "@esbuild/android-arm": "0.20.2", + "@esbuild/android-arm64": "0.20.2", + "@esbuild/android-x64": "0.20.2", + "@esbuild/darwin-arm64": "0.20.2", + "@esbuild/darwin-x64": "0.20.2", + "@esbuild/freebsd-arm64": "0.20.2", + "@esbuild/freebsd-x64": "0.20.2", + "@esbuild/linux-arm": "0.20.2", + "@esbuild/linux-arm64": "0.20.2", + "@esbuild/linux-ia32": "0.20.2", + "@esbuild/linux-loong64": "0.20.2", + "@esbuild/linux-mips64el": "0.20.2", + "@esbuild/linux-ppc64": "0.20.2", + "@esbuild/linux-riscv64": "0.20.2", + "@esbuild/linux-s390x": "0.20.2", + 
"@esbuild/linux-x64": "0.20.2", + "@esbuild/netbsd-x64": "0.20.2", + "@esbuild/openbsd-x64": "0.20.2", + "@esbuild/sunos-x64": "0.20.2", + "@esbuild/win32-arm64": "0.20.2", + "@esbuild/win32-ia32": "0.20.2", + "@esbuild/win32-x64": "0.20.2" } }, "node_modules/follow-redirects": { @@ -99,6 +601,37 @@ "node": ">= 6" } }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-tsconfig": { + "version": "4.7.4", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.4.tgz", + "integrity": "sha512-ofbkKj+0pjXjhejr007J/fLf+sW+8H7K5GCm+msC8q3IpvgjobpyPqSRFemNyIMxklC0zeJpi7VDFna19FacvQ==", + "dev": true, + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==" + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -122,6 +655,123 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": 
"sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/tsx": { + "version": "4.9.3", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.9.3.tgz", + "integrity": "sha512-czVbetlILiyJZI5zGlj2kw9vFiSeyra9liPD4nG+Thh4pKTi0AmMEQ8zdV/L2xbIVKrIqif4sUNrsMAOksx9Zg==", + "dev": true, + "dependencies": { + "esbuild": "~0.20.2", + "get-tsconfig": "^4.7.3" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/typescript": { + "version": "5.4.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", + "integrity": 
"sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "peer": true + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==" + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "engines": { + "node": ">=6" + } + }, + "node_modules/zod": { + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", + "peerDependencies": { + "zod": "^3.23.3" + } } } } diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index 563e1e3..9492e07 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -11,7 +11,13 @@ "author": "", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.15", - "axios": "^1.6.8" + "@mendable/firecrawl-js": "^0.0.17-beta.8", + "axios": "^1.6.8", + 
"ts-node": "^10.9.2", + "typescript": "^5.4.5", + "zod": "^3.23.8" + }, + "devDependencies": { + "tsx": "^4.9.3" } } diff --git a/apps/js-sdk/test.ts b/apps/js-sdk/test.ts new file mode 100644 index 0000000..a35c369 --- /dev/null +++ b/apps/js-sdk/test.ts @@ -0,0 +1,28 @@ +import FirecrawlApp from "@mendable/firecrawl-js"; +import { z } from "zod"; + +async function a() { + const app = new FirecrawlApp({ + apiKey: "fc-YOUR_FIRECRAWL_API_KEY", + }); + + // Define schema to extract contents into + const schema = z.object({ + top: z + .array( + z.object({ + title: z.string(), + points: z.number(), + by: z.string(), + commentsURL: z.string(), + }) + ) + .length(5) + .describe("Top 5 stories on Hacker News"), + }); + const scrapeResult = await app.scrapeUrl("https://news.ycombinator.com", { + extractorOptions: { extractionSchema: schema }, + }); + console.log(scrapeResult.data["llm_extraction"]); +} +a(); diff --git a/apps/js-sdk/tsconfig.json b/apps/js-sdk/tsconfig.json new file mode 100644 index 0000000..affe0ed --- /dev/null +++ b/apps/js-sdk/tsconfig.json @@ -0,0 +1,72 @@ +{ + "compilerOptions": { + /* Visit https://aka.ms/tsconfig.json to read more about this file */ + + /* Basic Options */ + // "incremental": true, /* Enable incremental compilation */ + "target": "es6" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */, + "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */, + // "lib": [], /* Specify library files to be included in the compilation. */ + // "allowJs": true, /* Allow javascript files to be compiled. */ + // "checkJs": true, /* Report errors in .js files. */ + // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */ + "declaration": true /* Generates corresponding '.d.ts' file. 
*/, + // "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */ + // "sourceMap": true, /* Generates corresponding '.map' file. */ + // "outFile": "./", /* Concatenate and emit output to single file. */ + "outDir": "./build" /* Redirect output structure to the directory. */, + // "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */ + // "composite": true, /* Enable project compilation */ + // "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */ + // "removeComments": true, /* Do not emit comments to output. */ + // "noEmit": true, /* Do not emit outputs. */ + // "importHelpers": true, /* Import emit helpers from 'tslib'. */ + // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ + // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ + + /* Strict Type-Checking Options */ + "strict": false /* Enable all strict type-checking options. */, + // "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */ + // "strictNullChecks": true, /* Enable strict null checks. */ + // "strictFunctionTypes": true, /* Enable strict checking of function types. */ + // "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */ + // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ + // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ + // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ + + /* Additional Checks */ + // "noUnusedLocals": true, /* Report errors on unused locals. */ + // "noUnusedParameters": true, /* Report errors on unused parameters. 
*/ + // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ + // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ + + /* Module Resolution Options */ + // "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ + // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ + // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ + // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ + // "typeRoots": [], /* List of folders to include type definitions from. */ + // "types": [], /* Type declaration files to be included in compilation. */ + // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ + "resolveJsonModule": true, + "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */, + // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ + // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ + + /* Source Map Options */ + // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ + // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ + // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */ + // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. 
*/ + + /* Experimental Options */ + // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ + // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ + + /* Advanced Options */ + "skipLibCheck": true /* Skip type checking of declaration files. */, + "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */ + }, + "include": ["src", "test.ts"], + "exclude": ["node_modules", "**/__tests__/*"] +} From e6dbbf1bab2659a25ef99abbf7f7dd939671b553 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 17:16:59 -0700 Subject: [PATCH 2/8] Nick: fixes js and pydantic implementation --- apps/js-sdk/firecrawl/build/index.js | 7 ++++- apps/js-sdk/firecrawl/package.json | 2 +- apps/js-sdk/firecrawl/src/index.ts | 8 ++++-- apps/js-sdk/package-lock.json | 9 +++--- apps/js-sdk/package.json | 2 +- apps/js-sdk/test.ts | 4 +-- apps/python-sdk/example.py | 37 ++++++++++++++++++++----- apps/python-sdk/firecrawl/firecrawl.py | 38 ++++++++++++++++++++++---- apps/python-sdk/setup.py | 2 +- 9 files changed, 82 insertions(+), 27 deletions(-) diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index b945b88..6e0f367 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -8,6 +8,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge }); }; import axios from "axios"; +import { z } from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; /** * Main class for interacting with the Firecrawl API. @@ -38,7 +39,11 @@ export default class FirecrawlApp { }; let jsonData = Object.assign({ url }, params); if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? 
void 0 : _a.extractionSchema) { - const schema = zodToJsonSchema(params.extractorOptions.extractionSchema); + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof z.ZodSchema) { + schema = zodToJsonSchema(schema); + } jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) }); } try { diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 3634730..a9359cf 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.17", + "version": "0.0.19", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 85253d8..0319c74 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -91,9 +91,11 @@ export default class FirecrawlApp { } as AxiosRequestHeaders; let jsonData: Params = { url, ...params }; if (params?.extractorOptions?.extractionSchema) { - const schema = zodToJsonSchema( - params.extractorOptions.extractionSchema as z.ZodSchema - ); + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof z.ZodSchema) { + schema = zodToJsonSchema(schema); + } jsonData = { ...jsonData, extractorOptions: { diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 337972f..4d26319 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -9,7 +9,7 @@ "version": "1.0.0", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.17-beta.8", + 
"@mendable/firecrawl-js": "^0.0.19", "axios": "^1.6.8", "ts-node": "^10.9.2", "typescript": "^5.4.5", @@ -421,11 +421,10 @@ } }, "node_modules/@mendable/firecrawl-js": { - "version": "0.0.17-beta.8", - "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.17-beta.8.tgz", - "integrity": "sha512-d65AW+y4YUQ9oU4Jy8dqiuKBPr+QkAyOKYEwFev/GOpGbNfU6lBUGJlAujVXaVY6fDbUGkHoaEzUbuTsqZV+Ng==", + "version": "0.0.19", + "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.19.tgz", + "integrity": "sha512-u9BDVIN/bftDztxLlE2cf02Nz0si3+Vmy9cANDFHj/iriT3guzI8ITBk4uC81CyRmPzNyXrW6hSAG90g9ol4cA==", "dependencies": { - "@mendable/firecrawl-js": "^0.0.17-beta.5", "axios": "^1.6.8", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index 9492e07..0e93fe3 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -11,7 +11,7 @@ "author": "", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.17-beta.8", + "@mendable/firecrawl-js": "^0.0.19", "axios": "^1.6.8", "ts-node": "^10.9.2", "typescript": "^5.4.5", diff --git a/apps/js-sdk/test.ts b/apps/js-sdk/test.ts index a35c369..5419c2d 100644 --- a/apps/js-sdk/test.ts +++ b/apps/js-sdk/test.ts @@ -3,7 +3,7 @@ import { z } from "zod"; async function a() { const app = new FirecrawlApp({ - apiKey: "fc-YOUR_FIRECRAWL_API_KEY", + apiKey: "fc-YOUR_API_KEY", }); // Define schema to extract contents into @@ -20,7 +20,7 @@ async function a() { .length(5) .describe("Top 5 stories on Hacker News"), }); - const scrapeResult = await app.scrapeUrl("https://news.ycombinator.com", { + const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", { extractorOptions: { extractionSchema: schema }, }); console.log(scrapeResult.data["llm_extraction"]); diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index b178400..3ca84af 100644 --- a/apps/python-sdk/example.py +++ 
b/apps/python-sdk/example.py @@ -1,13 +1,36 @@ from firecrawl import FirecrawlApp -app = FirecrawlApp(api_key="YOUR_API_KEY") +app = FirecrawlApp(api_key="fc-YOUR_API_KEY") -crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) -print(crawl_result[0]['markdown']) +# crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) -job_id = crawl_result['jobId'] -print(job_id) +# print(crawl_result[0]['markdown']) + +# job_id = crawl_result['jobId'] +# print(job_id) + +# status = app.check_crawl_status(job_id) +# print(status) +from pydantic import BaseModel, Field +from typing import List, Optional + +class ArticleSchema(BaseModel): + title: str + points: int + by: str + commentsURL: str + +class TopArticlesSchema(BaseModel): + top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories") + +a = app.scrape_url('https://news.ycombinator.com', { + 'extractorOptions': { + 'extractionSchema': TopArticlesSchema.model_json_schema(), + 'mode': 'llm-extraction' + }, + 'pageOptions':{ + 'onlyMainContent': True + } +}) -status = app.check_crawl_status(job_id) -print(status) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 441b940..e955ffe 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -1,4 +1,5 @@ import os +from typing import Any, Dict, Optional import requests import time @@ -8,26 +9,51 @@ class FirecrawlApp: if self.api_key is None: raise ValueError('No API key provided') - def scrape_url(self, url, params=None): + from pydantic import BaseModel + from typing import Optional, Dict, Any + + class ScrapeParams(BaseModel): + url: str + extractorOptions: Optional[Dict[str, Any]] = None + + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: headers = { 'Content-Type': 'application/json', 'Authorization': f'Bearer {self.api_key}' } - json_data = {'url': url} + # Prepare 
the base scrape parameters with the URL + scrape_params = {'url': url} + + # If there are additional params, process them if params: - json_data.update(params) + # Initialize extractorOptions if present + extractor_options = params.get('extractorOptions', {}) + # Check and convert the extractionSchema if it's a Pydantic model + if 'extractionSchema' in extractor_options: + if hasattr(extractor_options['extractionSchema'], 'schema'): + extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema() + # Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided + extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction') + # Update the scrape_params with the processed extractorOptions + scrape_params['extractorOptions'] = extractor_options + + # Include any other params directly at the top level of scrape_params + for key, value in params.items(): + if key != 'extractorOptions': + scrape_params[key] = value + print(scrape_params) + # Make the POST request with the prepared headers and JSON data response = requests.post( 'https://api.firecrawl.dev/v0/scrape', headers=headers, - json=json_data + json=scrape_params ) if response.status_code == 200: response = response.json() - if response['success'] == True: + if response['success']: return response['data'] else: raise Exception(f'Failed to scrape URL. Error: {response["error"]}') - elif response.status_code in [402, 409, 500]: error_message = response.json().get('error', 'Unknown error occurred') raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. 
Error: {error_message}') diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index a3589e3..b870da6 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='firecrawl-py', - version='0.0.6', + version='0.0.7', url='https://github.com/mendableai/firecrawl', author='Mendable.ai', author_email='nick@mendable.ai', From 4c88d5da663d7dfb924b71853bf8677842d92172 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 17:35:16 -0700 Subject: [PATCH 3/8] Nick: v8 python --- .../build/lib/firecrawl/firecrawl.py | 53 ++++++++++++++---- .../python-sdk/dist/firecrawl-py-0.0.6.tar.gz | Bin 3476 -> 0 bytes .../python-sdk/dist/firecrawl-py-0.0.8.tar.gz | Bin 0 -> 4068 bytes .../dist/firecrawl_py-0.0.6-py3-none-any.whl | Bin 2573 -> 0 bytes .../dist/firecrawl_py-0.0.8-py3-none-any.whl | Bin 0 -> 3119 bytes apps/python-sdk/firecrawl/firecrawl.py | 8 +-- .../python-sdk/firecrawl_py.egg-info/PKG-INFO | 2 +- apps/python-sdk/setup.py | 4 +- 8 files changed, 47 insertions(+), 20 deletions(-) delete mode 100644 apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz create mode 100644 apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz delete mode 100644 apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl create mode 100644 apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl diff --git a/apps/python-sdk/build/lib/firecrawl/firecrawl.py b/apps/python-sdk/build/lib/firecrawl/firecrawl.py index ef3eb53..701810c 100644 --- a/apps/python-sdk/build/lib/firecrawl/firecrawl.py +++ b/apps/python-sdk/build/lib/firecrawl/firecrawl.py @@ -1,5 +1,7 @@ import os +from typing import Any, Dict, Optional import requests +import time class FirecrawlApp: def __init__(self, api_key=None): @@ -7,26 +9,45 @@ class FirecrawlApp: if self.api_key is None: raise ValueError('No API key provided') - def scrape_url(self, url, params=None): + + + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> 
Any: headers = { 'Content-Type': 'application/json', 'Authorization': f'Bearer {self.api_key}' } - json_data = {'url': url} + # Prepare the base scrape parameters with the URL + scrape_params = {'url': url} + + # If there are additional params, process them if params: - json_data.update(params) + # Initialize extractorOptions if present + extractor_options = params.get('extractorOptions', {}) + # Check and convert the extractionSchema if it's a Pydantic model + if 'extractionSchema' in extractor_options: + if hasattr(extractor_options['extractionSchema'], 'schema'): + extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema() + # Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided + extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction') + # Update the scrape_params with the processed extractorOptions + scrape_params['extractorOptions'] = extractor_options + + # Include any other params directly at the top level of scrape_params + for key, value in params.items(): + if key != 'extractorOptions': + scrape_params[key] = value + # Make the POST request with the prepared headers and JSON data response = requests.post( 'https://api.firecrawl.dev/v0/scrape', headers=headers, - json=json_data + json=scrape_params ) if response.status_code == 200: response = response.json() - if response['success'] == True: + if response['success']: return response['data'] else: raise Exception(f'Failed to scrape URL. Error: {response["error"]}') - elif response.status_code in [402, 409, 500]: error_message = response.json().get('error', 'Unknown error occurred') raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. 
Error: {error_message}') @@ -88,11 +109,23 @@ class FirecrawlApp: 'Authorization': f'Bearer {self.api_key}' } - def _post_request(self, url, data, headers): - return requests.post(url, headers=headers, json=data) + def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.post(url, headers=headers, json=data) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response - def _get_request(self, url, headers): - return requests.get(url, headers=headers) + def _get_request(self, url, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.get(url, headers=headers) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response def _monitor_job_status(self, job_id, headers, timeout): import time diff --git a/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz b/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz deleted file mode 100644 index c1b4206e6db72385a8c4bb6b0d84642b749911bb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3476 zcmV;F4QuiriwFq8oGNAl|7K}&Wn*$-cWf-HXy4-XDda*@##mVa ze`TrQadb+0PS<)#SL~G3b;By#rIJ{0@)ZbbeM(-fvt=BSH$VPN=5b1%7GubxS7+9< zc*Twrx`4@+S(b>?PG`ZhWj=HKc-2|4D4?^DQQj$+vL59yUwTSj=thTfU3bh@l!vDT zcIy2Pt6H~obT{~afby`0*cFsp*4ri9*+RG=l!Sbi0{W9|Ni#yjFuoFG9p@yA z38R9CYa(D^!U&B5qNccFGr=<^+OjUrvn0=8JR=x=5FWv-5I?M7z7(KnErW?%FmeU) z0tsof$RUg%t2khxAg$-mUx*`zoMj|Q<4Ybeq)PBr5~?+Ml;u3*mkd9#bWVLHXO!Ql zpYdp+X3iRXLd@b=*29P7EQ0+GLz+Rz1Hy=5psd*FU&*B{`S)ogiB5QOYFX3ibVkL} zf^X&cMoa}ADlWik6%Cf?YReIyq}-^XG%ay2^E6yOpb>y zDf9)g;qKz>(~|#3NiroqSd9RwSKFV@FSavyX7@H(F6a zd!hx<{Olc(qk7Ll~D1*kgY0hN(wRNpmN}0&ROw(5`-YP$} z)wYU>`g2sMqLv4m2bH2g-51*;YL`1Yt+<~O(352xNW0AFdoW7u0o^w>2~9L*S)N9! 
zix#T#1+s6zedHaSj8oT_4GC&R!>fr~?M%s~+#ki|Z+eT6Q@^=wH7-lZ-M}QGi1aFw zTl8>p?`T+YEhJ~@B1cOU4nY??Rq~&f^&8$)B+Tb?u;d`%dOkI=knRjuRy6+}&)P)f z{t_3#>J|OKSNV!W`Dz9d2UA3i5;&4Uu;QuQg!R%K{5Drs>DLE0bjPo!cIDVUENluJ=m@zVA(9X1Cpe(Vg(qxeK zoCtJpF`1&iPuul1t~j{Xgrz(Ny#c<~#9uQ1eX%oHE>Hl?l_f9eqfNG88u94(;3MTEZymk*att%0f3?Q+@6K;qVrGtqFuG~ zsA|7xlZON@_yL#MD#;YeBP>eYtj2_V#nH1tNa(7muG`I-U*f~z-DWt1b_p?0+HYAVNgv(7a=q*C)wA%UN#DZ0TMw7sFn@ZIA9 z>}Gv)&uyqa3+xL^PoXz^i_Df9UV;L7lBOWRXFw4mgK~Qq@h-6jm|)m0G_~LvX6xIQ#PgX}EJ;BCzCO^PbpGP(Z82FDO5;E^&WY1SCsggC1D%f? zbN}zFpa1o@@Bekp{lA0U{~PZ*{~M3TCjH~S-v2ABI(KOQpO^nJ8g<7e|HGXB82^9J z^PgUKa55a2^B?2?mH+oV9>Gd$M1DXHfj{Du-$OT`}b4~uoq4pndz&rcz zz#A(u*MA28|Ni)o*MI%-_XhtB{ww^iPME(a0N9WJ-JUxtum6T4ycg8#c84ba;~+)j zeJm{fN#BkYmL(7N8CdK*s%Q#|3r{$pxHafaZ-iKG8@V>Wj z7A#}YWy{`5L9ma?3HZD@2NwjFs8+xt?~0ROX_zy43%_N*jLQ@VogNbCNlON5C&P+` z`svRG8%v3e)p1yP=A_S>j^C~mW}gz9CP~PBnKaV*UBr>yZuVDsto#qz0|w0Pe>0l0 zlzgmnd2N@YuFFq2U2(Gg(FS5+IQ)NI7nP#EDV%bglPB)0Jn3Az9VN^SZne}V`fq!StVu~*+0@wif-*;)*LZ#$0F@KcbKMOE zsI#EQ;M}dFMz`9O=eEdwp9x`K;EZ@a*PK#KE05%=F-WR?(dfrQv1_E%$S1}5O>1sH zr95PTJkeLl6z^U?chrf~Dfw6~@tr)!`R?M{Zjx?Q>)j+69+C5b+iR17`+b`XUH76% zmz>$tCq&zlLUU}{vXk8=_PgkP6kjz?snXJjK9@~(u1Ju~8nbokuPa$J=&Z|OThw>B zer=t~MSyMkOYI)OGSc;5$3aMPVFLO0|HMg1aT+IaEA5rC`g=PF&Ehw44!2($lLQNg zP$i7)4jDd`F4EiSGNfgDNy=a*=IQ#HRvrTRH4R|Vp0@ASeq)wjKo81A+66~{&N|Cf zj-6VkGCH;lbwk=W8mHoJ6-Ss);Nen@43Luaa;J`NW4>zckD9ns&BFB_l~i=7bP`Xh za;sn$EJ|h&R+%mh#oJC^Q5vXISpJM!ZS6ity4?KvEq1@7DCvCEjN4zR0vn!m(>Z^x zI8_Fd5~o_aF3~N);AAy97Q1_JIImGyx5k>oXm8Ys-Cl zAvDol>(nRTVSIHB-eH2(diuf?8_K$?CHV3kkgS#LAj(}_pf;EMq@LHv)Lwmah8A@;A!8o8c&FOWuUW_Tw^twaUGn~l8t4uq4TNX0R*)0u>b8LLX)?QRcJLK56N%!Oz0}nxR_06LHk8+CS6b-S+^JWcHmRVdagnsSR3W5 zGewIy2tuH`M88vCwqwoROYiW;8+!d|r`#MZ{X;dF$_E1Ke&MeX=zdg$Umr5meHXD0 zpjc>BO}e$?%+j??8vjh{fC_WR+?@a2`T3uD|I4>K|J(ikm%+(N&)E7sKmYTRwZj&R zV;;@p&KKJMXXStP&;gkIPm}*){QsfzKhXc1{14;*xBUMXycO*8-v8AF|KICk{Acn% zzJ4+9|1#Hq2LBEI8~iu;zZd`C{POPg<0o&N>_c|v_&@4x;{Vtk8vH**DPQ(x36|xF 
z14zH~`5$X{Rfg7@pR!$X|OQkKT(tUGZ2C8uoR`;=~KtB@6$I_8R;* z_;2vvlzWcyEGg-R>s- z_Xl0q;Qt{?`M9Ss1%v+v{|){d{5SZ2$M;Y_jsKhX|NFy%dH>U4_utsO!GDwgegE+P zhTnfa>6-lSdy4<+AlRQT?$ZAEzyEQx`TpO*$mIVWq?|*5=V5?%VP$mT&2k>%WBBq5 zzBK?oyO!|;2LH|V|NXfB@0sg=ga0xS>`v`}AN~*f)%Aa`D}Vp9KQj0K53~RH{%5)4 zSsaI=c-e-mY0KaJ@e=C4mp{*?QUujIzwq}XCw5(X^vh2!)pG(9TPl;T%+qjU-|`6n zJ%q}f|A}3D4xrKE$)^D(_U6+7x>Nb%NJz@QXB2e*g2_pP<I{-n!`Xju<FtwM!dGnE-R};2-NC5ecSil;pg*)8nex{!a~?A{rsw`%wBGAD9p`ELo7n&UaH#%{ z)Zb2TP_cQ?yzYLdI~WXmVE=>BVE+*r?$Z7P?uI@UE?v=>*0h6W^Z8$I{hdFA{~v7o z|DN&xJMsTdYyE$}`?PNW;9>gz4?q5}_xjz-_xEc5qru=x|KINnHvIo^e*p1cXP5T> zK>hz)mQar-bnhn?3mygsq~~<4cXY)LNLe?myk07ab(F3^Q0oKoVVx|)fE>N}k<7!G zyv+KLXCGc$ufi4Ei|7IdTP8^)4%+PnPnPM-al=)6#R8Acd`5XYpUQfc!g%okd8;cO z%JsE9wxZlWAh1%W|5}x*jTQgN@xM3P#{a&-|DE7}ufYF7cQhJxhfc>>)I-MqkB84* zyghVQ-j48pv_I@h{O<$s4}1Gy|L}R=;Qubl;}T*QP;ObDmaMjha6u^X`78$XC&`i~ zg!+DXF337eNfHu91^3rPK*xv?8hAtvan5FfCrq^DwJ=ShG=ct%VDv$F0HZ?uu!8Yo zfTFbwCQ`x3Im8RZr@&5=Hx+=@`Rr; z{KVopb(tJdexq)}gM}J7sqhIgi(^@jACuPstheve1VSDVMhG3{g^l)wTw0QUmj;sP zh(`yOHJwgpR4gs{miMp3RM0@q_E`XLmeqC04-DiPARXkCfe_+wMUL}6j>pbU8d)S& zx!kmR4mn~ z0#b^o5QHbPKYWRyEr<)j+tOl$l_4%)?Du1X}s|d1>Gp;QPSI3n`4I1$YRE=$H zIMLtiCXRii6&186SOCqRKfbZj*oP79OwgrR)3%gidlRX)V*-;o>e2*yHQ=8d^|@q+ zW?FAaN+#EhE5$6J`obbmY>b-UN2q_oKF}9sMHu+&x0DCZL(t_Qfyz&D%4Geeb*X2H znMnOa)0a=)$Un8zvWl_#)67&+%LC1WN>QNhi)|6L@)eyH+)oMU$ujh$T_*Gtj1pTw z^G!`c6OCDt#(`?0g{pjkZ0m6sc?T!sl=Wppf?Cn=ZsJxeQ*g;>>@KBCpyYoki`SO* zSpWr~t3uTwU9fyDvd~tw3I#eRpPkVFKcIxcoyj@C?Lik@vl4Hr7mZK7@I0unA!kHJ z&e4H_OU+c_pTLHkzPf_yT0?d#aUr!#t<}$3(Qv)os`8cADqsIAmI|>@`bN2C=%=Ly zQomR`A=f8fr07qCLl7vYN{goX^$koA5p($*{E_EzJ)Rm^$Q8hu70thdAa9X^D5=oW z`@&c0iUjFu26_Vy1!XL7P}!rUWg6hoJqy*P6IX=ALFNlcWL#P@P%wg1#D*C?mlmnm{FYj*cEq1h)h&d z*?6KYjrFIgDeF;47dAkwLjVjN6xZ1(;x1FEq8_8D{J47+CRxG8Q;( z97Zvkd$ABgt{X0moGo;LVL`k!R$vq}Kq`@3yLifT0MIi4P#(!xU5&P40^?y7h@&CI ze3mAXnc^E_hmJ>UZ&PTD=|K0lh z507D;36TMYgi{IA^qbvp+C z4gM?qFHV@hDgd}1|GUFMuay7UgZsZ;w>#<>{NF{Jx4<3unO+7MwNgt*iyRnGe9{)iJmrI7Kl77P$pYexiQLei!Ch6FQ^B(}Sx2 
zayCjw%8C*AbU;KBXQwU)xvY*+bjPv^BzbZI{77!e-aqg{LDEu!Z*fwU9|!4UpuJD) zh}j3krcvZ`S7xQOe-U9|x2o+`qAdSawtxF75YfqaT2@C37B$cME@gM_;(hp93eG_p5)+L#lyMQN3L-FHa^O_I2= z)%tjQ)RPvmh5Fds9;{Y6b)$g9Zq?3Fb2XYI<;5<(IZ z+xJ&{#jrNA8-xi=j)uS!zc5$p7L3TLYKd1XFY8-dv({1nIr8vGtjJ!;3?R1=t;fvU zQp~O4DkB_kD_||jl~J~=LLOJk)2>kFGHB{N^vx(XsuVT}BjU3&=Hnq{g^bFGcq|5< zqeafpXQFCqgW(BViAJ+lFVUHPHzwQc)GJ<92+$e1mh+XI$6I=;g#O|E(Wm^>tt4~l z!@S(3qJjSB=>0pHT9l6iP9Xw97)+1>nG=N~X;j^5cCZG1)2U?Jwa+?jMec?N*3*vl z-)bd#>~Xz}EBKXB=cpszi3?gu#$(d!bT(kD%!clRlWdq`t-hN zi!!rMs=m_~tfr^L)5Gkjw=uV0Qts+ueJ|GwQ6yM7u-|tT@OE$**t7HIb zcRc9yT4d1qTZ;@koed`BU=w|Qye$;%-LVlG`RCv?2+u2L*l~{fcjZiF9S|h5#9x{E z%Yyw1omJLFZKQH9^Oxo+q8XJ~rXm@cUX~}1Yy@V1gct>f%hLNUY)LK*Apc(86e;9= zom6`6Mwa<_uRcMF^UgjYr|tLj2RI_8`0UItNLk&|CsZKr&*LC8i@%F=xc%Ii2%R~E zBAqP1yQz4Do07?TwJt+i_C$m8L|-pyW!122ZXfQ!s5K2>(GzXojq8nBegQqmCuwCI z{Wj~&6TND+PUU-T8R~|#uQX1@-6{+)+h~GQRr14f(({#S-o|{@*cY0(1I@zaH7cp- zP|0Q!smhIvUA*(a{Nf@HuA+F`sclLFb%NV(QLC-h2TA9PKflH5w-qJr3(dIwl`62| zNjIJIUB#*L84%98SgBT1aWknHcw5WJcZ7a6X^R z1`Vn+MXkP z#-c`*5%PpIdgRHIjQr*u%rm3Fy6So%@3Q`v#Jy`lqr84j zHM@4c1T3!!$+RqX;@q+w)#{Wt`neLB8r3&Ps0mA}b78+@TYEJboZ5NT_!S!nxQ!-5 zQ`E`QF!im=;8~usCb+#`Q?81}?C1H@4aJigvos+=s2+4Wr$rui%^s5sbL*vnajuP< zQ)hC!Dx#@?A@CCmJRU6YBPc=&7f-H~mDWP?chb9>sjLw!vUOFxHo8o!ahvAn@^do9 zV<_2Z`WYEF%iO06$^+~6;@%E?3t7)q2n=(heD!k#SsV-@a9N_?t1sL48ue4xpz(%Y z-wevt-qJr5oz;G0*)RM#Zgegb;g`F-)%hH-FQ8axR8_jQ<4oeU-bB6=bwGv5|JZT< zN58k7|Isn|A3Mqa7}dW2tGjP8Fs|Lvrh_dh=DV({PKzrlZl|2N|Q(fiLI zpC2AM$(Q8b@qg6Y#Q%M?e}n(uzT_|av;<4i$N{9Ueg4y`e=5YfK3KGF)TJw0cnnW6 zSm3TFJ`YaCHBIrz3o5Q_l7!JjCb|9=?KSvs@ZaFSDGwa~>pb#3;eV&o-N650XMbSu ze;37=y1{>g{|5gJ{u}(i7XQn)S@(SZ-|24Re}B*&8T{Wx$)DplreN^j;J?9tgZ~Eq z?|BdPX8hmW|L+fn=Jy|WyZ*-J4gQ<=zdt5FOXkL_DN0ie53ne#ukOV0sRY9{h&fU&*#G=OfD z|EwI6a@{iux_!pvxWaOyscQAFH&3!du<>#6la1L4iXP~sWi^feG{qECOfkh2Q%o_% W6jMww#S~NiK;{2+#s201pa1|E9WRCe literal 0 HcmV?d00001 diff --git a/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl b/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl 
deleted file mode 100644 index 5aba56187b34e37ad84764a061ff2b3c769c0bb0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2573 zcmai$3pmsJAIIk&<&s+@Nm0UVtP9I+SjJq3h_u|A9hqU0OSw(%3@5oI|D{|~hs7k3 z6JeNb{%DDmTj^-(2%+&`=RAMuJpKQEzvun@p6~N~f3NTN^ZkBq4oCq(SpWdA4WLw! z<(5Sb9^1zosqrMo(|K%&pHGNa_yrIejm2RJXtWMK0;$!_aA6{Qw5@G*thE`AUA@3A zkftL(K~!H}7S9K8*g(=q1#bD8?!spT0RS*>75N{K8*m=;gR5=O#4)MnRadE*II+^< zlQl*IZYurz_~Lcs;g4p#cPj87$neJbW~*2FI<>7gn`h@7KHb+zMKr;~YWz|l^V6T` z_+D3(hBaCrIaC9yizEk}3oJ({8`N%JCl57Izs=VQGttDKKHMQSNWWFu_yCuu;# z-mx@kiBZj{7uS8(Gkc3%EHu)Ox9|x}d0H^~Kz8;znIajxs}(2 zY^j&q7E$Q|`<~R2jr_wHH zZ-}|aUE3(ksJbGgmt49{p8hFR1~tVDr(r*ITx$mW`Y5eaa)T~~@k0bJqbm+B_D+5GDxDSCZWRR9X_3Uelxn$%6?GwOIJA34 zaYD$qeH+LyB;pg!=9k94u1Dg*fJNHx-sVkA%a@l{li#Q}Mna5IX+<+xy31ZrPeSlu zNv_;aa{Sif`5?}-yzbR4`osm1{)0%1* zec3W=S14B?j47N_l-_b5YLfN`5wq%?6vS8^Hk&h|F3Feo;MDQn@CCMmgxw^W%6xeP z8f~`>9Se}eO*^p-_gf|T(7LN*%kn}7%F^T#^YLHrr_{GVpL&=@7U?fXyt9kbnRcc9 zf?3L*xi~+MnO$pFEvUXc{uyLl`rK-jzI>Z7R2%{Id`Ds_dsjb*?K~fj=ITpIT^&?7 zQjDwrV9F_B)Wg?FVIp@IWb~vtF;bivQ>sPMII&M873BQUR1`O{Y;tE2zB+p)r8)XF zVsU?zTyRv}l9_SkJmAjU`^%C7K32!`>YK=C2N&>WPPI8gYqvV=>U`d1jQV@OjcuiW ze1ER!gsRBZqUqh7oO=&mVQuUSjV;|LCm-pR$=0{lHUwmy!&Ko}ixjW7d%96=bFC7Z zIWD_B*(Wq0XJ4^BWX%#KcSSY+si_u3W381H!XMHS0%lihbpXBt6@{d^E=xlyBGW1N zG(SrkRT6sQ`lX=fv2(2p(L^L$d}0c~xBBjtay0eKTb$rib{;yA_$%pXs8pt@>MapY zC#TwAPh_dm5`jx_;1nYoxj_LSfLkU1E?tBo)7VZqQfH}i# zGkPTA!Lk-h+|*TpKvrVfeKpmSK41&!CD(_h2Lfxe6xPnlBhK^{7Z>F0rz&KdbcuYZ zj&EkyD(yxwvGy6AjIQ#6Ktq`V{TirZ!j)4F^kBUF`*E=w^Y|45lIyOE7q5OHa`le2 z>kFy-{Wd`=AqSt_O#vEPngJWM^2+@fS{kkz4!I+`cmLYDU75`P z#6nCLyeRaemH)b}%Q?fd?wj^+Wu+?u4!7CPK-kh*;z*!oUoYbU3OoRL9b!*M0}AFZf;byP!k3MA)b~Z#FZ(-Kt5{ zlDj4!0Kn$yz*bBY+}s{zahcu}84m`Sz8S@cBBbIalr^sRpK7GtwpkWTAcjE7N{ufc z3Ov*R;g*-B%g%z2Bh#^#1|8#gA)hdk%jfZC>zik<-hF(K^}N%0qFu7?#n|5Jqq840 zJqFdV>}LU9fK&dC#?_YGwufmCt{Eu`_w+`pB(cSdog4&1Ij&~jS?=x%qR$=IApCOV z1(l?vyZcgdlG_g`H<~t4(41<-@dd<_KkR1xt#mP6aDv%m`)k=@$`{76I=Y(IM)KlX 
z)r+P+iFWBlXO?IB10AP|5H~!Xcj|LhuEz_igyXDfg_)Y-`-7P-MT1R+LoaV0jqRU@`756F z=dby~b1go8S%LrECGgJsucs{T+h?P!Z|)g3BYxHu{0#sA%Ho`UKy1KxJ;G+j&m#Fd zV}jRjZ2bBE;`*P+e}>I>Bu!`wbIliueq9Yoe_kr?~h za8hWHB^^tWFDF6oFkk5e?=CRDpMK?hJ06PQ#;QR;j8=TCXG=FV8cm7lesm34;d?-8Qg-nZ) zZ5uMsadsV#qAkv0>JEsK^noc4WGo3rH66FLq^_rVq^Rbm_A)Qj;g7bl`^1b-wVXz5 z#SL`gnHsj!tNYa=f=lnTH>^7pFtkfyM$ zUtYGc-omj2c$+25FLO*|c_hp@X^YS4BW8EMWR%bJ40VCQWw?YvqJ@T*v{z84e3hBk z&f5=!&Oh_o{1M}ArQJJ?kt(IyqG4ROxUE=Bej5!-!duCF1WQ70@ZV4N+R-_cjPgWD zK93{8{na-LhZv~~CgC2AtGwZj)+P12S{PyL%6Xh zq;useu6}vo;#`k_eK1$K6>MGSnx#?y2wh;v{_H4=)iiyFv~?~BDs+6+xg$d0bBnHCxk_g9cq$zk4~ zT3EJL72yU{V(L!v!%e7x&Mf{vxg(_`w9%oKe67DGsqs*8lnEbq&;-{A6Y7(10Mjjm zaww^SuT`*O*xwg&+$`vHk-+ri4;Zx~RyTevAz{1rBKTG{i%Syj%RRItwvX4)w6_Ci|9jl_; z>~Z&@(DPn$j!JwLHd!JfZ24PiY{_*^eR~QjhQX9%4sVg$574I{%^E!qH;717mp#Ji z^=?K1YnUnFnl*{aNf$%PvN(52r~9rGVM`7;1^AX22iwl{;&ctoI_Jg`Kz!mV4UpOc znm47s`c%J+zWUmemNv##Mafqs=LxfJ!RLq#@0d={ROGiDAE`O62cBNEIdZGcgJJwi z#~4ad2T6`D=OO7buNfOb84P0twm_Z=k?!(@q24_H9Gv4a0X0}sr|h2@M{SlH6!bya z!8vA8AV@8cmIx4EucR?a5SMv=BWunVc1GsQ#})UD<W|HZwR#(MTUphfqcqCs=id1l^=TNk91NaiZ9^|lLI}Td8GZEVzob-q6`{)#>9zmr+R8=2UjNI zuWc)F*vsAc-pV{wNT`~R-nB(1GuPm*rT_L)u{du!!(*{3u7H379gk0n;~rF3@(Vf4 zO6S4&76M}P@gZ?4bpt{Q*g+t#cHoiT?TC9IIdGZjhM*E9C^}G~q2+L4d9H3`STo5p zXg0IxeyZHLV`V5wzJ(o?!ud{BrVvvYeCCZfwO*II#Wgr}su>sjM!e`*@r;ls7e2=b z6Em^(Yg0N(bBCn5d@psoO^j*3Ip01XR9~wVS#S|cD4Y}g)sp@;gm~-q!baaS@IDfl z7{%3fKm=V-ZC8$@77Hkye1`IPW-VEi3BDp5^E@&+j~EmFQvc&Umj_jANZRm*`WpJ_ ze9gO)x;{Kk_7+*XNP5n8)sAcO_LH1pBZG`m-2KmMfw!7(iGewa2J)+VjrGfsy zQf*;|PBo9qLlK6Tu}|jXKzmIrHhR^Aex?`*wZMl&Q^gvo4;+-m;Yr~>?cFa?(1R4J z)QVfLv>*3(D%y-aF-)A6-zxHuf!l5Nxn*ugIM}Y?^W{!EG`w5HoNX&=r2}|E<`#SX zBmL|wuZ|>sf=RCBE_g>xMLu&+kQ`)Xpi{C){}PtB7h{Nui|^$e7h zmXelIkaEQO_)21N&estbl!=;#nu!`Faeyg8nq6c6DDDvGF`BWe!)h-NLJE0>M$^lW zaURCL*>Vc+Ssv`q5f`_QXE}(xIWh#BiRWu6hEnJ6O$2;NxS2cKE0=NySd}KT;zB*i z3w9}%dNiWQTviAS^s{quiPfJ$I3N>@4UG1bm{!GkT<}ohTOXkb?-u*OD|@XG-qDo1 zue@Z-FHxIas3|?@_75t1&fGczg?&KdPXQflzc1ylO%~sm*|Z`z_f-NvYx1%d3Z+Xw 
ztc4+rGNL76LxaP0M$!{<3*Pz!w4@HaA5m+hrPB|Wx`O~$7>-Df$Tf_}jV{r>97c@p z!lxL;By|Y=y0u1P;xNoIowjbHkyyF>6k%W(J&75tT6#-);sSK#BAFP-HPMj;`&n(i z*FA0Boo+a}OZf))ev2e;PBM|%Gbl+40APkJ!JjdWP)L0vjp(7Kpa@k!^yu#{+$nZ! zaqKSy_(4MaozBWVYM@ySsyC~~SfiXxGtx70tk+mC@;B936Ff53P1AG0{x+BLJ&ran zfpk8f^-#-OclS1NicfDmyz_XfE|##}?s`^_)<2BHd37MDS|!^_*m(lHr;~fJ=}v;1 z&jpJ~t)9JhvsB}`4xd#uVnio34eP)c?{aREJjz(WPoNQ}S6Of4e$Lj+$u?xIoaaaa zhOfI@6gUpC{>40DQ8vO`Drfc7+aPY-?Ol?CEJb(6et@fM?ooOU#RW@qYlYqT?jW`& z(4C{IyovY26E9@JEbU9IG3k9gTF0%MYaouNebke`HoNfeA7aHZcAA>p`dAB|>~a4; z2H9Mrvm(N#i8u4o=Gx-{V_U$22MPsr9qV8NGzBF)@PFq>@-P4E!H0kSd<%X*rpg}? z|CSs71^@u~aN~aT@<+zMdHFj72>zjr|L5$J$dk_bj$HW}@-H8qF*7E$Z8X375%3>z{UXo+ literal 0 HcmV?d00001 diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index e955ffe..701810c 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -9,12 +9,7 @@ class FirecrawlApp: if self.api_key is None: raise ValueError('No API key provided') - from pydantic import BaseModel - from typing import Optional, Dict, Any - - class ScrapeParams(BaseModel): - url: str - extractorOptions: Optional[Dict[str, Any]] = None + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: headers = { @@ -41,7 +36,6 @@ class FirecrawlApp: for key, value in params.items(): if key != 'extractorOptions': scrape_params[key] = value - print(scrape_params) # Make the POST request with the prepared headers and JSON data response = requests.post( 'https://api.firecrawl.dev/v0/scrape', diff --git a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO index 61589c2..e54fda5 100644 --- a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO +++ b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: firecrawl-py -Version: 0.0.6 +Version: 0.0.8 Summary: Python SDK for Firecrawl API Home-page: https://github.com/mendableai/firecrawl Author: 
Mendable.ai diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index b870da6..78a4d84 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -2,12 +2,12 @@ from setuptools import setup, find_packages setup( name='firecrawl-py', - version='0.0.7', + version='0.0.8', url='https://github.com/mendableai/firecrawl', author='Mendable.ai', author_email='nick@mendable.ai', description='Python SDK for Firecrawl API', - packages=find_packages(), + packages=find_packages(), install_requires=[ 'requests', ], From d9da4b53f89e26f600a0093ca30aaf01e773e04c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 17:36:40 -0700 Subject: [PATCH 4/8] Update example.py --- apps/python-sdk/example.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index 3ca84af..a2e0173 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -3,15 +3,16 @@ from firecrawl import FirecrawlApp app = FirecrawlApp(api_key="fc-YOUR_API_KEY") -# crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) +crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) -# print(crawl_result[0]['markdown']) +print(crawl_result[0]['markdown']) -# job_id = crawl_result['jobId'] -# print(job_id) +job_id = crawl_result['jobId'] +print(job_id) + +status = app.check_crawl_status(job_id) +print(status) -# status = app.check_crawl_status(job_id) -# print(status) from pydantic import BaseModel, Field from typing import List, Optional From aa6b84c5fa591900c855a0419d5be9b3ca14f08b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 17:41:15 -0700 Subject: [PATCH 5/8] Nick: readme --- README.md | 26 ++++++++++++++++++++++++++ apps/python-sdk/README.md | 25 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/README.md b/README.md index 9ac5636..17ba373 100644 --- a/README.md +++ b/README.md @@ 
-248,6 +248,32 @@ url = 'https://example.com' scraped_data = app.scrape_url(url) ``` +### Extracting structured data from a URL + +With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how you to use it: + +```python +class ArticleSchema(BaseModel): + title: str + points: int + by: str + commentsURL: str + +class TopArticlesSchema(BaseModel): + top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories") + +data = app.scrape_url('https://news.ycombinator.com', { + 'extractorOptions': { + 'extractionSchema': TopArticlesSchema.model_json_schema(), + 'mode': 'llm-extraction' + }, + 'pageOptions':{ + 'onlyMainContent': True + } +}) +print(data["llm_extraction"]) +``` + ### Search for a query Performs a web search, retrieve the top results, extract data from each page, and returns their markdown. diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md index 02ad307..38ca843 100644 --- a/apps/python-sdk/README.md +++ b/apps/python-sdk/README.md @@ -46,6 +46,31 @@ To scrape a single URL, use the `scrape_url` method. It takes the URL as a param url = 'https://example.com' scraped_data = app.scrape_url(url) ``` +### Extracting structured data from a URL + +With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. 
Here is how to use it: + +```python +class ArticleSchema(BaseModel): + title: str + points: int + by: str + commentsURL: str + +class TopArticlesSchema(BaseModel): + top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories") + +data = app.scrape_url('https://news.ycombinator.com', { + 'extractorOptions': { + 'extractionSchema': TopArticlesSchema.model_json_schema(), + 'mode': 'llm-extraction' + }, + 'pageOptions':{ + 'onlyMainContent': True + } +}) +print(data["llm_extraction"]) +``` ### Search for a query From 10330342012c580544b2ffd99e21e6a1c5451365 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 17:45:19 -0700 Subject: [PATCH 6/8] Update README.md --- README.md | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 17ba373..205ff3f 100644 --- a/README.md +++ b/README.md @@ -215,8 +215,6 @@ curl -X POST https://api.firecrawl.dev/v0/scrape \ ``` -Coming soon to the Langchain and LLama Index integrations. - ## Using Python SDK ### Installing Python SDK @@ -250,7 +248,7 @@ scraped_data = app.scrape_url(url) ### Extracting structured data from a URL -With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how you to use it: +With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how to use it: ```python class ArticleSchema(BaseModel): @@ -283,6 +281,125 @@ query = 'What is Mendable?' search_result = app.search(query) ``` +## Using the Node SDK + +### Installation + +To install the Firecrawl Node SDK, you can use npm: + +```bash +npm install @mendable/firecrawl-js +``` + +### Usage + +1. Get an API key from [firecrawl.dev](https://firecrawl.dev) +2.
Pass the API key to the `FirecrawlApp` constructor as the `apiKey` option. The SDK does not read the `FIRECRAWL_API_KEY` environment variable on its own, so load it yourself if you store the key there (for example, `apiKey: process.env.FIRECRAWL_API_KEY`). + + +### Scraping a URL + +To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as an object. + +```js +try { + const url = 'https://example.com'; + const scrapedData = await app.scrapeUrl(url); + console.log(scrapedData); + +} catch (error) { + console.error( + 'Error occurred while scraping:', + error.message + ); +} +``` + + +### Crawling a Website + +To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format. + +```js +const crawlUrl = 'https://example.com'; +const params = { + crawlerOptions: { + excludes: ['blog/'], + includes: [], // leave empty for all pages + limit: 1000, + }, + pageOptions: { + onlyMainContent: true + } +}; +const waitUntilDone = true; +const timeout = 5; +const crawlResult = await app.crawlUrl( + crawlUrl, + params, + waitUntilDone, + timeout +); + +``` + + +### Checking Crawl Status + +To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job. + +```js +const status = await app.checkCrawlStatus(jobId); +console.log(status); +``` + +### Extracting structured data from a URL + +With LLM extraction, you can easily extract structured data from any URL. We support zod schema to make it easier for you too.
Here is how you to use it: + +```js +import FirecrawlApp from "@mendable/firecrawl-js"; +import { z } from "zod"; + +const app = new FirecrawlApp({ + apiKey: "fc-YOUR_API_KEY", +}); + +// Define schema to extract contents into +const schema = z.object({ + top: z + .array( + z.object({ + title: z.string(), + points: z.number(), + by: z.string(), + commentsURL: z.string(), + }) + ) + .length(5) + .describe("Top 5 stories on Hacker News"), +}); +const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", { + extractorOptions: { extractionSchema: schema }, +}); +console.log(scrapeResult.data["llm_extraction"]); +``` + +### Search for a query + +With the `search` method, you can search for a query in a search engine and get the top results along with the page content for each result. The method takes the query as a parameter and returns the search results. + +```js +const query = 'what is mendable?'; +const searchResults = await app.search(query, { + pageOptions: { + fetchPageContent: true // Fetch the page content for each search result + } +}); + +``` + + ## Contributing We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. 
From 3b5f71c1e81f47075d3d11ee7506be0316ce0a57 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 17:46:35 -0700 Subject: [PATCH 7/8] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 205ff3f..7368b08 100644 --- a/README.md +++ b/README.md @@ -379,9 +379,11 @@ const schema = z.object({ .length(5) .describe("Top 5 stories on Hacker News"), }); + const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", { extractorOptions: { extractionSchema: schema }, }); + console.log(scrapeResult.data["llm_extraction"]); ``` From d6b4904ef10644e95efac92dc80f06b2a897440a Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 18:10:43 -0700 Subject: [PATCH 8/8] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7368b08..50eb06a 100644 --- a/README.md +++ b/README.md @@ -353,6 +353,8 @@ const status = await app.checkCrawlStatus(jobId); console.log(status); ``` + + ### Extracting structured data from a URL With LLM extraction, you can easily extract structured data from any URL. We support zod schema to make it easier for you too. Here is how you to use it: