diff --git a/README.md b/README.md
index 9ac5636..50eb06a 100644
--- a/README.md
+++ b/README.md
@@ -215,8 +215,6 @@ curl -X POST https://api.firecrawl.dev/v0/scrape \
 ```
 
-Coming soon to the Langchain and LLama Index integrations.
-
 ## Using Python SDK
 
 ### Installing Python SDK
@@ -248,6 +246,32 @@ url = 'https://example.com'
 scraped_data = app.scrape_url(url)
 ```
+### Extracting structured data from a URL
+
+With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how to use it:
+
+```python
+class ArticleSchema(BaseModel):
+    title: str
+    points: int
+    by: str
+    commentsURL: str
+
+class TopArticlesSchema(BaseModel):
+    top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
+
+data = app.scrape_url('https://news.ycombinator.com', {
+    'extractorOptions': {
+        'extractionSchema': TopArticlesSchema.model_json_schema(),
+        'mode': 'llm-extraction'
+    },
+    'pageOptions':{
+        'onlyMainContent': True
+    }
+})
+print(data["llm_extraction"])
+```
+
 ### Search for a query
 
 Performs a web search, retrieve the top results, extract data from each page, and returns their markdown.
 
@@ -257,6 +281,129 @@ query = 'What is Mendable?'
 search_result = app.search(query)
 ```
+## Using the Node SDK
+
+### Installation
+
+To install the Firecrawl Node SDK, you can use npm:
+
+```bash
+npm install @mendable/firecrawl-js
+```
+
+### Usage
+
+1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
+2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.
+
+
+### Scraping a URL
+
+To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as an object.
+
+```js
+try {
+  const url = 'https://example.com';
+  const scrapedData = await app.scrapeUrl(url);
+  console.log(scrapedData);
+
+} catch (error) {
+  console.error(
+    'Error occurred while scraping:',
+    error.message
+  );
+}
+```
+
+
+### Crawling a Website
+
+To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
+
+```js
+const crawlUrl = 'https://example.com';
+const params = {
+  crawlerOptions: {
+    excludes: ['blog/'],
+    includes: [], // leave empty for all pages
+    limit: 1000,
+  },
+  pageOptions: {
+    onlyMainContent: true
+  }
+};
+const waitUntilDone = true;
+const timeout = 5;
+const crawlResult = await app.crawlUrl(
+  crawlUrl,
+  params,
+  waitUntilDone,
+  timeout
+);
+
+```
+
+
+### Checking Crawl Status
+
+To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
+
+```js
+const status = await app.checkCrawlStatus(jobId);
+console.log(status);
+```
+
+
+
+### Extracting structured data from a URL
+
+With LLM extraction, you can easily extract structured data from any URL. We support zod schemas to make it easier for you too. 
Here is how to use it:
+
+```js
+import FirecrawlApp from "@mendable/firecrawl-js";
+import { z } from "zod";
+
+const app = new FirecrawlApp({
+  apiKey: "fc-YOUR_API_KEY",
+});
+
+// Define schema to extract contents into
+const schema = z.object({
+  top: z
+    .array(
+      z.object({
+        title: z.string(),
+        points: z.number(),
+        by: z.string(),
+        commentsURL: z.string(),
+      })
+    )
+    .length(5)
+    .describe("Top 5 stories on Hacker News"),
+});
+
+const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", {
+  extractorOptions: { extractionSchema: schema },
+});
+
+console.log(scrapeResult.data["llm_extraction"]);
+```
+
+### Search for a query
+
+With the `search` method, you can search for a query in a search engine and get the top results along with the page content for each result. The method takes the query as a parameter and returns the search results.
+
+```js
+const query = 'what is mendable?';
+const searchResults = await app.search(query, {
+  pageOptions: {
+    fetchPageContent: true // Fetch the page content for each search result
+  }
+});
+
+```
+
+
 ## Contributing
 
 We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js
index 9d8237b..6e0f367 100644
--- a/apps/js-sdk/firecrawl/build/index.js
+++ b/apps/js-sdk/firecrawl/build/index.js
@@ -7,9 +7,9 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
-import axios from 'axios';
-import dotenv from 'dotenv';
-dotenv.config();
+import axios from "axios";
+import { z } from "zod";
+import { zodToJsonSchema } from "zod-to-json-schema";
 /**
  * Main class for interacting with the Firecrawl API.
  */
@@ -19,9 +19,9 @@ export default class FirecrawlApp {
     * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
     */
    constructor({ apiKey = null }) {
-        this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
+        this.apiKey = apiKey || "";
        if (!this.apiKey) {
-            throw new Error('No API key provided');
+            throw new Error("No API key provided");
        }
    }
    /**
@@ -32,16 +32,22 @@ export default class FirecrawlApp {
     */
    scrapeUrl(url_1) {
        return __awaiter(this, arguments, void 0, function* (url, params = null) {
+            var _a;
            const headers = {
-                'Content-Type': 'application/json',
-                'Authorization': `Bearer ${this.apiKey}`,
+                "Content-Type": "application/json",
+                Authorization: `Bearer ${this.apiKey}`,
            };
-            let jsonData = { url };
-            if (params) {
-                jsonData = Object.assign(Object.assign({}, jsonData), params);
+            let jsonData = Object.assign({ url }, params);
+            if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? 
void 0 : _a.extractionSchema) { + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof z.ZodSchema) { + schema = zodToJsonSchema(schema); + } + jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) }); } try { - const response = yield axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers }); + const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers }); if (response.status === 200) { const responseData = response.data; if (responseData.success) { @@ -52,13 +58,13 @@ export default class FirecrawlApp { } } else { - this.handleError(response, 'scrape URL'); + this.handleError(response, "scrape URL"); } } catch (error) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; }); } /** @@ -70,15 +76,15 @@ export default class FirecrawlApp { search(query_1) { return __awaiter(this, arguments, void 0, function* (query, params = null) { const headers = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, }; let jsonData = { query }; if (params) { jsonData = Object.assign(Object.assign({}, jsonData), params); } try { - const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers }); + const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers }); if (response.status === 200) { const responseData = response.data; if (responseData.success) { @@ -89,13 +95,13 @@ export default class FirecrawlApp { } } else { - this.handleError(response, 'search'); + this.handleError(response, "search"); } } catch (error) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; }); } /** @@ -114,7 +120,7 @@ export default class FirecrawlApp { jsonData = Object.assign(Object.assign({}, jsonData), params); } try { - const response = yield this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers); + const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers); if (response.status === 200) { const jobId = response.data.jobId; if (waitUntilDone) { @@ -125,14 +131,14 @@ export default class FirecrawlApp { } } else { - this.handleError(response, 'start crawl job'); + this.handleError(response, "start crawl job"); } } catch (error) { console.log(error); throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; }); } /** @@ -149,13 +155,17 @@ export default class FirecrawlApp { return response.data; } else { - this.handleError(response, 'check crawl status'); + this.handleError(response, "check crawl status"); } } catch (error) { throw new Error(error.message); } - return { success: false, status: 'unknown', error: 'Internal server error.' 
}; + return { + success: false, + status: "unknown", + error: "Internal server error.", + }; }); } /** @@ -164,8 +174,8 @@ export default class FirecrawlApp { */ prepareHeaders() { return { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, }; } /** @@ -200,26 +210,26 @@ export default class FirecrawlApp { const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers); if (statusResponse.status === 200) { const statusData = statusResponse.data; - if (statusData.status === 'completed') { - if ('data' in statusData) { + if (statusData.status === "completed") { + if ("data" in statusData) { return statusData.data; } else { - throw new Error('Crawl job completed but no data was returned'); + throw new Error("Crawl job completed but no data was returned"); } } - else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) { + else if (["active", "paused", "pending", "queued"].includes(statusData.status)) { if (timeout < 2) { timeout = 2; } - yield new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again + yield new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again } else { throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); } } else { - this.handleError(statusResponse, 'check crawl status'); + this.handleError(statusResponse, "check crawl status"); } } }); @@ -231,7 +241,7 @@ export default class FirecrawlApp { */ handleError(response, action) { if ([402, 409, 500].includes(response.status)) { - const errorMessage = response.data.error || 'Unknown error occurred'; + const errorMessage = response.data.error || "Unknown error occurred"; throw new Error(`Failed to ${action}. Status code: ${response.status}. 
Error: ${errorMessage}`); } else { diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 9811597..6b085be 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,15 +1,17 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.13", + "version": "0.0.17-beta.8", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.13", + "version": "0.0.17-beta.8", "license": "MIT", "dependencies": { - "axios": "^1.6.8" + "axios": "^1.6.8", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" }, "devDependencies": { "@jest/globals": "^29.7.0", @@ -3766,6 +3768,22 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zod": { + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", + "peerDependencies": { + "zod": "^3.23.3" + } } } } diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index a8275f7..a9359cf 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.16", + "version": "0.0.19", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", @@ -8,6 +8,7 @@ "scripts": { "build": "tsc", "publish": "npm run build && npm publish --access public", + "publish-beta": "npm run build && npm publish --access public --tag beta", "test": "jest src/**/*.test.ts" }, "repository": { @@ -17,7 +18,9 @@ "author": "Mendable.ai", "license": "MIT", "dependencies": { - "axios": "^1.6.8" + "axios": "^1.6.8", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" }, "bugs": { "url": "https://github.com/mendableai/firecrawl/issues" diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index aea15f8..0319c74 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -1,5 +1,6 @@ -import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios'; - +import axios, { AxiosResponse, AxiosRequestHeaders } from "axios"; +import { z } from "zod"; +import { zodToJsonSchema } from "zod-to-json-schema"; /** * Configuration interface for FirecrawlApp. */ @@ -12,6 +13,11 @@ export interface FirecrawlAppConfig { */ export interface Params { [key: string]: any; + extractorOptions?: { + extractionSchema: z.ZodSchema | any; + mode?: "llm-extraction"; + extractionPrompt?: string; + }; } /** @@ -63,9 +69,9 @@ export default class FirecrawlApp { * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null }: FirecrawlAppConfig) { - this.apiKey = apiKey || ''; + this.apiKey = apiKey || ""; if (!this.apiKey) { - throw new Error('No API key provided'); + throw new Error("No API key provided"); } } @@ -75,31 +81,50 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the scrape request. 
* @returns {Promise} The response from the scrape operation. */ - async scrapeUrl(url: string, params: Params | null = null): Promise { + async scrapeUrl( + url: string, + params: Params | null = null + ): Promise { const headers: AxiosRequestHeaders = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; - let jsonData: Params = { url }; - if (params) { - jsonData = { ...jsonData, ...params }; + let jsonData: Params = { url, ...params }; + if (params?.extractorOptions?.extractionSchema) { + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof z.ZodSchema) { + schema = zodToJsonSchema(schema); + } + jsonData = { + ...jsonData, + extractorOptions: { + ...params.extractorOptions, + extractionSchema: schema, + mode: params.extractorOptions.mode || "llm-extraction", + }, + }; } try { - const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers }); + const response: AxiosResponse = await axios.post( + "https://api.firecrawl.dev/v0/scrape", + jsonData, + { headers } + ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { - return responseData; + return responseData; } else { throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); } } else { - this.handleError(response, 'scrape URL'); + this.handleError(response, "scrape URL"); } } catch (error: any) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; } /** @@ -108,31 +133,38 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the search request. * @returns {Promise} The response from the search operation. */ - async search(query: string, params: Params | null = null): Promise { + async search( + query: string, + params: Params | null = null + ): Promise { const headers: AxiosRequestHeaders = { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; let jsonData: Params = { query }; if (params) { jsonData = { ...jsonData, ...params }; } try { - const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers }); + const response: AxiosResponse = await axios.post( + "https://api.firecrawl.dev/v0/search", + jsonData, + { headers } + ); if (response.status === 200) { const responseData = response.data; if (responseData.success) { - return responseData; + return responseData; } else { throw new Error(`Failed to search. Error: ${responseData.error}`); } } else { - this.handleError(response, 'search'); + this.handleError(response, "search"); } } catch (error: any) { throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; } /** @@ -143,14 +175,23 @@ export default class FirecrawlApp { * @param {number} timeout - Timeout in seconds for job status checks. * @returns {Promise} The response from the crawl operation. 
*/ - async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise { + async crawlUrl( + url: string, + params: Params | null = null, + waitUntilDone: boolean = true, + timeout: number = 2 + ): Promise { const headers = this.prepareHeaders(); let jsonData: Params = { url }; if (params) { jsonData = { ...jsonData, ...params }; } try { - const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers); + const response: AxiosResponse = await this.postRequest( + "https://api.firecrawl.dev/v0/crawl", + jsonData, + headers + ); if (response.status === 200) { const jobId: string = response.data.jobId; if (waitUntilDone) { @@ -159,13 +200,13 @@ export default class FirecrawlApp { return { success: true, jobId }; } } else { - this.handleError(response, 'start crawl job'); + this.handleError(response, "start crawl job"); } } catch (error: any) { - console.log(error) + console.log(error); throw new Error(error.message); } - return { success: false, error: 'Internal server error.' }; + return { success: false, error: "Internal server error." }; } /** @@ -176,16 +217,23 @@ export default class FirecrawlApp { async checkCrawlStatus(jobId: string): Promise { const headers: AxiosRequestHeaders = this.prepareHeaders(); try { - const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers); + const response: AxiosResponse = await this.getRequest( + `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + headers + ); if (response.status === 200) { return response.data; } else { - this.handleError(response, 'check crawl status'); + this.handleError(response, "check crawl status"); } } catch (error: any) { throw new Error(error.message); } - return { success: false, status: 'unknown', error: 'Internal server error.' }; + return { + success: false, + status: "unknown", + error: "Internal server error.", + }; } /** @@ -194,8 +242,8 @@ export default class FirecrawlApp { */ prepareHeaders(): AxiosRequestHeaders { return { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, } as AxiosRequestHeaders; } @@ -206,7 +254,11 @@ export default class FirecrawlApp { * @param {AxiosRequestHeaders} headers - The headers for the request. * @returns {Promise} The response from the POST request. */ - postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise { + postRequest( + url: string, + data: Params, + headers: AxiosRequestHeaders + ): Promise { return axios.post(url, data, { headers }); } @@ -216,7 +268,10 @@ export default class FirecrawlApp { * @param {AxiosRequestHeaders} headers - The headers for the request. * @returns {Promise} The response from the GET request. */ - getRequest(url: string, headers: AxiosRequestHeaders): Promise { + getRequest( + url: string, + headers: AxiosRequestHeaders + ): Promise { return axios.get(url, { headers }); } @@ -227,27 +282,38 @@ export default class FirecrawlApp { * @param {number} timeout - Timeout in seconds for job status checks. * @returns {Promise} The final job status or data. 
*/ - async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise { + async monitorJobStatus( + jobId: string, + headers: AxiosRequestHeaders, + timeout: number + ): Promise { while (true) { - const statusResponse: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers); + const statusResponse: AxiosResponse = await this.getRequest( + `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + headers + ); if (statusResponse.status === 200) { const statusData = statusResponse.data; - if (statusData.status === 'completed') { - if ('data' in statusData) { + if (statusData.status === "completed") { + if ("data" in statusData) { return statusData.data; } else { - throw new Error('Crawl job completed but no data was returned'); + throw new Error("Crawl job completed but no data was returned"); } - } else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) { + } else if ( + ["active", "paused", "pending", "queued"].includes(statusData.status) + ) { if (timeout < 2) { timeout = 2; } - await new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again + await new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again } else { - throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); + throw new Error( + `Crawl job failed or was stopped. Status: ${statusData.status}` + ); } } else { - this.handleError(statusResponse, 'check crawl status'); + this.handleError(statusResponse, "check crawl status"); } } } @@ -259,10 +325,15 @@ export default class FirecrawlApp { */ handleError(response: AxiosResponse, action: string): void { if ([402, 409, 500].includes(response.status)) { - const errorMessage: string = response.data.error || 'Unknown error occurred'; - throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); + const errorMessage: string = + response.data.error || "Unknown error occurred"; + throw new Error( + `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}` + ); } else { - throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); + throw new Error( + `Unexpected error occurred while trying to ${action}. Status code: ${response.status}` + ); } } } diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index 7f79d64..40d95c4 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -1,4 +1,5 @@ -import { AxiosResponse, AxiosRequestHeaders } from 'axios'; +import { AxiosResponse, AxiosRequestHeaders } from "axios"; +import { z } from "zod"; /** * Configuration interface for FirecrawlApp. */ @@ -10,6 +11,11 @@ export interface FirecrawlAppConfig { */ export interface Params { [key: string]: any; + extractorOptions?: { + extractionSchema: z.ZodSchema | any; + mode?: "llm-extraction"; + extractionPrompt?: string; + }; } /** * Response interface for scraping operations. 
diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 363f301..4d26319 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -9,19 +9,480 @@ "version": "1.0.0", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.15", - "axios": "^1.6.8" + "@mendable/firecrawl-js": "^0.0.19", + "axios": "^1.6.8", + "ts-node": "^10.9.2", + "typescript": "^5.4.5", + "zod": "^3.23.8" + }, + "devDependencies": { + "tsx": "^4.9.3" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.20.2.tgz", + "integrity": "sha512-D+EBOJHXdNZcLJRBkhENNG8Wji2kgc9AZ9KiPr1JuZjsNtyHzrsfLRrY0tk2H2aoFu6RANO1y1iPPUCDYWkb5g==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.20.2.tgz", + "integrity": "sha512-t98Ra6pw2VaDhqNWO2Oph2LXbz/EJcnLmKLGBJwEwXX/JAN83Fym1rU8l0JUWK6HkIbWONCSSatf4sf2NBRx/w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.20.2.tgz", + "integrity": "sha512-mRzjLacRtl/tWU0SvD8lUEwb61yP9cqQo6noDZP/O8VkwafSYwZ4yWy24kan8jE/IMERpYncRt2dw438LP3Xmg==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.20.2.tgz", + "integrity": "sha512-btzExgV+/lMGDDa194CcUQm53ncxzeBrWJcncOBxuC6ndBkKxnHdFJn86mCIgTELsooUmwUm9FkhSp5HYu00Rg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.20.2.tgz", + "integrity": "sha512-4J6IRT+10J3aJH3l1yzEg9y3wkTDgDk7TSDFX+wKFiWjqWp/iCfLIYzGyasx9l0SAFPT1HwSCR+0w/h1ES/MjA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.20.2.tgz", + "integrity": "sha512-tBcXp9KNphnNH0dfhv8KYkZhjc+H3XBkF5DKtswJblV7KlT9EI2+jeA8DgBjp908WEuYll6pF+UStUCfEpdysA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.2.tgz", + "integrity": "sha512-d3qI41G4SuLiCGCFGUrKsSeTXyWG6yem1KcGZVS+3FYlYhtNoNgYrWcvkOoaqMhwXSMrZRl69ArHsGJ9mYdbbw==", + "cpu": [ + 
"arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.20.2.tgz", + "integrity": "sha512-d+DipyvHRuqEeM5zDivKV1KuXn9WeRX6vqSqIDgwIfPQtwMP4jaDsQsDncjTDDsExT4lR/91OLjRo8bmC1e+Cw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.20.2.tgz", + "integrity": "sha512-VhLPeR8HTMPccbuWWcEUD1Az68TqaTYyj6nfE4QByZIQEQVWBB8vup8PpR7y1QHL3CpcF6xd5WVBU/+SBEvGTg==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.20.2.tgz", + "integrity": "sha512-9pb6rBjGvTFNira2FLIWqDk/uaf42sSyLE8j1rnUpuzsODBq7FvpwHYZxQ/It/8b+QOS1RYfqgGFNLRI+qlq2A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.20.2.tgz", + "integrity": "sha512-o10utieEkNPFDZFQm9CoP7Tvb33UutoJqg3qKf1PWVeeJhJw0Q347PxMvBgVVFgouYLGIhFYG0UGdBumROyiig==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.20.2.tgz", + "integrity": "sha512-PR7sp6R/UC4CFVomVINKJ80pMFlfDfMQMYynX7t1tNTeivQ6XdX5r2XovMmha/VjR1YN/HgHWsVcTRIMkymrgQ==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.20.2.tgz", + "integrity": "sha512-4BlTqeutE/KnOiTG5Y6Sb/Hw6hsBOZapOVF6njAESHInhlQAghVVZL1ZpIctBOoTFbQyGW+LsVYZ8lSSB3wkjA==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.20.2.tgz", + "integrity": "sha512-rD3KsaDprDcfajSKdn25ooz5J5/fWBylaaXkuotBDGnMnDP1Uv5DLAN/45qfnf3JDYyJv/ytGHQaziHUdyzaAg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.20.2.tgz", + "integrity": "sha512-snwmBKacKmwTMmhLlz/3aH1Q9T8v45bKYGE3j26TsaOVtjIag4wLfWSiZykXzXuE1kbCE+zJRmwp+ZbIHinnVg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.20.2.tgz", + "integrity": 
"sha512-wcWISOobRWNm3cezm5HOZcYz1sKoHLd8VL1dl309DiixxVFoFe/o8HnwuIwn6sXre88Nwj+VwZUvJf4AFxkyrQ==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.20.2.tgz", + "integrity": "sha512-1MdwI6OOTsfQfek8sLwgyjOXAu+wKhLEoaOLTjbijk6E2WONYpH9ZU2mNtR+lZ2B4uwr+usqGuVfFT9tMtGvGw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.20.2.tgz", + "integrity": "sha512-K8/DhBxcVQkzYc43yJXDSyjlFeHQJBiowJ0uVL6Tor3jGQfSGHNNJcWxNbOI8v5k82prYqzPuwkzHt3J1T1iZQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.20.2.tgz", + "integrity": "sha512-eMpKlV0SThJmmJgiVyN9jTPJ2VBPquf6Kt/nAoo6DgHAoN57K15ZghiHaMvqjCye/uU4X5u3YSMgVBI1h3vKrQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.20.2.tgz", + "integrity": "sha512-2UyFtRC6cXLyejf/YEld4Hajo7UHILetzE1vsRcGL3earZEW77JxrFjH4Ez2qaTiEfMgAXxfAZCm1fvM/G/o8w==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.20.2.tgz", + "integrity": "sha512-GRibxoawM9ZCnDxnP3usoUDO9vUkpAxIIZ6GQI+IlVmr5kP3zUq+l17xELTHMWTWzjxa2guPNyrpq1GWmPvcGQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.20.2.tgz", + "integrity": "sha512-HfLOfn9YWmkSKRQqovpnITazdtquEW8/SoHW7pWpuEeguaZI4QnCRW6b+oZTztdBnZOS2hqJ6im/D5cPzBTTlQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.20.2.tgz", + "integrity": "sha512-N49X4lJX27+l9jbLKSqZ6bKNjzQvHaT8IIFUy+YIqmXQdjYCToGWwOItDrfby14c78aDd5NHQl29xingXfCdLQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.4.15", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", + "integrity": 
"sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" } }, "node_modules/@mendable/firecrawl-js": { - "version": "0.0.15", - "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.15.tgz", - "integrity": "sha512-e3iCCrLIiEh+jEDerGV9Uhdkn8ymo+sG+k3osCwPg51xW1xUdAnmlcHrcJoR43RvKXdvD/lqoxg8odUEsqyH+w==", + "version": "0.0.19", + "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.19.tgz", + "integrity": "sha512-u9BDVIN/bftDztxLlE2cf02Nz0si3+Vmy9cANDFHj/iriT3guzI8ITBk4uC81CyRmPzNyXrW6hSAG90g9ol4cA==", "dependencies": { "axios": "^1.6.8", - "dotenv": "^16.4.5" + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" } }, + "node_modules/@tsconfig/node10": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", + "integrity": "sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==" + }, + "node_modules/@types/node": { + "version": "20.12.11", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.11.tgz", + "integrity": "sha512-vDg9PZ/zi+Nqp6boSOT7plNuthRugEKixDv5sFTIpkE89MmNtEArAShI4mxuX2+UrLEe9pxC1vm2cjm9YlWbJw==", + "peer": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/acorn": { + "version": "8.11.3", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz", + "integrity": "sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.2", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.2.tgz", + "integrity": "sha512-cjkyv4OtNCIeqhHrfS81QWXoCBPExR/J62oyEqepVw8WaQeSqpW2uhuLPh1m9eWhDuOo/jUXVTlifvesOWp/4A==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==" + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -48,6 +509,11 @@ "node": ">= 0.8" } }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": 
"https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==" + }, "node_modules/delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", @@ -56,15 +522,50 @@ "node": ">=0.4.0" } }, - "node_modules/dotenv": { - "version": "16.4.5", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", - "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/esbuild": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz", + "integrity": "sha512-WdOOppmUNU+IbZ0PaDiTst80zjnrOkyJNHoKupIcVyU8Lvla3Ugx94VzkQ32Ijqd7UhHJy75gNWDMUekcrSJ6g==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, "engines": { "node": ">=12" }, - "funding": { - "url": "https://dotenvx.com" + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.20.2", + "@esbuild/android-arm": "0.20.2", + "@esbuild/android-arm64": "0.20.2", + "@esbuild/android-x64": "0.20.2", + "@esbuild/darwin-arm64": "0.20.2", + "@esbuild/darwin-x64": "0.20.2", + "@esbuild/freebsd-arm64": "0.20.2", + "@esbuild/freebsd-x64": "0.20.2", + "@esbuild/linux-arm": "0.20.2", + "@esbuild/linux-arm64": "0.20.2", + "@esbuild/linux-ia32": "0.20.2", + "@esbuild/linux-loong64": "0.20.2", + "@esbuild/linux-mips64el": "0.20.2", + "@esbuild/linux-ppc64": "0.20.2", + "@esbuild/linux-riscv64": "0.20.2", + "@esbuild/linux-s390x": "0.20.2", + "@esbuild/linux-x64": "0.20.2", + "@esbuild/netbsd-x64": "0.20.2", + "@esbuild/openbsd-x64": "0.20.2", + "@esbuild/sunos-x64": "0.20.2", + "@esbuild/win32-arm64": "0.20.2", + "@esbuild/win32-ia32": "0.20.2", + "@esbuild/win32-x64": "0.20.2" } }, "node_modules/follow-redirects": { @@ -99,6 +600,37 @@ "node": ">= 6" } }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-tsconfig": { + "version": "4.7.4", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.4.tgz", + "integrity": "sha512-ofbkKj+0pjXjhejr007J/fLf+sW+8H7K5GCm+msC8q3IpvgjobpyPqSRFemNyIMxklC0zeJpi7VDFna19FacvQ==", + "dev": true, + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==" + }, "node_modules/mime-db": { "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", @@ -122,6 +654,123 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": 
"sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/tsx": { + "version": "4.9.3", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.9.3.tgz", + "integrity": "sha512-czVbetlILiyJZI5zGlj2kw9vFiSeyra9liPD4nG+Thh4pKTi0AmMEQ8zdV/L2xbIVKrIqif4sUNrsMAOksx9Zg==", + "dev": true, + "dependencies": { + "esbuild": "~0.20.2", + "get-tsconfig": "^4.7.3" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/typescript": { + "version": "5.4.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", + "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "peer": true + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==" + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "engines": { + "node": ">=6" + } + }, + "node_modules/zod": { + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + 
"node_modules/zod-to-json-schema": { + "version": "3.23.0", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz", + "integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==", + "peerDependencies": { + "zod": "^3.23.3" + } } } } diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index 563e1e3..0e93fe3 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -11,7 +11,13 @@ "author": "", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.15", - "axios": "^1.6.8" + "@mendable/firecrawl-js": "^0.0.19", + "axios": "^1.6.8", + "ts-node": "^10.9.2", + "typescript": "^5.4.5", + "zod": "^3.23.8" + }, + "devDependencies": { + "tsx": "^4.9.3" } } diff --git a/apps/js-sdk/test.ts b/apps/js-sdk/test.ts new file mode 100644 index 0000000..5419c2d --- /dev/null +++ b/apps/js-sdk/test.ts @@ -0,0 +1,28 @@ +import FirecrawlApp from "@mendable/firecrawl-js"; +import { z } from "zod"; + +async function a() { + const app = new FirecrawlApp({ + apiKey: "fc-YOUR_API_KEY", + }); + + // Define schema to extract contents into + const schema = z.object({ + top: z + .array( + z.object({ + title: z.string(), + points: z.number(), + by: z.string(), + commentsURL: z.string(), + }) + ) + .length(5) + .describe("Top 5 stories on Hacker News"), + }); + const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", { + extractorOptions: { extractionSchema: schema }, + }); + console.log(scrapeResult.data["llm_extraction"]); +} +a(); diff --git a/apps/js-sdk/tsconfig.json b/apps/js-sdk/tsconfig.json new file mode 100644 index 0000000..affe0ed --- /dev/null +++ b/apps/js-sdk/tsconfig.json @@ -0,0 +1,72 @@ +{ + "compilerOptions": { + /* Visit https://aka.ms/tsconfig.json to read more about this file */ + + /* Basic Options */ + // "incremental": true, /* Enable incremental compilation */ + "target": "es6" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */, + "module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */, + // "lib": [], /* Specify library files to be included in the compilation. */ + // "allowJs": true, /* Allow javascript files to be compiled. */ + // "checkJs": true, /* Report errors in .js files. */ + // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */ + "declaration": true /* Generates corresponding '.d.ts' file. */, + // "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */ + // "sourceMap": true, /* Generates corresponding '.map' file. */ + // "outFile": "./", /* Concatenate and emit output to single file. */ + "outDir": "./build" /* Redirect output structure to the directory. */, + // "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */ + // "composite": true, /* Enable project compilation */ + // "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */ + // "removeComments": true, /* Do not emit comments to output. */ + // "noEmit": true, /* Do not emit outputs. */ + // "importHelpers": true, /* Import emit helpers from 'tslib'. */ + // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. 
*/ + // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ + + /* Strict Type-Checking Options */ + "strict": false /* Enable all strict type-checking options. */, + // "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */ + // "strictNullChecks": true, /* Enable strict null checks. */ + // "strictFunctionTypes": true, /* Enable strict checking of function types. */ + // "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */ + // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ + // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ + // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ + + /* Additional Checks */ + // "noUnusedLocals": true, /* Report errors on unused locals. */ + // "noUnusedParameters": true, /* Report errors on unused parameters. */ + // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ + // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ + + /* Module Resolution Options */ + // "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ + // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ + // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ + // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ + // "typeRoots": [], /* List of folders to include type definitions from. */ + // "types": [], /* Type declaration files to be included in compilation. */ + // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ + "resolveJsonModule": true, + "esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */, + // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ + // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ + + /* Source Map Options */ + // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ + // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ + // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */ + // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */ + + /* Experimental Options */ + // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ + // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ + + /* Advanced Options */ + "skipLibCheck": true /* Skip type checking of declaration files. */, + "forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. 
*/
+  },
+  "include": ["src", "test.ts"],
+  "exclude": ["node_modules", "**/__tests__/*"]
+}
diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md
index 02ad307..38ca843 100644
--- a/apps/python-sdk/README.md
+++ b/apps/python-sdk/README.md
@@ -46,6 +46,31 @@ To scrape a single URL, use the `scrape_url` method. It takes the URL as a param
 url = 'https://example.com'
 scraped_data = app.scrape_url(url)
 ```
+### Extracting structured data from a URL
+
+With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how to use it:
+
+```python
+class ArticleSchema(BaseModel):
+    title: str
+    points: int
+    by: str
+    commentsURL: str
+
+class TopArticlesSchema(BaseModel):
+    top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
+
+data = app.scrape_url('https://news.ycombinator.com', {
+    'extractorOptions': {
+        'extractionSchema': TopArticlesSchema.model_json_schema(),
+        'mode': 'llm-extraction'
+    },
+    'pageOptions':{
+        'onlyMainContent': True
+    }
+})
+print(data["llm_extraction"])
+```
 
 ### Search for a query
 
diff --git a/apps/python-sdk/build/lib/firecrawl/firecrawl.py b/apps/python-sdk/build/lib/firecrawl/firecrawl.py
index ef3eb53..701810c 100644
--- a/apps/python-sdk/build/lib/firecrawl/firecrawl.py
+++ b/apps/python-sdk/build/lib/firecrawl/firecrawl.py
@@ -1,5 +1,7 @@
 import os
+from typing import Any, Dict, Optional
 import requests
+import time
 
 class FirecrawlApp:
     def __init__(self, api_key=None):
@@ -7,26 +9,45 @@ class FirecrawlApp:
         if self.api_key is None:
             raise ValueError('No API key provided')
 
-    def scrape_url(self, url, params=None):
+
+
+    def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
         headers = {
             'Content-Type': 'application/json',
             'Authorization': f'Bearer {self.api_key}'
         }
-        json_data = {'url': url}
+        # Prepare the base scrape parameters with the URL
+        scrape_params = {'url': url}
+
+        # If there are additional params, process them
         if params:
-            json_data.update(params)
+            # Initialize extractorOptions if present
+            extractor_options = params.get('extractorOptions', {})
+            # Check and convert the extractionSchema if it's a Pydantic model
+            if 'extractionSchema' in extractor_options:
+                if hasattr(extractor_options['extractionSchema'], 'schema'):
+                    extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
+                # Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided
+                extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
+                # Update the scrape_params with the processed extractorOptions
+                scrape_params['extractorOptions'] = extractor_options
+
+            # Include any other params directly at the top level of scrape_params
+            for key, value in params.items():
+                if key != 'extractorOptions':
+                    scrape_params[key] = value
+        # Make the POST request with the prepared headers and JSON data
         response = requests.post(
             'https://api.firecrawl.dev/v0/scrape',
             headers=headers,
-            json=json_data
+            json=scrape_params
         )
         if response.status_code == 200:
             response = response.json()
-            if response['success'] == True:
+            if response['success']:
                 return response['data']
             else:
                 raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
-
         elif response.status_code in [402, 409, 500]:
             error_message = response.json().get('error', 'Unknown error occurred')
             raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. 
Error: {error_message}') @@ -88,11 +109,23 @@ class FirecrawlApp: 'Authorization': f'Bearer {self.api_key}' } - def _post_request(self, url, data, headers): - return requests.post(url, headers=headers, json=data) + def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.post(url, headers=headers, json=data) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response - def _get_request(self, url, headers): - return requests.get(url, headers=headers) + def _get_request(self, url, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.get(url, headers=headers) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response def _monitor_job_status(self, job_id, headers, timeout): import time diff --git a/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz b/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz deleted file mode 100644 index c1b4206..0000000 Binary files a/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz and /dev/null differ diff --git a/apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz b/apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz new file mode 100644 index 0000000..b18dde5 Binary files /dev/null and b/apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz differ diff --git a/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl b/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl deleted file mode 100644 index 5aba561..0000000 Binary files a/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl and /dev/null differ diff --git a/apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl b/apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl new file mode 100644 index 0000000..f71cb8e Binary files /dev/null and b/apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl differ diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index b178400..a2e0173 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -1,9 +1,10 @@ from firecrawl import FirecrawlApp -app = FirecrawlApp(api_key="YOUR_API_KEY") +app = FirecrawlApp(api_key="fc-YOUR_API_KEY") crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) + print(crawl_result[0]['markdown']) job_id = crawl_result['jobId'] @@ -11,3 +12,26 @@ print(job_id) status = app.check_crawl_status(job_id) print(status) + +from pydantic import BaseModel, Field +from typing import List, Optional + +class ArticleSchema(BaseModel): + title: str + points: int + by: str + commentsURL: str + +class TopArticlesSchema(BaseModel): + top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories") + +a = app.scrape_url('https://news.ycombinator.com', { + 'extractorOptions': { + 'extractionSchema': TopArticlesSchema.model_json_schema(), + 'mode': 'llm-extraction' + }, + 'pageOptions':{ + 'onlyMainContent': True + } +}) + diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 441b940..701810c 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -1,4 +1,5 @@ import os +from typing import Any, Dict, Optional import requests import time @@ -8,26 +9,45 @@ class FirecrawlApp: if self.api_key is None: raise ValueError('No API key provided') - def scrape_url(self, url, params=None): + + + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: 
headers = { 'Content-Type': 'application/json', 'Authorization': f'Bearer {self.api_key}' } - json_data = {'url': url} + # Prepare the base scrape parameters with the URL + scrape_params = {'url': url} + + # If there are additional params, process them if params: - json_data.update(params) + # Initialize extractorOptions if present + extractor_options = params.get('extractorOptions', {}) + # Check and convert the extractionSchema if it's a Pydantic model + if 'extractionSchema' in extractor_options: + if hasattr(extractor_options['extractionSchema'], 'schema'): + extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema() + # Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided + extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction') + # Update the scrape_params with the processed extractorOptions + scrape_params['extractorOptions'] = extractor_options + + # Include any other params directly at the top level of scrape_params + for key, value in params.items(): + if key != 'extractorOptions': + scrape_params[key] = value + # Make the POST request with the prepared headers and JSON data response = requests.post( 'https://api.firecrawl.dev/v0/scrape', headers=headers, - json=json_data + json=scrape_params ) if response.status_code == 200: response = response.json() - if response['success'] == True: + if response['success']: return response['data'] else: raise Exception(f'Failed to scrape URL. Error: {response["error"]}') - elif response.status_code in [402, 409, 500]: error_message = response.json().get('error', 'Unknown error occurred') raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}') diff --git a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO index 61589c2..e54fda5 100644 --- a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO +++ b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: firecrawl-py -Version: 0.0.6 +Version: 0.0.8 Summary: Python SDK for Firecrawl API Home-page: https://github.com/mendableai/firecrawl Author: Mendable.ai diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index a3589e3..78a4d84 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -2,12 +2,12 @@ from setuptools import setup, find_packages setup( name='firecrawl-py', - version='0.0.6', + version='0.0.8', url='https://github.com/mendableai/firecrawl', author='Mendable.ai', author_email='nick@mendable.ai', description='Python SDK for Firecrawl API', - packages=find_packages(), + packages=find_packages(), install_requires=[ 'requests', ],