Merge pull request #135 from mendableai/nsc/llm-extraction-zod-integration
Adds Zod Integration for LLM Extraction in the Firecrawl JS SDK
This commit is contained in:
commit
c02d7aeebd
151
README.md
151
README.md
@ -215,8 +215,6 @@ curl -X POST https://api.firecrawl.dev/v0/scrape \
|
||||
|
||||
```
|
||||
|
||||
Coming soon to the Langchain and LLama Index integrations.
|
||||
|
||||
## Using Python SDK
|
||||
|
||||
### Installing Python SDK
|
||||
@ -248,6 +246,32 @@ url = 'https://example.com'
|
||||
scraped_data = app.scrape_url(url)
|
||||
```
|
||||
|
||||
### Extracting structured data from a URL
|
||||
|
||||
With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it easier for you too. Here is how to use it:
|
||||
|
||||
```python
|
||||
class ArticleSchema(BaseModel):
|
||||
title: str
|
||||
points: int
|
||||
by: str
|
||||
commentsURL: str
|
||||
|
||||
class TopArticlesSchema(BaseModel):
|
||||
top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
|
||||
|
||||
data = app.scrape_url('https://news.ycombinator.com', {
|
||||
'extractorOptions': {
|
||||
'extractionSchema': TopArticlesSchema.model_json_schema(),
|
||||
'mode': 'llm-extraction'
|
||||
},
|
||||
'pageOptions':{
|
||||
'onlyMainContent': True
|
||||
}
|
||||
})
|
||||
print(data["llm_extraction"])
|
||||
```
|
||||
|
||||
### Search for a query
|
||||
|
||||
Performs a web search, retrieves the top results, extracts data from each page, and returns their markdown.
|
||||
@ -257,6 +281,129 @@ query = 'What is Mendable?'
|
||||
search_result = app.search(query)
|
||||
```
|
||||
|
||||
## Using the Node SDK
|
||||
|
||||
### Installation
|
||||
|
||||
To install the Firecrawl Node SDK, you can use npm:
|
||||
|
||||
```bash
|
||||
npm install @mendable/firecrawl-js
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
|
||||
2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.
|
||||
|
||||
|
||||
### Scraping a URL
|
||||
|
||||
To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as an object.
|
||||
|
||||
```js
|
||||
try {
|
||||
const url = 'https://example.com';
|
||||
const scrapedData = await app.scrapeUrl(url);
|
||||
console.log(scrapedData);
|
||||
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Error occurred while scraping:',
|
||||
error.message
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### Crawling a Website
|
||||
|
||||
To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
|
||||
|
||||
```js
|
||||
const crawlUrl = 'https://example.com';
|
||||
const params = {
|
||||
crawlerOptions: {
|
||||
excludes: ['blog/'],
|
||||
includes: [], // leave empty for all pages
|
||||
limit: 1000,
|
||||
},
|
||||
pageOptions: {
|
||||
onlyMainContent: true
|
||||
}
|
||||
};
|
||||
const waitUntilDone = true;
|
||||
const timeout = 5;
|
||||
const crawlResult = await app.crawlUrl(
|
||||
crawlUrl,
|
||||
params,
|
||||
waitUntilDone,
|
||||
timeout
|
||||
);
|
||||
|
||||
```
|
||||
|
||||
|
||||
### Checking Crawl Status
|
||||
|
||||
To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
|
||||
|
||||
```js
|
||||
const status = await app.checkCrawlStatus(jobId);
|
||||
console.log(status);
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Extracting structured data from a URL
|
||||
|
||||
With LLM extraction, you can easily extract structured data from any URL. We support zod schemas to make it easier for you too. Here is how to use it:
|
||||
|
||||
```js
|
||||
import FirecrawlApp from "@mendable/firecrawl-js";
|
||||
import { z } from "zod";
|
||||
|
||||
const app = new FirecrawlApp({
|
||||
apiKey: "fc-YOUR_API_KEY",
|
||||
});
|
||||
|
||||
// Define schema to extract contents into
|
||||
const schema = z.object({
|
||||
top: z
|
||||
.array(
|
||||
z.object({
|
||||
title: z.string(),
|
||||
points: z.number(),
|
||||
by: z.string(),
|
||||
commentsURL: z.string(),
|
||||
})
|
||||
)
|
||||
.length(5)
|
||||
.describe("Top 5 stories on Hacker News"),
|
||||
});
|
||||
|
||||
const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", {
|
||||
extractorOptions: { extractionSchema: schema },
|
||||
});
|
||||
|
||||
console.log(scrapeResult.data["llm_extraction"]);
|
||||
```
|
||||
|
||||
### Search for a query
|
||||
|
||||
With the `search` method, you can search for a query in a search engine and get the top results along with the page content for each result. The method takes the query as a parameter and returns the search results.
|
||||
|
||||
```js
|
||||
const query = 'what is mendable?';
|
||||
const searchResults = await app.search(query, {
|
||||
pageOptions: {
|
||||
fetchPageContent: true // Fetch the page content for each search result
|
||||
}
|
||||
});
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Contributing
|
||||
|
||||
We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
|
||||
|
@ -7,9 +7,9 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
||||
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
||||
});
|
||||
};
|
||||
import axios from 'axios';
|
||||
import dotenv from 'dotenv';
|
||||
dotenv.config();
|
||||
import axios from "axios";
|
||||
import { z } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
/**
|
||||
* Main class for interacting with the Firecrawl API.
|
||||
*/
|
||||
@ -19,9 +19,9 @@ export default class FirecrawlApp {
|
||||
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
||||
*/
|
||||
constructor({ apiKey = null }) {
|
||||
this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
|
||||
this.apiKey = apiKey || "";
|
||||
if (!this.apiKey) {
|
||||
throw new Error('No API key provided');
|
||||
throw new Error("No API key provided");
|
||||
}
|
||||
}
|
||||
/**
|
||||
@ -32,16 +32,22 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
scrapeUrl(url_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (url, params = null) {
|
||||
var _a;
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { url };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
let jsonData = Object.assign({ url }, params);
|
||||
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
|
||||
let schema = params.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
schema = zodToJsonSchema(schema);
|
||||
}
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
||||
}
|
||||
try {
|
||||
const response = yield axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers });
|
||||
const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
@ -52,13 +58,13 @@ export default class FirecrawlApp {
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'scrape URL');
|
||||
this.handleError(response, "scrape URL");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
});
|
||||
}
|
||||
/**
|
||||
@ -70,15 +76,15 @@ export default class FirecrawlApp {
|
||||
search(query_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (query, params = null) {
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { query };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
}
|
||||
try {
|
||||
const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||
const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
@ -89,13 +95,13 @@ export default class FirecrawlApp {
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'search');
|
||||
this.handleError(response, "search");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
});
|
||||
}
|
||||
/**
|
||||
@ -114,7 +120,7 @@ export default class FirecrawlApp {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
}
|
||||
try {
|
||||
const response = yield this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
|
||||
const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers);
|
||||
if (response.status === 200) {
|
||||
const jobId = response.data.jobId;
|
||||
if (waitUntilDone) {
|
||||
@ -125,14 +131,14 @@ export default class FirecrawlApp {
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'start crawl job');
|
||||
this.handleError(response, "start crawl job");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
console.log(error);
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
});
|
||||
}
|
||||
/**
|
||||
@ -149,13 +155,17 @@ export default class FirecrawlApp {
|
||||
return response.data;
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'check crawl status');
|
||||
this.handleError(response, "check crawl status");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, status: 'unknown', error: 'Internal server error.' };
|
||||
return {
|
||||
success: false,
|
||||
status: "unknown",
|
||||
error: "Internal server error.",
|
||||
};
|
||||
});
|
||||
}
|
||||
/**
|
||||
@ -164,8 +174,8 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
prepareHeaders() {
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
}
|
||||
/**
|
||||
@ -200,26 +210,26 @@ export default class FirecrawlApp {
|
||||
const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
if (statusResponse.status === 200) {
|
||||
const statusData = statusResponse.data;
|
||||
if (statusData.status === 'completed') {
|
||||
if ('data' in statusData) {
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
return statusData.data;
|
||||
}
|
||||
else {
|
||||
throw new Error('Crawl job completed but no data was returned');
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
}
|
||||
else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) {
|
||||
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
||||
if (timeout < 2) {
|
||||
timeout = 2;
|
||||
}
|
||||
yield new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
yield new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
}
|
||||
else {
|
||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(statusResponse, 'check crawl status');
|
||||
this.handleError(statusResponse, "check crawl status");
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -231,7 +241,7 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
handleError(response, action) {
|
||||
if ([402, 409, 500].includes(response.status)) {
|
||||
const errorMessage = response.data.error || 'Unknown error occurred';
|
||||
const errorMessage = response.data.error || "Unknown error occurred";
|
||||
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
||||
}
|
||||
else {
|
||||
|
24
apps/js-sdk/firecrawl/package-lock.json
generated
24
apps/js-sdk/firecrawl/package-lock.json
generated
@ -1,15 +1,17 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.13",
|
||||
"version": "0.0.17-beta.8",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.13",
|
||||
"version": "0.0.17-beta.8",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8"
|
||||
"axios": "^1.6.8",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
@ -3766,6 +3768,22 @@
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.23.8",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
|
||||
"integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
},
|
||||
"node_modules/zod-to-json-schema": {
|
||||
"version": "3.23.0",
|
||||
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz",
|
||||
"integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==",
|
||||
"peerDependencies": {
|
||||
"zod": "^3.23.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.16",
|
||||
"version": "0.0.19",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
@ -8,6 +8,7 @@
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"publish": "npm run build && npm publish --access public",
|
||||
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
||||
"test": "jest src/**/*.test.ts"
|
||||
},
|
||||
"repository": {
|
||||
@ -17,7 +18,9 @@
|
||||
"author": "Mendable.ai",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8"
|
||||
"axios": "^1.6.8",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/mendableai/firecrawl/issues"
|
||||
|
@ -1,5 +1,6 @@
|
||||
import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
||||
|
||||
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import { z } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
/**
|
||||
* Configuration interface for FirecrawlApp.
|
||||
*/
|
||||
@ -12,6 +13,11 @@ export interface FirecrawlAppConfig {
|
||||
*/
|
||||
export interface Params {
|
||||
[key: string]: any;
|
||||
extractorOptions?: {
|
||||
extractionSchema: z.ZodSchema | any;
|
||||
mode?: "llm-extraction";
|
||||
extractionPrompt?: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@ -63,9 +69,9 @@ export default class FirecrawlApp {
|
||||
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
||||
*/
|
||||
constructor({ apiKey = null }: FirecrawlAppConfig) {
|
||||
this.apiKey = apiKey || '';
|
||||
this.apiKey = apiKey || "";
|
||||
if (!this.apiKey) {
|
||||
throw new Error('No API key provided');
|
||||
throw new Error("No API key provided");
|
||||
}
|
||||
}
|
||||
|
||||
@ -75,17 +81,36 @@ export default class FirecrawlApp {
|
||||
* @param {Params | null} params - Additional parameters for the scrape request.
|
||||
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
||||
*/
|
||||
async scrapeUrl(url: string, params: Params | null = null): Promise<ScrapeResponse> {
|
||||
async scrapeUrl(
|
||||
url: string,
|
||||
params: Params | null = null
|
||||
): Promise<ScrapeResponse> {
|
||||
const headers: AxiosRequestHeaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: Params = { url };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
let jsonData: Params = { url, ...params };
|
||||
if (params?.extractorOptions?.extractionSchema) {
|
||||
let schema = params.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
schema = zodToJsonSchema(schema);
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extractorOptions: {
|
||||
...params.extractorOptions,
|
||||
extractionSchema: schema,
|
||||
mode: params.extractorOptions.mode || "llm-extraction",
|
||||
},
|
||||
};
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers });
|
||||
const response: AxiosResponse = await axios.post(
|
||||
"https://api.firecrawl.dev/v0/scrape",
|
||||
jsonData,
|
||||
{ headers }
|
||||
);
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
@ -94,12 +119,12 @@ export default class FirecrawlApp {
|
||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'scrape URL');
|
||||
this.handleError(response, "scrape URL");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
@ -108,17 +133,24 @@ export default class FirecrawlApp {
|
||||
* @param {Params | null} params - Additional parameters for the search request.
|
||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||
*/
|
||||
async search(query: string, params: Params | null = null): Promise<SearchResponse> {
|
||||
async search(
|
||||
query: string,
|
||||
params: Params | null = null
|
||||
): Promise<SearchResponse> {
|
||||
const headers: AxiosRequestHeaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: Params = { query };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||
const response: AxiosResponse = await axios.post(
|
||||
"https://api.firecrawl.dev/v0/search",
|
||||
jsonData,
|
||||
{ headers }
|
||||
);
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
@ -127,12 +159,12 @@ export default class FirecrawlApp {
|
||||
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'search');
|
||||
this.handleError(response, "search");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
@ -143,14 +175,23 @@ export default class FirecrawlApp {
|
||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||
*/
|
||||
async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<CrawlResponse | any> {
|
||||
async crawlUrl(
|
||||
url: string,
|
||||
params: Params | null = null,
|
||||
waitUntilDone: boolean = true,
|
||||
timeout: number = 2
|
||||
): Promise<CrawlResponse | any> {
|
||||
const headers = this.prepareHeaders();
|
||||
let jsonData: Params = { url };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
"https://api.firecrawl.dev/v0/crawl",
|
||||
jsonData,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
const jobId: string = response.data.jobId;
|
||||
if (waitUntilDone) {
|
||||
@ -159,13 +200,13 @@ export default class FirecrawlApp {
|
||||
return { success: true, jobId };
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'start crawl job');
|
||||
this.handleError(response, "start crawl job");
|
||||
}
|
||||
} catch (error: any) {
|
||||
console.log(error)
|
||||
console.log(error);
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
@ -176,16 +217,23 @@ export default class FirecrawlApp {
|
||||
async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
|
||||
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
||||
try {
|
||||
const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
const response: AxiosResponse = await this.getRequest(
|
||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
return response.data;
|
||||
} else {
|
||||
this.handleError(response, 'check crawl status');
|
||||
this.handleError(response, "check crawl status");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, status: 'unknown', error: 'Internal server error.' };
|
||||
return {
|
||||
success: false,
|
||||
status: "unknown",
|
||||
error: "Internal server error.",
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@ -194,8 +242,8 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
prepareHeaders(): AxiosRequestHeaders {
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
}
|
||||
|
||||
@ -206,7 +254,11 @@ export default class FirecrawlApp {
|
||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
||||
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
||||
*/
|
||||
postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||
postRequest(
|
||||
url: string,
|
||||
data: Params,
|
||||
headers: AxiosRequestHeaders
|
||||
): Promise<AxiosResponse> {
|
||||
return axios.post(url, data, { headers });
|
||||
}
|
||||
|
||||
@ -216,7 +268,10 @@ export default class FirecrawlApp {
|
||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
||||
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
||||
*/
|
||||
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||
getRequest(
|
||||
url: string,
|
||||
headers: AxiosRequestHeaders
|
||||
): Promise<AxiosResponse> {
|
||||
return axios.get(url, { headers });
|
||||
}
|
||||
|
||||
@ -227,27 +282,38 @@ export default class FirecrawlApp {
|
||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||
* @returns {Promise<any>} The final job status or data.
|
||||
*/
|
||||
async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any> {
|
||||
async monitorJobStatus(
|
||||
jobId: string,
|
||||
headers: AxiosRequestHeaders,
|
||||
timeout: number
|
||||
): Promise<any> {
|
||||
while (true) {
|
||||
const statusResponse: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
const statusResponse: AxiosResponse = await this.getRequest(
|
||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
||||
headers
|
||||
);
|
||||
if (statusResponse.status === 200) {
|
||||
const statusData = statusResponse.data;
|
||||
if (statusData.status === 'completed') {
|
||||
if ('data' in statusData) {
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
return statusData.data;
|
||||
} else {
|
||||
throw new Error('Crawl job completed but no data was returned');
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
} else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) {
|
||||
} else if (
|
||||
["active", "paused", "pending", "queued"].includes(statusData.status)
|
||||
) {
|
||||
if (timeout < 2) {
|
||||
timeout = 2;
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
await new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
} else {
|
||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||
throw new Error(
|
||||
`Crawl job failed or was stopped. Status: ${statusData.status}`
|
||||
);
|
||||
}
|
||||
} else {
|
||||
this.handleError(statusResponse, 'check crawl status');
|
||||
this.handleError(statusResponse, "check crawl status");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -259,10 +325,15 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
handleError(response: AxiosResponse, action: string): void {
|
||||
if ([402, 409, 500].includes(response.status)) {
|
||||
const errorMessage: string = response.data.error || 'Unknown error occurred';
|
||||
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
||||
const errorMessage: string =
|
||||
response.data.error || "Unknown error occurred";
|
||||
throw new Error(
|
||||
`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`
|
||||
);
|
||||
} else {
|
||||
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
|
||||
throw new Error(
|
||||
`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
8
apps/js-sdk/firecrawl/types/index.d.ts
vendored
8
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -1,4 +1,5 @@
|
||||
import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
||||
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import { z } from "zod";
|
||||
/**
|
||||
* Configuration interface for FirecrawlApp.
|
||||
*/
|
||||
@ -10,6 +11,11 @@ export interface FirecrawlAppConfig {
|
||||
*/
|
||||
export interface Params {
|
||||
[key: string]: any;
|
||||
extractorOptions?: {
|
||||
extractionSchema: z.ZodSchema | any;
|
||||
mode?: "llm-extraction";
|
||||
extractionPrompt?: string;
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Response interface for scraping operations.
|
||||
|
673
apps/js-sdk/package-lock.json
generated
673
apps/js-sdk/package-lock.json
generated
@ -9,19 +9,480 @@
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^0.0.15",
|
||||
"axios": "^1.6.8"
|
||||
"@mendable/firecrawl-js": "^0.0.19",
|
||||
"axios": "^1.6.8",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.4.5",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"tsx": "^4.9.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@cspotcode/source-map-support": {
|
||||
"version": "0.8.1",
|
||||
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
|
||||
"integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==",
|
||||
"dependencies": {
|
||||
"@jridgewell/trace-mapping": "0.3.9"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.20.2.tgz",
|
||||
"integrity": "sha512-D+EBOJHXdNZcLJRBkhENNG8Wji2kgc9AZ9KiPr1JuZjsNtyHzrsfLRrY0tk2H2aoFu6RANO1y1iPPUCDYWkb5g==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"aix"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.20.2.tgz",
|
||||
"integrity": "sha512-t98Ra6pw2VaDhqNWO2Oph2LXbz/EJcnLmKLGBJwEwXX/JAN83Fym1rU8l0JUWK6HkIbWONCSSatf4sf2NBRx/w==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.20.2.tgz",
|
||||
"integrity": "sha512-mRzjLacRtl/tWU0SvD8lUEwb61yP9cqQo6noDZP/O8VkwafSYwZ4yWy24kan8jE/IMERpYncRt2dw438LP3Xmg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-btzExgV+/lMGDDa194CcUQm53ncxzeBrWJcncOBxuC6ndBkKxnHdFJn86mCIgTELsooUmwUm9FkhSp5HYu00Rg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.20.2.tgz",
|
||||
"integrity": "sha512-4J6IRT+10J3aJH3l1yzEg9y3wkTDgDk7TSDFX+wKFiWjqWp/iCfLIYzGyasx9l0SAFPT1HwSCR+0w/h1ES/MjA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-tBcXp9KNphnNH0dfhv8KYkZhjc+H3XBkF5DKtswJblV7KlT9EI2+jeA8DgBjp908WEuYll6pF+UStUCfEpdysA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-arm64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.2.tgz",
|
||||
"integrity": "sha512-d3qI41G4SuLiCGCFGUrKsSeTXyWG6yem1KcGZVS+3FYlYhtNoNgYrWcvkOoaqMhwXSMrZRl69ArHsGJ9mYdbbw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-d+DipyvHRuqEeM5zDivKV1KuXn9WeRX6vqSqIDgwIfPQtwMP4jaDsQsDncjTDDsExT4lR/91OLjRo8bmC1e+Cw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.20.2.tgz",
|
||||
"integrity": "sha512-VhLPeR8HTMPccbuWWcEUD1Az68TqaTYyj6nfE4QByZIQEQVWBB8vup8PpR7y1QHL3CpcF6xd5WVBU/+SBEvGTg==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.20.2.tgz",
|
||||
"integrity": "sha512-9pb6rBjGvTFNira2FLIWqDk/uaf42sSyLE8j1rnUpuzsODBq7FvpwHYZxQ/It/8b+QOS1RYfqgGFNLRI+qlq2A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ia32": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.20.2.tgz",
|
||||
"integrity": "sha512-o10utieEkNPFDZFQm9CoP7Tvb33UutoJqg3qKf1PWVeeJhJw0Q347PxMvBgVVFgouYLGIhFYG0UGdBumROyiig==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-loong64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.20.2.tgz",
|
||||
"integrity": "sha512-PR7sp6R/UC4CFVomVINKJ80pMFlfDfMQMYynX7t1tNTeivQ6XdX5r2XovMmha/VjR1YN/HgHWsVcTRIMkymrgQ==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-mips64el": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.20.2.tgz",
|
||||
"integrity": "sha512-4BlTqeutE/KnOiTG5Y6Sb/Hw6hsBOZapOVF6njAESHInhlQAghVVZL1ZpIctBOoTFbQyGW+LsVYZ8lSSB3wkjA==",
|
||||
"cpu": [
|
||||
"mips64el"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ppc64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.20.2.tgz",
|
||||
"integrity": "sha512-rD3KsaDprDcfajSKdn25ooz5J5/fWBylaaXkuotBDGnMnDP1Uv5DLAN/45qfnf3JDYyJv/ytGHQaziHUdyzaAg==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-riscv64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.20.2.tgz",
|
||||
"integrity": "sha512-snwmBKacKmwTMmhLlz/3aH1Q9T8v45bKYGE3j26TsaOVtjIag4wLfWSiZykXzXuE1kbCE+zJRmwp+ZbIHinnVg==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-s390x": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.20.2.tgz",
|
||||
"integrity": "sha512-wcWISOobRWNm3cezm5HOZcYz1sKoHLd8VL1dl309DiixxVFoFe/o8HnwuIwn6sXre88Nwj+VwZUvJf4AFxkyrQ==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-1MdwI6OOTsfQfek8sLwgyjOXAu+wKhLEoaOLTjbijk6E2WONYpH9ZU2mNtR+lZ2B4uwr+usqGuVfFT9tMtGvGw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-K8/DhBxcVQkzYc43yJXDSyjlFeHQJBiowJ0uVL6Tor3jGQfSGHNNJcWxNbOI8v5k82prYqzPuwkzHt3J1T1iZQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-eMpKlV0SThJmmJgiVyN9jTPJ2VBPquf6Kt/nAoo6DgHAoN57K15ZghiHaMvqjCye/uU4X5u3YSMgVBI1h3vKrQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/sunos-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-2UyFtRC6cXLyejf/YEld4Hajo7UHILetzE1vsRcGL3earZEW77JxrFjH4Ez2qaTiEfMgAXxfAZCm1fvM/G/o8w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"sunos"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-arm64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.20.2.tgz",
|
||||
"integrity": "sha512-GRibxoawM9ZCnDxnP3usoUDO9vUkpAxIIZ6GQI+IlVmr5kP3zUq+l17xELTHMWTWzjxa2guPNyrpq1GWmPvcGQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-ia32": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.20.2.tgz",
|
||||
"integrity": "sha512-HfLOfn9YWmkSKRQqovpnITazdtquEW8/SoHW7pWpuEeguaZI4QnCRW6b+oZTztdBnZOS2hqJ6im/D5cPzBTTlQ==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-x64": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.20.2.tgz",
|
||||
"integrity": "sha512-N49X4lJX27+l9jbLKSqZ6bKNjzQvHaT8IIFUy+YIqmXQdjYCToGWwOItDrfby14c78aDd5NHQl29xingXfCdLQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@jridgewell/resolve-uri": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
|
||||
"integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==",
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jridgewell/sourcemap-codec": {
|
||||
"version": "1.4.15",
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz",
|
||||
"integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg=="
|
||||
},
|
||||
"node_modules/@jridgewell/trace-mapping": {
|
||||
"version": "0.3.9",
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz",
|
||||
"integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==",
|
||||
"dependencies": {
|
||||
"@jridgewell/resolve-uri": "^3.0.3",
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@mendable/firecrawl-js": {
|
||||
"version": "0.0.15",
|
||||
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.15.tgz",
|
||||
"integrity": "sha512-e3iCCrLIiEh+jEDerGV9Uhdkn8ymo+sG+k3osCwPg51xW1xUdAnmlcHrcJoR43RvKXdvD/lqoxg8odUEsqyH+w==",
|
||||
"version": "0.0.19",
|
||||
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.19.tgz",
|
||||
"integrity": "sha512-u9BDVIN/bftDztxLlE2cf02Nz0si3+Vmy9cANDFHj/iriT3guzI8ITBk4uC81CyRmPzNyXrW6hSAG90g9ol4cA==",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5"
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@tsconfig/node10": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz",
|
||||
"integrity": "sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw=="
|
||||
},
|
||||
"node_modules/@tsconfig/node12": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz",
|
||||
"integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag=="
|
||||
},
|
||||
"node_modules/@tsconfig/node14": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz",
|
||||
"integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow=="
|
||||
},
|
||||
"node_modules/@tsconfig/node16": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz",
|
||||
"integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA=="
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.12.11",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.11.tgz",
|
||||
"integrity": "sha512-vDg9PZ/zi+Nqp6boSOT7plNuthRugEKixDv5sFTIpkE89MmNtEArAShI4mxuX2+UrLEe9pxC1vm2cjm9YlWbJw==",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/acorn": {
|
||||
"version": "8.11.3",
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz",
|
||||
"integrity": "sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==",
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/acorn-walk": {
|
||||
"version": "8.3.2",
|
||||
"resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.2.tgz",
|
||||
"integrity": "sha512-cjkyv4OtNCIeqhHrfS81QWXoCBPExR/J62oyEqepVw8WaQeSqpW2uhuLPh1m9eWhDuOo/jUXVTlifvesOWp/4A==",
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/arg": {
|
||||
"version": "4.1.3",
|
||||
"resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz",
|
||||
"integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA=="
|
||||
},
|
||||
"node_modules/asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
@ -48,6 +509,11 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/create-require": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz",
|
||||
"integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ=="
|
||||
},
|
||||
"node_modules/delayed-stream": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
@ -56,15 +522,50 @@
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "16.4.5",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
|
||||
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
|
||||
"node_modules/diff": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
|
||||
"integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==",
|
||||
"engines": {
|
||||
"node": ">=0.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/esbuild": {
|
||||
"version": "0.20.2",
|
||||
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz",
|
||||
"integrity": "sha512-WdOOppmUNU+IbZ0PaDiTst80zjnrOkyJNHoKupIcVyU8Lvla3Ugx94VzkQ32Ijqd7UhHJy75gNWDMUekcrSJ6g==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"bin": {
|
||||
"esbuild": "bin/esbuild"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://dotenvx.com"
|
||||
"optionalDependencies": {
|
||||
"@esbuild/aix-ppc64": "0.20.2",
|
||||
"@esbuild/android-arm": "0.20.2",
|
||||
"@esbuild/android-arm64": "0.20.2",
|
||||
"@esbuild/android-x64": "0.20.2",
|
||||
"@esbuild/darwin-arm64": "0.20.2",
|
||||
"@esbuild/darwin-x64": "0.20.2",
|
||||
"@esbuild/freebsd-arm64": "0.20.2",
|
||||
"@esbuild/freebsd-x64": "0.20.2",
|
||||
"@esbuild/linux-arm": "0.20.2",
|
||||
"@esbuild/linux-arm64": "0.20.2",
|
||||
"@esbuild/linux-ia32": "0.20.2",
|
||||
"@esbuild/linux-loong64": "0.20.2",
|
||||
"@esbuild/linux-mips64el": "0.20.2",
|
||||
"@esbuild/linux-ppc64": "0.20.2",
|
||||
"@esbuild/linux-riscv64": "0.20.2",
|
||||
"@esbuild/linux-s390x": "0.20.2",
|
||||
"@esbuild/linux-x64": "0.20.2",
|
||||
"@esbuild/netbsd-x64": "0.20.2",
|
||||
"@esbuild/openbsd-x64": "0.20.2",
|
||||
"@esbuild/sunos-x64": "0.20.2",
|
||||
"@esbuild/win32-arm64": "0.20.2",
|
||||
"@esbuild/win32-ia32": "0.20.2",
|
||||
"@esbuild/win32-x64": "0.20.2"
|
||||
}
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
@ -99,6 +600,37 @@
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-tsconfig": {
|
||||
"version": "4.7.4",
|
||||
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.4.tgz",
|
||||
"integrity": "sha512-ofbkKj+0pjXjhejr007J/fLf+sW+8H7K5GCm+msC8q3IpvgjobpyPqSRFemNyIMxklC0zeJpi7VDFna19FacvQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"resolve-pkg-maps": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/make-error": {
|
||||
"version": "1.3.6",
|
||||
"resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz",
|
||||
"integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw=="
|
||||
},
|
||||
"node_modules/mime-db": {
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
|
||||
@ -122,6 +654,123 @@
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
},
|
||||
"node_modules/resolve-pkg-maps": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
|
||||
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
|
||||
"dev": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/ts-node": {
|
||||
"version": "10.9.2",
|
||||
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz",
|
||||
"integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==",
|
||||
"dependencies": {
|
||||
"@cspotcode/source-map-support": "^0.8.0",
|
||||
"@tsconfig/node10": "^1.0.7",
|
||||
"@tsconfig/node12": "^1.0.7",
|
||||
"@tsconfig/node14": "^1.0.0",
|
||||
"@tsconfig/node16": "^1.0.2",
|
||||
"acorn": "^8.4.1",
|
||||
"acorn-walk": "^8.1.1",
|
||||
"arg": "^4.1.0",
|
||||
"create-require": "^1.1.0",
|
||||
"diff": "^4.0.1",
|
||||
"make-error": "^1.1.1",
|
||||
"v8-compile-cache-lib": "^3.0.1",
|
||||
"yn": "3.1.1"
|
||||
},
|
||||
"bin": {
|
||||
"ts-node": "dist/bin.js",
|
||||
"ts-node-cwd": "dist/bin-cwd.js",
|
||||
"ts-node-esm": "dist/bin-esm.js",
|
||||
"ts-node-script": "dist/bin-script.js",
|
||||
"ts-node-transpile-only": "dist/bin-transpile.js",
|
||||
"ts-script": "dist/bin-script-deprecated.js"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@swc/core": ">=1.2.50",
|
||||
"@swc/wasm": ">=1.2.50",
|
||||
"@types/node": "*",
|
||||
"typescript": ">=2.7"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@swc/core": {
|
||||
"optional": true
|
||||
},
|
||||
"@swc/wasm": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.9.3",
|
||||
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.9.3.tgz",
|
||||
"integrity": "sha512-czVbetlILiyJZI5zGlj2kw9vFiSeyra9liPD4nG+Thh4pKTi0AmMEQ8zdV/L2xbIVKrIqif4sUNrsMAOksx9Zg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "~0.20.2",
|
||||
"get-tsconfig": "^4.7.3"
|
||||
},
|
||||
"bin": {
|
||||
"tsx": "dist/cli.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "~2.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.4.5",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz",
|
||||
"integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/v8-compile-cache-lib": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
|
||||
"integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg=="
|
||||
},
|
||||
"node_modules/yn": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz",
|
||||
"integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.23.8",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
|
||||
"integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
},
|
||||
"node_modules/zod-to-json-schema": {
|
||||
"version": "3.23.0",
|
||||
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz",
|
||||
"integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==",
|
||||
"peerDependencies": {
|
||||
"zod": "^3.23.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -11,7 +11,13 @@
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^0.0.15",
|
||||
"axios": "^1.6.8"
|
||||
"@mendable/firecrawl-js": "^0.0.19",
|
||||
"axios": "^1.6.8",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.4.5",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"tsx": "^4.9.3"
|
||||
}
|
||||
}
|
||||
|
28
apps/js-sdk/test.ts
Normal file
28
apps/js-sdk/test.ts
Normal file
@ -0,0 +1,28 @@
|
||||
import FirecrawlApp from "@mendable/firecrawl-js";
|
||||
import { z } from "zod";
|
||||
|
||||
async function a() {
|
||||
const app = new FirecrawlApp({
|
||||
apiKey: "fc-YOUR_API_KEY",
|
||||
});
|
||||
|
||||
// Define schema to extract contents into
|
||||
const schema = z.object({
|
||||
top: z
|
||||
.array(
|
||||
z.object({
|
||||
title: z.string(),
|
||||
points: z.number(),
|
||||
by: z.string(),
|
||||
commentsURL: z.string(),
|
||||
})
|
||||
)
|
||||
.length(5)
|
||||
.describe("Top 5 stories on Hacker News"),
|
||||
});
|
||||
const scrapeResult = await app.scrapeUrl("https://firecrawl.dev", {
|
||||
extractorOptions: { extractionSchema: schema },
|
||||
});
|
||||
console.log(scrapeResult.data["llm_extraction"]);
|
||||
}
|
||||
a();
|
72
apps/js-sdk/tsconfig.json
Normal file
72
apps/js-sdk/tsconfig.json
Normal file
@ -0,0 +1,72 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
/* Visit https://aka.ms/tsconfig.json to read more about this file */
|
||||
|
||||
/* Basic Options */
|
||||
// "incremental": true, /* Enable incremental compilation */
|
||||
"target": "es6" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */,
|
||||
"module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */,
|
||||
// "lib": [], /* Specify library files to be included in the compilation. */
|
||||
// "allowJs": true, /* Allow javascript files to be compiled. */
|
||||
// "checkJs": true, /* Report errors in .js files. */
|
||||
// "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */
|
||||
"declaration": true /* Generates corresponding '.d.ts' file. */,
|
||||
// "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */
|
||||
// "sourceMap": true, /* Generates corresponding '.map' file. */
|
||||
// "outFile": "./", /* Concatenate and emit output to single file. */
|
||||
"outDir": "./build" /* Redirect output structure to the directory. */,
|
||||
// "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */
|
||||
// "composite": true, /* Enable project compilation */
|
||||
// "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */
|
||||
// "removeComments": true, /* Do not emit comments to output. */
|
||||
// "noEmit": true, /* Do not emit outputs. */
|
||||
// "importHelpers": true, /* Import emit helpers from 'tslib'. */
|
||||
// "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */
|
||||
// "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
|
||||
|
||||
/* Strict Type-Checking Options */
|
||||
"strict": false /* Enable all strict type-checking options. */,
|
||||
// "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */
|
||||
// "strictNullChecks": true, /* Enable strict null checks. */
|
||||
// "strictFunctionTypes": true, /* Enable strict checking of function types. */
|
||||
// "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */
|
||||
// "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */
|
||||
// "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */
|
||||
// "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */
|
||||
|
||||
/* Additional Checks */
|
||||
// "noUnusedLocals": true, /* Report errors on unused locals. */
|
||||
// "noUnusedParameters": true, /* Report errors on unused parameters. */
|
||||
// "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */
|
||||
// "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */
|
||||
|
||||
/* Module Resolution Options */
|
||||
// "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */
|
||||
// "baseUrl": "./", /* Base directory to resolve non-absolute module names. */
|
||||
// "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
|
||||
// "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */
|
||||
// "typeRoots": [], /* List of folders to include type definitions from. */
|
||||
// "types": [], /* Type declaration files to be included in compilation. */
|
||||
// "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */
|
||||
"resolveJsonModule": true,
|
||||
"esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */,
|
||||
// "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */
|
||||
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
|
||||
|
||||
/* Source Map Options */
|
||||
// "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */
|
||||
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
|
||||
// "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */
|
||||
// "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
|
||||
|
||||
/* Experimental Options */
|
||||
// "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */
|
||||
// "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */
|
||||
|
||||
/* Advanced Options */
|
||||
"skipLibCheck": true /* Skip type checking of declaration files. */,
|
||||
"forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */
|
||||
},
|
||||
"include": ["src", "test.ts"],
|
||||
"exclude": ["node_modules", "**/__tests__/*"]
|
||||
}
|
@ -46,6 +46,31 @@ To scrape a single URL, use the `scrape_url` method. It takes the URL as a param
|
||||
url = 'https://example.com'
|
||||
scraped_data = app.scrape_url(url)
|
||||
```
|
||||
### Extracting structured data from a URL
|
||||
|
||||
With LLM extraction, you can easily extract structured data from any URL. We support Pydantic schemas to make it easier for you too. Here is how to use it:
|
||||
|
||||
```python
|
||||
class ArticleSchema(BaseModel):
|
||||
title: str
|
||||
points: int
|
||||
by: str
|
||||
commentsURL: str
|
||||
|
||||
class TopArticlesSchema(BaseModel):
|
||||
top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
|
||||
|
||||
data = app.scrape_url('https://news.ycombinator.com', {
|
||||
'extractorOptions': {
|
||||
'extractionSchema': TopArticlesSchema.model_json_schema(),
|
||||
'mode': 'llm-extraction'
|
||||
},
|
||||
'pageOptions':{
|
||||
'onlyMainContent': True
|
||||
}
|
||||
})
|
||||
print(data["llm_extraction"])
|
||||
```
|
||||
|
||||
### Search for a query
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
import requests
|
||||
import time
|
||||
|
||||
class FirecrawlApp:
|
||||
def __init__(self, api_key=None):
|
||||
@ -7,26 +9,45 @@ class FirecrawlApp:
|
||||
if self.api_key is None:
|
||||
raise ValueError('No API key provided')
|
||||
|
||||
def scrape_url(self, url, params=None):
|
||||
|
||||
|
||||
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
}
|
||||
json_data = {'url': url}
|
||||
# Prepare the base scrape parameters with the URL
|
||||
scrape_params = {'url': url}
|
||||
|
||||
# If there are additional params, process them
|
||||
if params:
|
||||
json_data.update(params)
|
||||
# Initialize extractorOptions if present
|
||||
extractor_options = params.get('extractorOptions', {})
|
||||
# Check and convert the extractionSchema if it's a Pydantic model
|
||||
if 'extractionSchema' in extractor_options:
|
||||
if hasattr(extractor_options['extractionSchema'], 'schema'):
|
||||
extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
|
||||
# Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided
|
||||
extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
|
||||
# Update the scrape_params with the processed extractorOptions
|
||||
scrape_params['extractorOptions'] = extractor_options
|
||||
|
||||
# Include any other params directly at the top level of scrape_params
|
||||
for key, value in params.items():
|
||||
if key != 'extractorOptions':
|
||||
scrape_params[key] = value
|
||||
# Make the POST request with the prepared headers and JSON data
|
||||
response = requests.post(
|
||||
'https://api.firecrawl.dev/v0/scrape',
|
||||
headers=headers,
|
||||
json=json_data
|
||||
json=scrape_params
|
||||
)
|
||||
if response.status_code == 200:
|
||||
response = response.json()
|
||||
if response['success'] == True:
|
||||
if response['success']:
|
||||
return response['data']
|
||||
else:
|
||||
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
||||
|
||||
elif response.status_code in [402, 409, 500]:
|
||||
error_message = response.json().get('error', 'Unknown error occurred')
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||
@ -88,11 +109,23 @@ class FirecrawlApp:
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
}
|
||||
|
||||
def _post_request(self, url, data, headers):
|
||||
return requests.post(url, headers=headers, json=data)
|
||||
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
|
||||
for attempt in range(retries):
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
if response.status_code == 502:
|
||||
time.sleep(backoff_factor * (2 ** attempt))
|
||||
else:
|
||||
return response
|
||||
return response
|
||||
|
||||
def _get_request(self, url, headers):
|
||||
return requests.get(url, headers=headers)
|
||||
def _get_request(self, url, headers, retries=3, backoff_factor=0.5):
|
||||
for attempt in range(retries):
|
||||
response = requests.get(url, headers=headers)
|
||||
if response.status_code == 502:
|
||||
time.sleep(backoff_factor * (2 ** attempt))
|
||||
else:
|
||||
return response
|
||||
return response
|
||||
|
||||
def _monitor_job_status(self, job_id, headers, timeout):
|
||||
import time
|
||||
|
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl
vendored
Normal file
Binary file not shown.
@ -1,9 +1,10 @@
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
|
||||
app = FirecrawlApp(api_key="YOUR_API_KEY")
|
||||
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
||||
|
||||
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}})
|
||||
|
||||
print(crawl_result[0]['markdown'])
|
||||
|
||||
job_id = crawl_result['jobId']
|
||||
@ -11,3 +12,26 @@ print(job_id)
|
||||
|
||||
status = app.check_crawl_status(job_id)
|
||||
print(status)
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional
|
||||
|
||||
class ArticleSchema(BaseModel):
|
||||
title: str
|
||||
points: int
|
||||
by: str
|
||||
commentsURL: str
|
||||
|
||||
class TopArticlesSchema(BaseModel):
|
||||
top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
|
||||
|
||||
a = app.scrape_url('https://news.ycombinator.com', {
|
||||
'extractorOptions': {
|
||||
'extractionSchema': TopArticlesSchema.model_json_schema(),
|
||||
'mode': 'llm-extraction'
|
||||
},
|
||||
'pageOptions':{
|
||||
'onlyMainContent': True
|
||||
}
|
||||
})
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
import requests
|
||||
import time
|
||||
|
||||
@ -8,26 +9,45 @@ class FirecrawlApp:
|
||||
if self.api_key is None:
|
||||
raise ValueError('No API key provided')
|
||||
|
||||
def scrape_url(self, url, params=None):
|
||||
|
||||
|
||||
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
}
|
||||
json_data = {'url': url}
|
||||
# Prepare the base scrape parameters with the URL
|
||||
scrape_params = {'url': url}
|
||||
|
||||
# If there are additional params, process them
|
||||
if params:
|
||||
json_data.update(params)
|
||||
# Initialize extractorOptions if present
|
||||
extractor_options = params.get('extractorOptions', {})
|
||||
# Check and convert the extractionSchema if it's a Pydantic model
|
||||
if 'extractionSchema' in extractor_options:
|
||||
if hasattr(extractor_options['extractionSchema'], 'schema'):
|
||||
extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
|
||||
# Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided
|
||||
extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
|
||||
# Update the scrape_params with the processed extractorOptions
|
||||
scrape_params['extractorOptions'] = extractor_options
|
||||
|
||||
# Include any other params directly at the top level of scrape_params
|
||||
for key, value in params.items():
|
||||
if key != 'extractorOptions':
|
||||
scrape_params[key] = value
|
||||
# Make the POST request with the prepared headers and JSON data
|
||||
response = requests.post(
|
||||
'https://api.firecrawl.dev/v0/scrape',
|
||||
headers=headers,
|
||||
json=json_data
|
||||
json=scrape_params
|
||||
)
|
||||
if response.status_code == 200:
|
||||
response = response.json()
|
||||
if response['success'] == True:
|
||||
if response['success']:
|
||||
return response['data']
|
||||
else:
|
||||
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
||||
|
||||
elif response.status_code in [402, 409, 500]:
|
||||
error_message = response.json().get('error', 'Unknown error occurred')
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||
|
@ -1,6 +1,6 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: firecrawl-py
|
||||
Version: 0.0.6
|
||||
Version: 0.0.8
|
||||
Summary: Python SDK for Firecrawl API
|
||||
Home-page: https://github.com/mendableai/firecrawl
|
||||
Author: Mendable.ai
|
||||
|
@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='firecrawl-py',
|
||||
version='0.0.6',
|
||||
version='0.0.8',
|
||||
url='https://github.com/mendableai/firecrawl',
|
||||
author='Mendable.ai',
|
||||
author_email='nick@mendable.ai',
|
||||
|
Loading…
Reference in New Issue
Block a user