
Added localhost support for the JavaScript SDK

neev jewalkar 2024-06-18 05:42:25 +05:30
parent a20d002a6b
commit e5ffda1eec
6 changed files with 299 additions and 333 deletions
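
In practice, the new optional `apiUrl` constructor argument lets the JS SDK talk to a self-hosted Firecrawl instance instead of the hosted API. A minimal sketch of what the change enables, assuming a local Firecrawl API on port 3002 as in the README example below (the API key and target URL are placeholders):

```js
import FirecrawlApp from "@mendable/firecrawl-js";

// Point the SDK at a locally running Firecrawl API.
// apiUrl is optional; when omitted, the constructor falls back to https://api.firecrawl.dev.
const app = new FirecrawlApp({
  apiKey: "YOUR_API_KEY", // still required; the constructor throws if it is missing
  apiUrl: "http://localhost:3002",
});

// Every subsequent call is sent to the configured apiUrl, e.g. a simple scrape:
const result = await app.scrapeUrl("https://example.com");
console.log(result.data);
```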

View File

@@ -176,6 +176,11 @@ async function checkStatusExample(jobId) {
checkStatusExample('your_job_id_here');
```
## Running Locally
To use the SDK with a locally running Firecrawl instance, pass your local API URL via `apiUrl` when creating the FirecrawlApp instance:
```js
const app = new FirecrawlApp({ apiKey: "YOUR_API_KEY", apiUrl: "http://localhost:3002" });
```
## Error Handling

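The SDK surfaces API failures by throwing (see handleError in the TypeScript diff below), so calls against a local instance are usually wrapped in try/catch. A brief illustrative sketch, reusing the `app` instance from the example above:

```js
try {
  const result = await app.scrapeUrl("https://example.com");
  console.log(result.data);
} catch (err) {
  // handleError throws messages like "Failed to scrape URL. Status code: 500. Error: ..."
  console.error(err.message);
}
```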
View File

@@ -18,9 +18,9 @@ export default class FirecrawlApp {
* Initializes a new instance of the FirecrawlApp class.
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
*/
-    constructor({ apiKey = null }) {
-        this.apiUrl = "https://api.firecrawl.dev";
+    constructor({ apiKey = null, apiUrl = null }) {
         this.apiKey = apiKey || "";
+        this.apiUrl = apiUrl || "https://api.firecrawl.dev";
         if (!this.apiKey) {
             throw new Error("No API key provided");
         }

View File

@@ -1,12 +1,12 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.22",
"version": "0.0.26",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@mendable/firecrawl-js",
"version": "0.0.22",
"version": "0.0.26",
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",

View File

@@ -5,346 +5,347 @@ import { zodToJsonSchema } from "zod-to-json-schema";
/**
 * Configuration interface for FirecrawlApp.
 */
export interface FirecrawlAppConfig {
  apiKey?: string | null;
  apiUrl?: string | null;
}

/**
 * Generic parameter interface.
 */
export interface Params {
  [key: string]: any;
  extractorOptions?: {
    extractionSchema: z.ZodSchema | any;
    mode?: "llm-extraction";
    extractionPrompt?: string;
  };
}

/**
 * Response interface for scraping operations.
 */
export interface ScrapeResponse {
  success: boolean;
  data?: any;
  error?: string;
}

/**
 * Response interface for searching operations.
 */
export interface SearchResponse {
  success: boolean;
  data?: any;
  error?: string;
}

/**
 * Response interface for crawling operations.
 */
export interface CrawlResponse {
  success: boolean;
  jobId?: string;
  data?: any;
  error?: string;
}

/**
 * Response interface for job status checks.
 */
export interface JobStatusResponse {
  success: boolean;
  status: string;
  jobId?: string;
  data?: any;
  partial_data?: any,
  error?: string;
}

/**
 * Main class for interacting with the Firecrawl API.
 */
export default class FirecrawlApp {
  private apiKey: string;
-  private apiUrl: string = "https://api.firecrawl.dev";
+  private apiUrl: string;

  /**
   * Initializes a new instance of the FirecrawlApp class.
   * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
   */
-  constructor({ apiKey = null }: FirecrawlAppConfig) {
+  constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
    this.apiKey = apiKey || "";
+    this.apiUrl = apiUrl || "https://api.firecrawl.dev"
    if (!this.apiKey) {
      throw new Error("No API key provided");
    }
  }

  /**
   * Scrapes a URL using the Firecrawl API.
   * @param {string} url - The URL to scrape.
   * @param {Params | null} params - Additional parameters for the scrape request.
   * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
   */
  async scrapeUrl(
    url: string,
    params: Params | null = null
  ): Promise<ScrapeResponse> {
    const headers: AxiosRequestHeaders = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
    } as AxiosRequestHeaders;
    let jsonData: Params = { url, ...params };
    if (params?.extractorOptions?.extractionSchema) {
      let schema = params.extractorOptions.extractionSchema;
      // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
      if (schema instanceof z.ZodSchema) {
        schema = zodToJsonSchema(schema);
      }
      jsonData = {
        ...jsonData,
        extractorOptions: {
          ...params.extractorOptions,
          extractionSchema: schema,
          mode: params.extractorOptions.mode || "llm-extraction",
        },
      };
    }
    try {
      const response: AxiosResponse = await axios.post(
        this.apiUrl + "/v0/scrape",
        jsonData,
        { headers },
      );
      if (response.status === 200) {
        const responseData = response.data;
        if (responseData.success) {
          return responseData;
        } else {
          throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
        }
      } else {
        this.handleError(response, "scrape URL");
      }
    } catch (error: any) {
      throw new Error(error.message);
    }
    return { success: false, error: "Internal server error." };
  }

  /**
   * Searches for a query using the Firecrawl API.
   * @param {string} query - The query to search for.
   * @param {Params | null} params - Additional parameters for the search request.
   * @returns {Promise<SearchResponse>} The response from the search operation.
   */
  async search(
    query: string,
    params: Params | null = null
  ): Promise<SearchResponse> {
    const headers: AxiosRequestHeaders = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
    } as AxiosRequestHeaders;
    let jsonData: Params = { query };
    if (params) {
      jsonData = { ...jsonData, ...params };
    }
    try {
      const response: AxiosResponse = await axios.post(
        this.apiUrl + "/v0/search",
        jsonData,
        { headers }
      );
      if (response.status === 200) {
        const responseData = response.data;
        if (responseData.success) {
          return responseData;
        } else {
          throw new Error(`Failed to search. Error: ${responseData.error}`);
        }
      } else {
        this.handleError(response, "search");
      }
    } catch (error: any) {
      throw new Error(error.message);
    }
    return { success: false, error: "Internal server error." };
  }

  /**
   * Initiates a crawl job for a URL using the Firecrawl API.
   * @param {string} url - The URL to crawl.
   * @param {Params | null} params - Additional parameters for the crawl request.
   * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
   * @param {number} pollInterval - Time in seconds for job status checks.
   * @param {string} idempotencyKey - Optional idempotency key for the request.
   * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
   */
  async crawlUrl(
    url: string,
    params: Params | null = null,
    waitUntilDone: boolean = true,
    pollInterval: number = 2,
    idempotencyKey?: string
  ): Promise<CrawlResponse | any> {
    const headers = this.prepareHeaders(idempotencyKey);
    let jsonData: Params = { url };
    if (params) {
      jsonData = { ...jsonData, ...params };
    }
    try {
      const response: AxiosResponse = await this.postRequest(
        this.apiUrl + "/v0/crawl",
        jsonData,
        headers
      );
      if (response.status === 200) {
        const jobId: string = response.data.jobId;
        if (waitUntilDone) {
          return this.monitorJobStatus(jobId, headers, pollInterval);
        } else {
          return { success: true, jobId };
        }
      } else {
        this.handleError(response, "start crawl job");
      }
    } catch (error: any) {
      console.log(error);
      throw new Error(error.message);
    }
    return { success: false, error: "Internal server error." };
  }

  /**
   * Checks the status of a crawl job using the Firecrawl API.
   * @param {string} jobId - The job ID of the crawl operation.
   * @returns {Promise<JobStatusResponse>} The response containing the job status.
   */
  async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
    const headers: AxiosRequestHeaders = this.prepareHeaders();
    try {
      const response: AxiosResponse = await this.getRequest(
        this.apiUrl + `/v0/crawl/status/${jobId}`,
        headers
      );
      if (response.status === 200) {
        return {
          success: true,
          status: response.data.status,
          data: response.data.data,
          partial_data: !response.data.data ? response.data.partial_data : undefined,
        };
      } else {
        this.handleError(response, "check crawl status");
      }
    } catch (error: any) {
      throw new Error(error.message);
    }
    return {
      success: false,
      status: "unknown",
      error: "Internal server error.",
    };
  }

  /**
   * Prepares the headers for an API request.
   * @returns {AxiosRequestHeaders} The prepared headers.
   */
  prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
    return {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${this.apiKey}`,
      ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
    } as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
  }

  /**
   * Sends a POST request to the specified URL.
   * @param {string} url - The URL to send the request to.
   * @param {Params} data - The data to send in the request.
   * @param {AxiosRequestHeaders} headers - The headers for the request.
   * @returns {Promise<AxiosResponse>} The response from the POST request.
   */
  postRequest(
    url: string,
    data: Params,
    headers: AxiosRequestHeaders
  ): Promise<AxiosResponse> {
    return axios.post(url, data, { headers });
  }

  /**
   * Sends a GET request to the specified URL.
   * @param {string} url - The URL to send the request to.
   * @param {AxiosRequestHeaders} headers - The headers for the request.
   * @returns {Promise<AxiosResponse>} The response from the GET request.
   */
  getRequest(
    url: string,
    headers: AxiosRequestHeaders
  ): Promise<AxiosResponse> {
    return axios.get(url, { headers });
  }

  /**
   * Monitors the status of a crawl job until completion or failure.
   * @param {string} jobId - The job ID of the crawl operation.
   * @param {AxiosRequestHeaders} headers - The headers for the request.
   * @param {number} timeout - Timeout in seconds for job status checks.
   * @returns {Promise<any>} The final job status or data.
   */
  async monitorJobStatus(
    jobId: string,
    headers: AxiosRequestHeaders,
    checkInterval: number
  ): Promise<any> {
    while (true) {
      const statusResponse: AxiosResponse = await this.getRequest(
        this.apiUrl + `/v0/crawl/status/${jobId}`,
        headers
      );
      if (statusResponse.status === 200) {
        const statusData = statusResponse.data;
        if (statusData.status === "completed") {
          if ("data" in statusData) {
            return statusData.data;
          } else {
            throw new Error("Crawl job completed but no data was returned");
          }
        } else if (
          ["active", "paused", "pending", "queued"].includes(statusData.status)
        ) {
          if (checkInterval < 2) {
            checkInterval = 2;
          }
          await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
        } else {
          throw new Error(
            `Crawl job failed or was stopped. Status: ${statusData.status}`
          );
        }
      } else {
        this.handleError(statusResponse, "check crawl status");
      }
    }
  }

  /**
   * Handles errors from API responses.
   * @param {AxiosResponse} response - The response from the API.
   * @param {string} action - The action being performed when the error occurred.
   */
  handleError(response: AxiosResponse, action: string): void {
    if ([402, 408, 409, 500].includes(response.status)) {
      const errorMessage: string =
        response.data.error || "Unknown error occurred";
      throw new Error(
        `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`
      );
    } else {
      throw new Error(
        `Unexpected error occurred while trying to ${action}. Status code: ${response.status}`
      );
    }
  }
}
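
The crawl helpers in the diff above work the same way against a self-hosted `apiUrl`. A hedged usage sketch reusing the `app` instance from the earlier example; the crawl parameters and target URL are illustrative only, not part of this commit:

```js
// Start a crawl without blocking: with waitUntilDone = false, crawlUrl returns
// { success, jobId } instead of polling via monitorJobStatus.
const { jobId } = await app.crawlUrl(
  "https://example.com",
  { crawlerOptions: { limit: 5 } }, // illustrative params; anything matching the Params interface is accepted
  false
);

// checkCrawlStatus returns { success, status, data, partial_data } for the job.
const status = await app.checkCrawlStatus(jobId);
console.log(status.status, status.data ?? status.partial_data);
```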

View File

@@ -64,7 +64,7 @@ export default class FirecrawlApp {
* Initializes a new instance of the FirecrawlApp class.
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
*/
-    constructor({ apiKey }: FirecrawlAppConfig);
+    constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
/**
* Scrapes a URL using the Firecrawl API.
* @param {string} url - The URL to scrape.

View File

@@ -11,10 +11,8 @@
"dependencies": {
"@mendable/firecrawl-js": "^0.0.19",
"axios": "^1.6.8",
"dotenv": "^16.4.5",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
"uuid": "^9.0.1",
"zod": "^3.23.8"
},
"devDependencies": {
@@ -452,15 +450,6 @@
"resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz",
"integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA=="
},
"node_modules/@types/node": {
"version": "20.12.11",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.11.tgz",
"integrity": "sha512-vDg9PZ/zi+Nqp6boSOT7plNuthRugEKixDv5sFTIpkE89MmNtEArAShI4mxuX2+UrLEe9pxC1vm2cjm9YlWbJw==",
"peer": true,
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/acorn": {
"version": "8.11.3",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz",
@@ -532,17 +521,6 @@
"node": ">=0.3.1"
}
},
"node_modules/dotenv": {
"version": "16.4.5",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/esbuild": {
"version": "0.20.2",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz",
@@ -750,24 +728,6 @@
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"peer": true
},
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/v8-compile-cache-lib": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",