var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import axios from "axios";
import { zodToJsonSchema } from "zod-to-json-schema";
/**
 * Main class for interacting with the Firecrawl API.
 */
export default class FirecrawlApp {
    /**
     * Initializes a new instance of the FirecrawlApp class.
     * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
     */
    constructor({ apiKey = null }) {
        this.apiKey = apiKey || "";
        if (!this.apiKey) {
            throw new Error("No API key provided");
        }
    }
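    /*
     * Minimal instantiation sketch (not part of the SDK itself); the API key
     * shown is a placeholder, substitute your own. The constructor throws
     * immediately if no key is supplied.
     *
     *   const app = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY" });
     */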
    /**
     * Scrapes a URL using the Firecrawl API.
     * @param {string} url - The URL to scrape.
     * @param {Params | null} params - Additional parameters for the scrape request.
     * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
     */
    scrapeUrl(url_1) {
        return __awaiter(this, arguments, void 0, function* (url, params = null) {
            var _a;
            const headers = {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
            };
            let jsonData = Object.assign({ url }, params);
            if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
                const schema = zodToJsonSchema(params.extractorOptions.extractionSchema);
                jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
            }
            try {
                const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers });
                if (response.status === 200) {
                    const responseData = response.data;
                    if (responseData.success) {
                        return responseData;
                    }
                    else {
                        throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
                    }
                }
                else {
                    this.handleError(response, "scrape URL");
                }
            }
            catch (error) {
                throw new Error(error.message);
            }
            return { success: false, error: "Internal server error." };
        });
    }
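    /*
     * Usage sketch for scrapeUrl (illustrative, not part of this file): passing a
     * Zod schema under extractorOptions.extractionSchema triggers the
     * zodToJsonSchema conversion above, and mode defaults to "llm-extraction".
     * Assumes zod is installed alongside zod-to-json-schema; the URL and schema
     * fields are placeholders.
     *
     *   import { z } from "zod";
     *   const result = await app.scrapeUrl("https://example.com", {
     *       extractorOptions: {
     *           extractionSchema: z.object({ title: z.string() }),
     *       },
     *   });
     *   if (result.success) console.log(result);
     */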
    /**
     * Searches for a query using the Firecrawl API.
     * @param {string} query - The query to search for.
     * @param {Params | null} params - Additional parameters for the search request.
     * @returns {Promise<SearchResponse>} The response from the search operation.
     */
    search(query_1) {
        return __awaiter(this, arguments, void 0, function* (query, params = null) {
            const headers = {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
            };
            let jsonData = { query };
            if (params) {
                jsonData = Object.assign(Object.assign({}, jsonData), params);
            }
            try {
                const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers });
                if (response.status === 200) {
                    const responseData = response.data;
                    if (responseData.success) {
                        return responseData;
                    }
                    else {
                        throw new Error(`Failed to search. Error: ${responseData.error}`);
                    }
                }
                else {
                    this.handleError(response, "search");
                }
            }
            catch (error) {
                throw new Error(error.message);
            }
            return { success: false, error: "Internal server error." };
        });
    }
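    /*
     * Usage sketch for search (illustrative): any extra params are merged into
     * the request body alongside the query. The query string is a placeholder.
     *
     *   const results = await app.search("firecrawl documentation");
     *   if (results.success) console.log(results);
     */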
    /**
     * Initiates a crawl job for a URL using the Firecrawl API.
     * @param {string} url - The URL to crawl.
     * @param {Params | null} params - Additional parameters for the crawl request.
     * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
     * @param {number} timeout - Polling interval in seconds between job status checks (minimum 2).
     * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
     */
    crawlUrl(url_1) {
        return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
            const headers = this.prepareHeaders();
            let jsonData = { url };
            if (params) {
                jsonData = Object.assign(Object.assign({}, jsonData), params);
            }
            try {
                const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers);
                if (response.status === 200) {
                    const jobId = response.data.jobId;
                    if (waitUntilDone) {
                        return this.monitorJobStatus(jobId, headers, timeout);
                    }
                    else {
                        return { success: true, jobId };
                    }
                }
                else {
                    this.handleError(response, "start crawl job");
                }
            }
            catch (error) {
                console.log(error);
                throw new Error(error.message);
            }
            return { success: false, error: "Internal server error." };
        });
    }
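    /*
     * Usage sketch for crawlUrl: with waitUntilDone = false the call returns
     * { success: true, jobId } right away, and the jobId can later be passed to
     * checkCrawlStatus. The URL and params shape below are placeholders, not
     * taken from this file.
     *
     *   const job = await app.crawlUrl("https://example.com", null, false);
     *   console.log(job.jobId);
     */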
    /**
     * Checks the status of a crawl job using the Firecrawl API.
     * @param {string} jobId - The job ID of the crawl operation.
     * @returns {Promise<JobStatusResponse>} The response containing the job status.
     */
    checkCrawlStatus(jobId) {
        return __awaiter(this, void 0, void 0, function* () {
            const headers = this.prepareHeaders();
            try {
                const response = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
                if (response.status === 200) {
                    return response.data;
                }
                else {
                    this.handleError(response, "check crawl status");
                }
            }
            catch (error) {
                throw new Error(error.message);
            }
            return {
                success: false,
                status: "unknown",
                error: "Internal server error.",
            };
        });
    }
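    /*
     * Usage sketch for checkCrawlStatus (the jobId is a placeholder): useful when
     * a crawl was started with waitUntilDone = false.
     *
     *   const status = await app.checkCrawlStatus(job.jobId);
     *   console.log(status.status);
     */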
    /**
     * Prepares the headers for an API request.
     * @returns {AxiosRequestHeaders} The prepared headers.
     */
    prepareHeaders() {
        return {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
        };
    }
    /**
     * Sends a POST request to the specified URL.
     * @param {string} url - The URL to send the request to.
     * @param {Params} data - The data to send in the request.
     * @param {AxiosRequestHeaders} headers - The headers for the request.
     * @returns {Promise<AxiosResponse>} The response from the POST request.
     */
    postRequest(url, data, headers) {
        return axios.post(url, data, { headers });
    }
    /**
     * Sends a GET request to the specified URL.
     * @param {string} url - The URL to send the request to.
     * @param {AxiosRequestHeaders} headers - The headers for the request.
     * @returns {Promise<AxiosResponse>} The response from the GET request.
     */
    getRequest(url, headers) {
        return axios.get(url, { headers });
    }
    /**
     * Monitors the status of a crawl job until completion or failure.
     * @param {string} jobId - The job ID of the crawl operation.
     * @param {AxiosRequestHeaders} headers - The headers for the request.
     * @param {number} timeout - Polling interval in seconds between status checks (minimum 2).
     * @returns {Promise<any>} The final job status or data.
     */
    monitorJobStatus(jobId, headers, timeout) {
        return __awaiter(this, void 0, void 0, function* () {
            while (true) {
                const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
                if (statusResponse.status === 200) {
                    const statusData = statusResponse.data;
                    if (statusData.status === "completed") {
                        if ("data" in statusData) {
                            return statusData.data;
                        }
                        else {
                            throw new Error("Crawl job completed but no data was returned");
                        }
                    }
                    else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
                        if (timeout < 2) {
                            timeout = 2;
                        }
                        yield new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified interval before checking again
                    }
                    else {
                        throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
                    }
                }
                else {
                    this.handleError(statusResponse, "check crawl status");
                }
            }
        });
    }
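    /*
     * monitorJobStatus is normally reached through crawlUrl with
     * waitUntilDone = true, but it can also be driven directly with an existing
     * jobId (sketch; the jobId value and 5-second interval are placeholders):
     *
     *   const data = await app.monitorJobStatus(jobId, app.prepareHeaders(), 5);
     */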
    /**
     * Handles errors from API responses.
     * @param {AxiosResponse} response - The response from the API.
     * @param {string} action - The action being performed when the error occurred.
     */
    handleError(response, action) {
        if ([402, 409, 500].includes(response.status)) {
            const errorMessage = response.data.error || "Unknown error occurred";
            throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
        }
        else {
            throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
        }
    }
}
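/*
 * End-to-end usage sketch (illustrative; the key and URL are placeholders, and a
 * crawl with waitUntilDone = true blocks until the job finishes or fails):
 *
 *   const app = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY" });
 *   const scraped = await app.scrapeUrl("https://example.com");
 *   const crawled = await app.crawlUrl("https://example.com", null, true, 5);
 */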