// v-firecrawl/apps/js-sdk/firecrawl/src/index.ts
import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios';
import dotenv from 'dotenv';
dotenv.config();
2024-04-19 13:49:35 -04:00
/**
* Configuration interface for FirecrawlApp.
*/
export interface FirecrawlAppConfig {
2024-04-16 13:02:16 -04:00
apiKey?: string | null;
}
2024-04-19 13:49:35 -04:00
/**
* Generic parameter interface.
*/
export interface Params {
2024-04-16 13:02:16 -04:00
[key: string]: any;
}
2024-04-19 13:49:35 -04:00
/**
* Response interface for scraping operations.
*/
export interface ScrapeResponse {
success: boolean;
data?: any;
error?: string;
}
/**
* Response interface for crawling operations.
*
* `crawlUrl` resolves with `{ success: true, jobId }` when it is asked not to
* wait for the job to finish.
*/
export interface CrawlResponse {
/** Whether the operation succeeded. */
success: boolean;
/** Identifier of the crawl job; set by `crawlUrl` when it returns without waiting. */
jobId?: string;
/** Crawl payload; this file does not constrain its shape. */
data?: any;
/** Error description accompanying a failed operation. */
error?: string;
}
/**
* Response interface for job status checks.
*/
export interface JobStatusResponse {
/** Whether the status check succeeded. */
success: boolean;
/**
* Job state. `monitorJobStatus` treats 'completed' as finished and
* 'active', 'paused', 'pending' and 'queued' as still in progress;
* `checkCrawlStatus` uses 'unknown' in its fallback value.
*/
status: string;
/** Identifier of the crawl job, when present in the response. */
jobId?: string;
/** Crawl payload; this file does not constrain its shape. */
data?: any;
/** Error description accompanying a failed check. */
error?: string;
}
/**
 * Main class for interacting with the Firecrawl API.
 *
 * Every request carries the API key as a bearer token. Failures are reported
 * by throwing an `Error`; when the HTTP client rejects with an error that
 * carries a `response`, that response is passed through `handleError` so the
 * message includes the status code and, for 402/409/500, the API's error text.
 */
export default class FirecrawlApp {
  private apiKey: string;

  /**
   * Initializes a new instance of the FirecrawlApp class.
   * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
   * May be omitted, in which case the key is read from `FIRECRAWL_API_KEY`.
   * @throws {Error} If neither `config.apiKey` nor the environment variable provides a key.
   */
  constructor({ apiKey = null }: FirecrawlAppConfig = {}) {
    this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
    if (!this.apiKey) {
      throw new Error('No API key provided');
    }
  }

  /**
   * Scrapes a URL using the Firecrawl API.
   * @param {string} url - The URL to scrape.
   * @param {Params | null} params - Additional parameters merged into the request body.
   * @returns {Promise<ScrapeResponse>} The response body of a successful scrape.
   * @throws {Error} If the request fails, the status is not 200, or the body reports failure.
   */
  async scrapeUrl(url: string, params: Params | null = null): Promise<ScrapeResponse> {
    const headers = this.prepareHeaders();
    const jsonData: Params = { url, ...(params ?? {}) };

    let response: AxiosResponse;
    try {
      response = await this.postRequest('https://api.firecrawl.dev/v0/scrape', jsonData, headers);
    } catch (error: unknown) {
      return this.rethrowRequestError(error, 'scrape URL');
    }

    if (response.status !== 200) {
      this.handleError(response, 'scrape URL');
      // Only reached if handleError is overridden so that it does not throw.
      return { success: false, error: 'Internal server error.' };
    }

    const responseData = response.data;
    if (!responseData?.success) {
      throw new Error(`Failed to scrape URL. Error: ${responseData?.error}`);
    }
    return responseData;
  }

  /**
   * Initiates a crawl job for a URL using the Firecrawl API.
   *
   * When `waitUntilDone` is true the promise resolves with whatever
   * `monitorJobStatus` returns, i.e. the finished job's `data` payload rather
   * than a `{ success, ... }` wrapper.
   *
   * @param {string} url - The URL to crawl.
   * @param {Params | null} params - Additional parameters merged into the request body.
   * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
   * @param {number} timeout - Seconds to wait between status polls (raised to 2 if lower).
   * This is a polling interval, not an overall deadline.
   * @returns {Promise<CrawlResponse>} `{ success: true, jobId }` when not waiting.
   * @throws {Error} If the request fails, the status is not 200, or the job fails.
   */
  async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<CrawlResponse> {
    const headers = this.prepareHeaders();
    const jsonData: Params = { url, ...(params ?? {}) };

    let response: AxiosResponse;
    try {
      response = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
    } catch (error: unknown) {
      return this.rethrowRequestError(error, 'start crawl job');
    }

    if (response.status !== 200) {
      this.handleError(response, 'start crawl job');
      // Only reached if handleError is overridden so that it does not throw.
      return { success: false, error: 'Internal server error.' };
    }

    const jobId: string = response.data.jobId;
    if (waitUntilDone) {
      return this.monitorJobStatus(jobId, headers, timeout);
    }
    return { success: true, jobId };
  }

  /**
   * Checks the status of a crawl job using the Firecrawl API.
   * @param {string} jobId - The job ID of the crawl operation.
   * @returns {Promise<JobStatusResponse>} The response containing the job status.
   * @throws {Error} If the request fails or the status is not 200.
   */
  async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
    const headers: AxiosRequestHeaders = this.prepareHeaders();

    let response: AxiosResponse;
    try {
      response = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
    } catch (error: unknown) {
      return this.rethrowRequestError(error, 'check crawl status');
    }

    if (response.status === 200) {
      return response.data;
    }
    this.handleError(response, 'check crawl status');
    // Only reached if handleError is overridden so that it does not throw.
    return { success: false, status: 'unknown', error: 'Internal server error.' };
  }

  /**
   * Prepares the headers for an API request.
   * @returns {AxiosRequestHeaders} JSON content type plus the bearer authorization header.
   */
  prepareHeaders(): AxiosRequestHeaders {
    return {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${this.apiKey}`,
    } as AxiosRequestHeaders;
  }

  /**
   * Sends a POST request to the specified URL.
   * @param {string} url - The URL to send the request to.
   * @param {Params} data - The data to send in the request.
   * @param {AxiosRequestHeaders} headers - The headers for the request.
   * @returns {Promise<AxiosResponse>} The response from the POST request.
   */
  postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
    return axios.post(url, data, { headers });
  }

  /**
   * Sends a GET request to the specified URL.
   * @param {string} url - The URL to send the request to.
   * @param {AxiosRequestHeaders} headers - The headers for the request.
   * @returns {Promise<AxiosResponse>} The response from the GET request.
   */
  getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
    return axios.get(url, { headers });
  }

  /**
   * Monitors the status of a crawl job until completion or failure.
   * @param {string} jobId - The job ID of the crawl operation.
   * @param {AxiosRequestHeaders} headers - The headers for the request.
   * @param {number} timeout - Seconds to wait between polls; values below 2
   * (or non-finite values) are replaced by 2. There is no overall deadline.
   * @returns {Promise<any>} The `data` field of the completed job's status response.
   * @throws {Error} If a status request fails, the job completes without data,
   * or the job reports a status that is neither completed nor in progress.
   */
  async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any> {
    // Clamp once up front; a non-finite value would otherwise yield a ~0 ms delay.
    const pollIntervalMs = (Number.isFinite(timeout) ? Math.max(timeout, 2) : 2) * 1000;
    const inProgressStatuses = ['active', 'paused', 'pending', 'queued'];

    while (true) {
      let statusResponse: AxiosResponse;
      try {
        statusResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
      } catch (error: unknown) {
        return this.rethrowRequestError(error, 'check crawl status');
      }

      if (statusResponse.status !== 200) {
        this.handleError(statusResponse, 'check crawl status');
        // Only reached if handleError is overridden so that it does not throw.
        continue;
      }

      const statusData = statusResponse.data;
      if (statusData.status === 'completed') {
        if ('data' in statusData) {
          return statusData.data;
        }
        throw new Error('Crawl job completed but no data was returned');
      }
      if (!inProgressStatuses.includes(statusData.status)) {
        throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
      }
      // Job still running: wait one interval before polling again.
      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    }
  }

  /**
   * Handles errors from API responses. Always throws.
   * @param {AxiosResponse} response - The response from the API.
   * @param {string} action - The action being performed when the error occurred.
   * @throws {Error} With the API's error text for 402/409/500, or a generic
   * "unexpected" message carrying the status code otherwise.
   */
  handleError(response: AxiosResponse, action: string): void {
    if ([402, 409, 500].includes(response.status)) {
      // The body may be missing or not an object, hence the optional chain.
      const errorMessage: string = response.data?.error || 'Unknown error occurred';
      throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
    }
    throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
  }

  /**
   * Converts a rejected request into a thrown `Error`.
   *
   * If the rejection carries a `response`, it is delegated to `handleError`
   * so the status code and API error text are reported; otherwise only the
   * original message is kept.
   */
  private rethrowRequestError(error: unknown, action: string): never {
    const details = (error ?? {}) as { response?: AxiosResponse; message?: unknown };
    if (details.response) {
      this.handleError(details.response, action);
    }
    throw new Error(typeof details.message === 'string' ? details.message : String(error));
  }
}