diff --git a/apps/js-sdk/firecrawl/.gitignore b/apps/js-sdk/firecrawl/.gitignore new file mode 100644 index 0000000..c6bba59 --- /dev/null +++ b/apps/js-sdk/firecrawl/.gitignore @@ -0,0 +1,130 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp +.cache 
+ +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* diff --git a/apps/js-sdk/firecrawl/README.md b/apps/js-sdk/firecrawl/README.md new file mode 100644 index 0000000..0757511 --- /dev/null +++ b/apps/js-sdk/firecrawl/README.md @@ -0,0 +1,148 @@ +# Firecrawl JavaScript SDK + +The Firecrawl JavaScript SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API. + +## Installation + +To install the Firecrawl JavaScript SDK, you can use npm: + +```bash +npm install @mendable/firecrawl-js +``` + +## Usage + +1. Get an API key from [firecrawl.dev](https://firecrawl.dev) +2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class. 
+ + +Here's an example of how to use the SDK with error handling: + +```js + import FirecrawlApp from '@mendable/firecrawl-js'; + + async function main() { + try { + // Initialize the FirecrawlApp with your API key + const app = new FirecrawlApp({ apiKey: "YOUR_API_KEY" }); + + // Scrape a single URL + const url = 'https://mendable.ai'; + const scrapedData = await app.scrapeUrl(url); + console.log(scrapedData); + + // Crawl a website + const crawlUrl = 'https://mendable.ai'; + const crawlParams = { + crawlerOptions: { + excludes: ['blog/'], + includes: [], // leave empty for all pages + limit: 1000, + } + }; + + const crawlResult = await app.crawlUrl(crawlUrl, crawlParams); + console.log(crawlResult); + + } catch (error) { + console.error('An error occurred:', error.message); + } + } + + main(); +``` + +### Scraping a URL + +To scrape a single URL with error handling, use the `scrapeUrl` method. It takes the URL as a parameter and returns the scraped data as a JavaScript object. + +```js + async function scrapeExample() { + try { + const url = 'https://example.com'; + const scrapedData = await app.scrapeUrl(url); + console.log(scrapedData); + + } catch (error) { + console.error( + 'Error occurred while scraping:', + error.message + ); + } + } + + scrapeExample(); +``` + + +### Crawling a Website + +To crawl a website with error handling, use the `crawlUrl` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format. Two further optional arguments control waiting: `waitUntilDone` (default `true`) makes the call poll until the crawl job finishes instead of returning the job ID immediately, and `timeout` (default and minimum `2`) is the number of seconds to wait between status checks. 
+ +```js +async function crawlExample() { + try { + const crawlUrl = 'https://example.com'; + const crawlParams = { + crawlerOptions: { + excludes: ['blog/'], + includes: [], // leave empty for all pages + limit: 1000, + } + }; + const waitUntilDone = true; + const timeout = 5; + const crawlResult = await app.crawlUrl( + crawlUrl, + crawlParams, + waitUntilDone, + timeout + ); + + console.log(crawlResult); + + } catch (error) { + console.error( + 'Error occurred while crawling:', + error.message + ); + } +} + +crawlExample(); +``` + + +### Checking Crawl Status + +To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job. + +```js +async function checkStatusExample(jobId) { + try { + const status = await app.checkCrawlStatus(jobId); + console.log(status); + + } catch (error) { + console.error( + 'Error occurred while checking crawl status:', + error.message + ); + } +} +// Example usage, assuming you have a jobId +checkStatusExample('your_job_id_here'); +``` + + +## Error Handling + +The SDK turns errors returned by the Firecrawl API into JavaScript `Error` objects. If a request fails, an `Error` is thrown with a descriptive message. The examples above demonstrate how to handle these errors using `try/catch` blocks. + +## Contributing + +Contributions to the Firecrawl JavaScript SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository. + +## License + +The Firecrawl JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT). 
/**
 * Client for the Firecrawl v0 REST API: scrape a single URL, start a crawl
 * job, and poll a crawl job's status.
 *
 * HTTP calls go through `postRequest` / `getRequest`, which use the
 * module-level `axios` import. Every failure surfaces as a plain `Error`
 * whose message describes what went wrong.
 */
export default class FirecrawlApp {
  /**
   * @param {{ apiKey?: string | null }} [config] - Optional settings. When
   *   `apiKey` is omitted or falsy, `process.env.FIRECRAWL_API_KEY` is used.
   * @throws {Error} If no API key can be resolved from either source.
   */
  constructor({ apiKey = null } = {}) {
    // The `= {}` default lets `new FirecrawlApp()` fall back to the
    // environment variable instead of failing while destructuring `undefined`.
    this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
    if (!this.apiKey) {
      throw new Error('No API key provided');
    }
  }

  /**
   * Scrape a single URL.
   *
   * @param {string} url - Page to scrape.
   * @param {object | null} [params] - Extra request-body fields, merged over `{ url }`.
   * @returns {Promise<any>} The `data` field of a successful response.
   * @throws {Error} If the request fails or the API reports `success: false`.
   */
  async scrapeUrl(url, params = null) {
    const headers = this.prepareHeaders();
    const jsonData = { url, ...params };
    try {
      const response = await this.postRequest('https://api.firecrawl.dev/v0/scrape', jsonData, headers);
      if (response.status !== 200) {
        this.handleError(response, 'scrape URL');
      }
      const responseData = response.data;
      if (!responseData.success) {
        throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
      }
      return responseData.data;
    } catch (error) {
      this.failRequest(error, 'scrape URL');
    }
  }

  /**
   * Start a crawl job and, by default, wait for it to finish.
   *
   * @param {string} url - Starting URL of the crawl.
   * @param {object | null} [params] - Extra request-body fields, merged over `{ url }`.
   * @param {boolean} [waitUntilDone=true] - When false, resolve with `{ jobId }`
   *   right after the job is created instead of polling.
   * @param {number} [timeout=2] - Seconds between status polls (values below 2 are raised to 2).
   * @returns {Promise<any>} The crawl data, or `{ jobId }` when not waiting.
   * @throws {Error} If the job cannot be started or ends in a non-completed state.
   */
  async crawlUrl(url, params = null, waitUntilDone = true, timeout = 2) {
    const headers = this.prepareHeaders();
    const jsonData = { url, ...params };
    try {
      const response = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
      if (response.status !== 200) {
        this.handleError(response, 'start crawl job');
      }
      const { jobId } = response.data;
      if (!waitUntilDone) {
        return { jobId };
      }
      // Not awaited here: polling failures are reported by monitorJobStatus
      // under its own action name rather than as "start crawl job".
      return this.monitorJobStatus(jobId, headers, timeout);
    } catch (error) {
      this.failRequest(error, 'start crawl job');
    }
  }

  /**
   * Fetch the current status of a crawl job.
   *
   * @param {string} jobId - Identifier returned when the crawl was started.
   * @returns {Promise<any>} The response body of the status endpoint.
   * @throws {Error} If the request fails.
   */
  async checkCrawlStatus(jobId) {
    const headers = this.prepareHeaders();
    try {
      const response = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
      if (response.status !== 200) {
        this.handleError(response, 'check crawl status');
      }
      return response.data;
    } catch (error) {
      this.failRequest(error, 'check crawl status');
    }
  }

  /**
   * Build the JSON content-type and bearer-token headers sent with every request.
   *
   * @returns {{ 'Content-Type': string, 'Authorization': string }}
   */
  prepareHeaders() {
    return {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${this.apiKey}`,
    };
  }

  /**
   * POST `data` to `url` with the given headers.
   *
   * @returns {Promise<any>} The axios response.
   */
  postRequest(url, data, headers) {
    return axios.post(url, data, { headers });
  }

  /**
   * GET `url` with the given headers.
   *
   * @returns {Promise<any>} The axios response.
   */
  getRequest(url, headers) {
    return axios.get(url, { headers });
  }

  /**
   * Poll the status endpoint until the job completes or fails.
   *
   * There is no overall deadline: the loop continues for as long as the API
   * reports one of the in-progress states.
   *
   * @param {string} jobId - Crawl job to watch.
   * @param {object} headers - Request headers, as built by `prepareHeaders`.
   * @param {number} timeout - Seconds between polls; anything that is not a
   *   number of at least 2 is treated as 2.
   * @returns {Promise<any>} The `data` field of the completed job.
   * @throws {Error} If a poll fails, the job ends in a non-completed state, or
   *   it completes without data.
   */
  async monitorJobStatus(jobId, headers, timeout) {
    // Computed once rather than reassigning the parameter on every pass. The
    // comparison also maps NaN/undefined to 2, so a bad interval cannot turn
    // the loop into near-zero-delay polling.
    const pollSeconds = timeout >= 2 ? timeout : 2;
    const inProgressStates = ['active', 'paused', 'pending', 'queued'];
    const statusUrl = `https://api.firecrawl.dev/v0/crawl/status/${jobId}`;
    while (true) {
      try {
        const statusResponse = await this.getRequest(statusUrl, headers);
        if (statusResponse.status !== 200) {
          this.handleError(statusResponse, 'check crawl status');
        }
        const statusData = statusResponse.data;
        if (statusData.status === 'completed') {
          if (!('data' in statusData)) {
            throw new Error('Crawl job completed but no data was returned');
          }
          return statusData.data;
        }
        if (!inProgressStates.includes(statusData.status)) {
          throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
        }
      } catch (error) {
        this.failRequest(error, 'check crawl status');
      }
      // Still in progress: wait before checking again.
      await new Promise((resolve) => setTimeout(resolve, pollSeconds * 1000));
    }
  }

  /**
   * Throw an `Error` describing an unsuccessful HTTP response. Never returns.
   *
   * @param {{ status: number, data?: any }} response - The HTTP response.
   * @param {string} action - Human-readable name of the attempted operation.
   * @throws {Error} Always.
   */
  handleError(response, action) {
    if ([402, 409, 500].includes(response.status)) {
      // `data` may be missing or not an object (e.g. an HTML error page).
      const errorMessage = (response.data && response.data.error) || 'Unknown error occurred';
      throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
    }
    throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
  }

  /**
   * Convert anything caught around a request into a thrown `Error`. Never returns.
   *
   * The HTTP client rejects on non-2xx responses and attaches the response to
   * the error; when present, it is routed through `handleError` so the API's
   * own error text reaches the caller. Anything else is re-thrown as a plain
   * `Error` carrying the original message.
   *
   * @param {any} error - The caught value.
   * @param {string} action - Human-readable name of the attempted operation.
   * @throws {Error} Always.
   */
  failRequest(error, action) {
    if (error && error.response) {
      this.handleError(error.response, action);
    }
    throw new Error(error && error.message ? error.message : String(error));
  }
}
axios provides its own type definitions, so you don't need @types/axios installed!", + "dev": true, + "dependencies": { + "axios": "*" + } + }, + "node_modules/@types/dotenv": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz", + "integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==", + "deprecated": "This is a stub types definition. dotenv provides its own type definitions, so you do not need this installed.", + "dev": true, + "dependencies": { + "dotenv": "*" + } + }, + "node_modules/@types/node": { + "version": "20.12.7", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", + "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", + "dev": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.6.8", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz", + "integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + 
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + 
"node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, + "node_modules/typescript": { + "version": "5.4.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", + "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "dev": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + } + } +} diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json new file mode 100644 index 0000000..89e6d3f --- /dev/null +++ b/apps/js-sdk/firecrawl/package.json @@ -0,0 +1,39 @@ +{ + "name": "@mendable/firecrawl-js", + "version": "0.0.9", + "description": "JavaScript SDK for Firecrawl API", + "main": "build/index.js", + "type": "module", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/mendableai/firecrawl.git" + }, + "author": "Mendable.ai", + "license": "MIT", + "dependencies": { + "axios": "^1.6.8", + "dotenv": "^16.4.5" + }, + "bugs": { + "url": "https://github.com/mendableai/firecrawl/issues" + }, + "homepage": "https://github.com/mendableai/firecrawl#readme", + "devDependencies": { + "@types/axios": "^0.14.0", + "@types/dotenv": "^8.2.0", + "@types/node": "^20.12.7", + "typescript": "^5.4.5" + }, + "keywords": [ + "firecrawl", + "mendable", + "crawler", + "web", + "scraper", + "api", + "sdk" + ] +} diff --git a/apps/js-sdk/firecrawl/src/index.ts 
/** Constructor options for {@link FirecrawlApp}. */
interface FirecrawlAppConfig {
  /** API key; when omitted or falsy, `FIRECRAWL_API_KEY` from the environment is used. */
  apiKey?: string | null;
}

/** Free-form extra fields merged into a request body. */
interface Params {
  [key: string]: any;
}

/**
 * Client for the Firecrawl v0 REST API: scrape a single URL, start a crawl
 * job, and poll a crawl job's status.
 *
 * HTTP calls go through `postRequest` / `getRequest`, which use the
 * module-level `axios` import. Every failure surfaces as a plain `Error`
 * whose message describes what went wrong.
 */
export default class FirecrawlApp {
  private apiKey: string;

  /**
   * @param config - Optional settings; see {@link FirecrawlAppConfig}.
   * @throws {Error} If no API key can be resolved from the argument or the environment.
   */
  constructor({ apiKey = null }: FirecrawlAppConfig = {}) {
    // The `= {}` default lets `new FirecrawlApp()` fall back to the
    // environment variable instead of failing while destructuring `undefined`.
    this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
    if (!this.apiKey) {
      throw new Error('No API key provided');
    }
  }

  /**
   * Scrape a single URL.
   *
   * @param url - Page to scrape.
   * @param params - Extra request-body fields, merged over `{ url }`.
   * @returns The `data` field of a successful response.
   * @throws {Error} If the request fails or the API reports `success: false`.
   */
  async scrapeUrl(url: string, params: Params | null = null): Promise<any> {
    const headers = this.prepareHeaders();
    const jsonData: Params = { url, ...params };
    try {
      const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/scrape', jsonData, headers);
      if (response.status !== 200) {
        this.handleError(response, 'scrape URL');
      }
      const responseData = response.data;
      if (!responseData.success) {
        throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
      }
      return responseData.data;
    } catch (error: any) {
      this.failRequest(error, 'scrape URL');
    }
  }

  /**
   * Start a crawl job and, by default, wait for it to finish.
   *
   * @param url - Starting URL of the crawl.
   * @param params - Extra request-body fields, merged over `{ url }`.
   * @param waitUntilDone - When false, resolve with `{ jobId }` right after
   *   the job is created instead of polling.
   * @param timeout - Seconds between status polls (values below 2 are raised to 2).
   * @returns The crawl data, or `{ jobId }` when not waiting.
   * @throws {Error} If the job cannot be started or ends in a non-completed state.
   */
  async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<any> {
    const headers = this.prepareHeaders();
    const jsonData: Params = { url, ...params };
    try {
      const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
      if (response.status !== 200) {
        this.handleError(response, 'start crawl job');
      }
      const jobId: string = response.data.jobId;
      if (!waitUntilDone) {
        return { jobId };
      }
      // Not awaited here: polling failures are reported by monitorJobStatus
      // under its own action name rather than as "start crawl job".
      return this.monitorJobStatus(jobId, headers, timeout);
    } catch (error: any) {
      this.failRequest(error, 'start crawl job');
    }
  }

  /**
   * Fetch the current status of a crawl job.
   *
   * @param jobId - Identifier returned when the crawl was started.
   * @returns The response body of the status endpoint.
   * @throws {Error} If the request fails.
   */
  async checkCrawlStatus(jobId: string): Promise<any> {
    const headers: AxiosRequestHeaders = this.prepareHeaders();
    try {
      const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
      if (response.status !== 200) {
        this.handleError(response, 'check crawl status');
      }
      return response.data;
    } catch (error: any) {
      this.failRequest(error, 'check crawl status');
    }
  }

  /** Build the JSON content-type and bearer-token headers sent with every request. */
  prepareHeaders(): AxiosRequestHeaders {
    return {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${this.apiKey}`,
    } as AxiosRequestHeaders;
  }

  /** POST `data` to `url` with the given headers. */
  postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
    return axios.post(url, data, { headers });
  }

  /** GET `url` with the given headers. */
  getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
    return axios.get(url, { headers });
  }

  /**
   * Poll the status endpoint until the job completes or fails.
   *
   * There is no overall deadline: the loop continues for as long as the API
   * reports one of the in-progress states.
   *
   * @param jobId - Crawl job to watch.
   * @param headers - Request headers, as built by `prepareHeaders`.
   * @param timeout - Seconds between polls; anything that is not a number of
   *   at least 2 is treated as 2.
   * @returns The `data` field of the completed job.
   * @throws {Error} If a poll fails, the job ends in a non-completed state, or
   *   it completes without data.
   */
  async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any> {
    // Computed once rather than reassigning the parameter on every pass. The
    // comparison also maps NaN/undefined to 2, so a bad interval cannot turn
    // the loop into near-zero-delay polling.
    const pollSeconds = timeout >= 2 ? timeout : 2;
    const inProgressStates = ['active', 'paused', 'pending', 'queued'];
    const statusUrl = `https://api.firecrawl.dev/v0/crawl/status/${jobId}`;
    while (true) {
      try {
        const statusResponse: AxiosResponse = await this.getRequest(statusUrl, headers);
        if (statusResponse.status !== 200) {
          this.handleError(statusResponse, 'check crawl status');
        }
        const statusData = statusResponse.data;
        if (statusData.status === 'completed') {
          if (!('data' in statusData)) {
            throw new Error('Crawl job completed but no data was returned');
          }
          return statusData.data;
        }
        if (!inProgressStates.includes(statusData.status)) {
          throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
        }
      } catch (error: any) {
        this.failRequest(error, 'check crawl status');
      }
      // Still in progress: wait before checking again.
      await new Promise(resolve => setTimeout(resolve, pollSeconds * 1000));
    }
  }

  /**
   * Throw an `Error` describing an unsuccessful HTTP response. Always throws.
   *
   * @param response - The HTTP response.
   * @param action - Human-readable name of the attempted operation.
   */
  handleError(response: AxiosResponse, action: string): void {
    if ([402, 409, 500].includes(response.status)) {
      // `data` may be missing or not an object (e.g. an HTML error page).
      const errorMessage: string = (response.data && response.data.error) || 'Unknown error occurred';
      throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
    }
    throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
  }

  /**
   * Convert anything caught around a request into a thrown `Error`.
   *
   * The HTTP client rejects on non-2xx responses and attaches the response to
   * the error; when present, it is routed through `handleError` so the API's
   * own error text reaches the caller. Anything else is re-thrown as a plain
   * `Error` carrying the original message.
   *
   * @param error - The caught value.
   * @param action - Human-readable name of the attempted operation.
   */
  private failRequest(error: any, action: string): never {
    if (error && error.response) {
      this.handleError(error.response, action);
    }
    throw new Error(error && error.message ? error.message : String(error));
  }
}
*/ + // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ + // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ + + /* Language and Environment */ + "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ + // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ + // "jsx": "preserve", /* Specify what JSX code is generated. */ + // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ + // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ + // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ + // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ + // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ + // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ + // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ + // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ + // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ + + /* Modules */ + "module": "NodeNext", /* Specify what module code is generated. */ + "rootDir": "./src", /* Specify the root folder within your source files. */ + "moduleResolution": "nodenext", /* Specify how TypeScript looks up a file from a given module specifier. 
*/ + // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ + // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ + // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ + // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */ + // "types": [], /* Specify type package names to be included without being referenced in a source file. */ + // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ + // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ + // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */ + // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ + // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ + // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ + // "resolveJsonModule": true, /* Enable importing .json files. */ + // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */ + // "noResolve": true, /* Disallow 'import's, 'require's or ''s from expanding the number of files TypeScript should add to a project. */ + + /* JavaScript Support */ + // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ + // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ + // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. 
*/ + + /* Emit */ + // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ + // "declarationMap": true, /* Create sourcemaps for d.ts files. */ + // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ + // "sourceMap": true, /* Create source map files for emitted JavaScript files. */ + // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ + // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ + "outDir": "./build", /* Specify an output folder for all emitted files. */ + // "removeComments": true, /* Disable emitting comments. */ + // "noEmit": true, /* Disable emitting files from a compilation. */ + // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ + // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */ + // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ + // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ + // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ + // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ + // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ + // "newLine": "crlf", /* Set the newline character for emitting files. */ + // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ + // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. 
*/ + // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ + // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ + // "declarationDir": "./", /* Specify the output directory for generated declaration files. */ + // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ + + /* Interop Constraints */ + // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ + // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */ + // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ + "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */ + // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ + "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ + + /* Type Checking */ + "strict": true, /* Enable all strict type-checking options. */ + // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ + // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ + // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ + // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */ + // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. 
*/ + // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ + // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ + // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ + // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ + // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ + // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ + // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ + // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ + // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ + // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ + // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ + // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ + // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ + + /* Completeness */ + // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ + "skipLibCheck": true /* Skip type checking all .d.ts files. */ + } +}