Merge branch 'main' into nsc/wait-for-param
This commit is contained in:
commit
f53d25efac
19
.github/workflows/fly.yml
vendored
19
.github/workflows/fly.yml
vendored
@ -94,6 +94,25 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
npm run test
|
npm run test
|
||||||
working-directory: ./apps/test-suite
|
working-directory: ./apps/test-suite
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r requirements.txt
|
||||||
|
working-directory: ./apps/python-sdk
|
||||||
|
- name: Run E2E tests for Python SDK
|
||||||
|
run: |
|
||||||
|
pytest firecrawl/__tests__/e2e_withAuth/test.py
|
||||||
|
working-directory: ./apps/python-sdk
|
||||||
|
- name: Install dependencies for JavaScript SDK
|
||||||
|
run: pnpm install
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
- name: Run E2E tests for JavaScript SDK
|
||||||
|
run: npm run test
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
name: Deploy app
|
name: Deploy app
|
||||||
|
60
.github/workflows/js-sdk.yml
vendored
Normal file
60
.github/workflows/js-sdk.yml
vendored
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
name: Run JavaScript SDK E2E Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
env:
|
||||||
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
|
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||||
|
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
||||||
|
HOST: ${{ secrets.HOST }}
|
||||||
|
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
|
||||||
|
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
|
||||||
|
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
|
||||||
|
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
|
||||||
|
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
|
||||||
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
|
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
|
||||||
|
PORT: ${{ secrets.PORT }}
|
||||||
|
REDIS_URL: ${{ secrets.REDIS_URL }}
|
||||||
|
SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
|
||||||
|
SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
|
||||||
|
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
|
||||||
|
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
|
||||||
|
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||||
|
HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
|
||||||
|
HDX_NODE_BETA_MODE: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
services:
|
||||||
|
redis:
|
||||||
|
image: redis
|
||||||
|
ports:
|
||||||
|
- 6379:6379
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
|
- name: Install pnpm
|
||||||
|
run: npm install -g pnpm
|
||||||
|
- name: Install dependencies for API
|
||||||
|
run: pnpm install
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Start the application
|
||||||
|
run: npm start &
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Start workers
|
||||||
|
run: npm run workers &
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Install dependencies for JavaScript SDK
|
||||||
|
run: pnpm install
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
- name: Run E2E tests for JavaScript SDK
|
||||||
|
run: npm run test
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
72
.github/workflows/python-sdk.yml
vendored
Normal file
72
.github/workflows/python-sdk.yml
vendored
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
name: Run Python SDK E2E Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
env:
|
||||||
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
|
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
||||||
|
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
|
||||||
|
HOST: ${{ secrets.HOST }}
|
||||||
|
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
|
||||||
|
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
|
||||||
|
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
|
||||||
|
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
|
||||||
|
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
|
||||||
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
|
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
|
||||||
|
PORT: ${{ secrets.PORT }}
|
||||||
|
REDIS_URL: ${{ secrets.REDIS_URL }}
|
||||||
|
SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
|
||||||
|
SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
|
||||||
|
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
|
||||||
|
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
|
||||||
|
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||||
|
HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
|
||||||
|
HDX_NODE_BETA_MODE: 1
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-version: ["3.10"]
|
||||||
|
services:
|
||||||
|
redis:
|
||||||
|
image: redis
|
||||||
|
ports:
|
||||||
|
- 6379:6379
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
|
- name: Install pnpm
|
||||||
|
run: npm install -g pnpm
|
||||||
|
- name: Install dependencies for API
|
||||||
|
run: pnpm install
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Start the application
|
||||||
|
run: npm start &
|
||||||
|
working-directory: ./apps/api
|
||||||
|
id: start_app
|
||||||
|
- name: Start workers
|
||||||
|
run: npm run workers &
|
||||||
|
working-directory: ./apps/api
|
||||||
|
id: start_workers
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r requirements.txt
|
||||||
|
working-directory: ./apps/python-sdk
|
||||||
|
- name: Run E2E tests for Python SDK
|
||||||
|
run: |
|
||||||
|
pytest firecrawl/__tests__/e2e_withAuth/test.py
|
||||||
|
working-directory: ./apps/python-sdk
|
@ -402,7 +402,6 @@ const searchResults = await app.search(query, {
|
|||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
|
We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
|
||||||
|
3
apps/js-sdk/firecrawl/.env.example
Normal file
3
apps/js-sdk/firecrawl/.env.example
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
API_URL=http://localhost:3002
|
||||||
|
TEST_API_KEY=fc-YOUR_API_KEY
|
||||||
|
|
66
apps/js-sdk/firecrawl/package-lock.json
generated
66
apps/js-sdk/firecrawl/package-lock.json
generated
@ -1,22 +1,27 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.17-beta.8",
|
"version": "0.0.22",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.17-beta.8",
|
"version": "0.0.22",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
|
"dotenv": "^16.4.5",
|
||||||
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8",
|
||||||
"zod-to-json-schema": "^3.23.0"
|
"zod-to-json-schema": "^3.23.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@jest/globals": "^29.7.0",
|
"@jest/globals": "^29.7.0",
|
||||||
"@types/axios": "^0.14.0",
|
"@types/axios": "^0.14.0",
|
||||||
"@types/node": "^20.12.7",
|
"@types/dotenv": "^8.2.0",
|
||||||
|
"@types/jest": "^29.5.12",
|
||||||
|
"@types/node": "^20.12.12",
|
||||||
|
"@types/uuid": "^9.0.8",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
"ts-jest": "^29.1.2",
|
"ts-jest": "^29.1.2",
|
||||||
"typescript": "^5.4.5"
|
"typescript": "^5.4.5"
|
||||||
@ -1013,6 +1018,16 @@
|
|||||||
"@babel/types": "^7.20.7"
|
"@babel/types": "^7.20.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/dotenv": {
|
||||||
|
"version": "8.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz",
|
||||||
|
"integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==",
|
||||||
|
"deprecated": "This is a stub types definition. dotenv provides its own type definitions, so you do not need this installed.",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"dotenv": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/graceful-fs": {
|
"node_modules/@types/graceful-fs": {
|
||||||
"version": "4.1.9",
|
"version": "4.1.9",
|
||||||
"resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz",
|
"resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz",
|
||||||
@ -1046,10 +1061,20 @@
|
|||||||
"@types/istanbul-lib-report": "*"
|
"@types/istanbul-lib-report": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/jest": {
|
||||||
|
"version": "29.5.12",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
|
||||||
|
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"expect": "^29.0.0",
|
||||||
|
"pretty-format": "^29.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/node": {
|
"node_modules/@types/node": {
|
||||||
"version": "20.12.7",
|
"version": "20.12.12",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz",
|
||||||
"integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
|
"integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~5.26.4"
|
"undici-types": "~5.26.4"
|
||||||
@ -1061,6 +1086,12 @@
|
|||||||
"integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==",
|
"integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/uuid": {
|
||||||
|
"version": "9.0.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||||
|
"integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
"node_modules/@types/yargs": {
|
"node_modules/@types/yargs": {
|
||||||
"version": "17.0.32",
|
"version": "17.0.32",
|
||||||
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz",
|
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz",
|
||||||
@ -1602,6 +1633,17 @@
|
|||||||
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
|
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/dotenv": {
|
||||||
|
"version": "16.4.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
|
||||||
|
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=12"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://dotenvx.com"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/electron-to-chromium": {
|
"node_modules/electron-to-chromium": {
|
||||||
"version": "1.4.748",
|
"version": "1.4.748",
|
||||||
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz",
|
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz",
|
||||||
@ -3641,6 +3683,18 @@
|
|||||||
"browserslist": ">= 4.21.0"
|
"browserslist": ">= 4.21.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/uuid": {
|
||||||
|
"version": "9.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||||
|
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||||
|
"funding": [
|
||||||
|
"https://github.com/sponsors/broofa",
|
||||||
|
"https://github.com/sponsors/ctavan"
|
||||||
|
],
|
||||||
|
"bin": {
|
||||||
|
"uuid": "dist/bin/uuid"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/v8-to-istanbul": {
|
"node_modules/v8-to-istanbul": {
|
||||||
"version": "9.2.0",
|
"version": "9.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz",
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"publish": "npm run build && npm publish --access public",
|
"publish": "npm run build && npm publish --access public",
|
||||||
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
||||||
"test": "jest src/**/*.test.ts"
|
"test": "jest src/__tests__/**/*.test.ts"
|
||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
@ -19,6 +19,8 @@
|
|||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
|
"dotenv": "^16.4.5",
|
||||||
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8",
|
||||||
"zod-to-json-schema": "^3.23.0"
|
"zod-to-json-schema": "^3.23.0"
|
||||||
},
|
},
|
||||||
@ -29,7 +31,10 @@
|
|||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@jest/globals": "^29.7.0",
|
"@jest/globals": "^29.7.0",
|
||||||
"@types/axios": "^0.14.0",
|
"@types/axios": "^0.14.0",
|
||||||
"@types/node": "^20.12.7",
|
"@types/dotenv": "^8.2.0",
|
||||||
|
"@types/jest": "^29.5.12",
|
||||||
|
"@types/node": "^20.12.12",
|
||||||
|
"@types/uuid": "^9.0.8",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
"ts-jest": "^29.1.2",
|
"ts-jest": "^29.1.2",
|
||||||
"typescript": "^5.4.5"
|
"typescript": "^5.4.5"
|
||||||
|
146
apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
Normal file
146
apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
import FirecrawlApp from '../../index';
|
||||||
|
import { v4 as uuidv4 } from 'uuid';
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
const TEST_API_KEY = process.env.TEST_API_KEY;
|
||||||
|
const API_URL = process.env.API_URL;
|
||||||
|
|
||||||
|
describe('FirecrawlApp E2E Tests', () => {
|
||||||
|
test('should throw error for no API key', () => {
|
||||||
|
expect(() => {
|
||||||
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
||||||
|
}).toThrow("No API key provided");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('should throw error for invalid API key on scrape', async () => {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('should throw error for blocklisted URL on scrape', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||||
|
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('should return successful response with valid preview token', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://firecrawl.dev');
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||||
|
}, 10000); // 10 seconds timeout
|
||||||
|
|
||||||
|
test('should return successful response for valid scrape', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://firecrawl.dev');
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||||
|
expect(response.data).toHaveProperty('markdown');
|
||||||
|
expect(response.data).toHaveProperty('metadata');
|
||||||
|
expect(response.data).not.toHaveProperty('html');
|
||||||
|
}, 10000); // 10 seconds timeout
|
||||||
|
|
||||||
|
test('should return successful response with valid API key and include HTML', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||||
|
expect(response.data.markdown).toContain("🔥 Firecrawl");
|
||||||
|
expect(response.data.html).toContain("<h1");
|
||||||
|
}, 10000); // 10 seconds timeout
|
||||||
|
|
||||||
|
test('should return successful response for valid scrape with PDF file', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test('should throw error for invalid API key on crawl', async () => {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('should throw error for blocklisted URL on crawl', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||||
|
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('should return successful response for crawl and wait for completion', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response[0].content).toContain("🔥 Firecrawl");
|
||||||
|
}, 60000); // 60 seconds timeout
|
||||||
|
|
||||||
|
test('should handle idempotency key for crawl', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const uniqueIdempotencyKey = uuidv4();
|
||||||
|
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
|
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('should check crawl status', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds
|
||||||
|
const statusResponse = await app.checkCrawlStatus(response.jobId);
|
||||||
|
expect(statusResponse).not.toBeNull();
|
||||||
|
expect(statusResponse.status).toBe('completed');
|
||||||
|
expect(statusResponse.data.length).toBeGreaterThan(0);
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test('should return successful response for search', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.search("test query");
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data[0].content).toBeDefined();
|
||||||
|
expect(response.data.length).toBeGreaterThan(2);
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
|
test('should throw error for invalid API key on search', async () => {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
||||||
|
});
|
||||||
|
|
||||||
|
test('should perform LLM extraction', async () => {
|
||||||
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||||
|
const response = await app.scrapeUrl("https://mendable.ai", {
|
||||||
|
extractorOptions: {
|
||||||
|
mode: 'llm-extraction',
|
||||||
|
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
||||||
|
extractionSchema: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
company_mission: { type: 'string' },
|
||||||
|
supports_sso: { type: 'boolean' },
|
||||||
|
is_open_source: { type: 'boolean' }
|
||||||
|
},
|
||||||
|
required: ['company_mission', 'supports_sso', 'is_open_source']
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data.llm_extraction).toBeDefined();
|
||||||
|
const llmExtraction = response.data.llm_extraction;
|
||||||
|
expect(llmExtraction.company_mission).toBeDefined();
|
||||||
|
expect(typeof llmExtraction.supports_sso).toBe('boolean');
|
||||||
|
expect(typeof llmExtraction.is_open_source).toBe('boolean');
|
||||||
|
}, 30000); // 30 seconds timeout
|
||||||
|
});
|
@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
|
|||||||
*/
|
*/
|
||||||
export interface FirecrawlAppConfig {
|
export interface FirecrawlAppConfig {
|
||||||
apiKey?: string | null;
|
apiKey?: string | null;
|
||||||
|
apiUrl?: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -63,6 +64,7 @@ export interface JobStatusResponse {
|
|||||||
*/
|
*/
|
||||||
export default class FirecrawlApp {
|
export default class FirecrawlApp {
|
||||||
private apiKey: string;
|
private apiKey: string;
|
||||||
|
private apiUrl: string = "https://api.firecrawl.dev";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
@ -107,7 +109,7 @@ export default class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await axios.post(
|
const response: AxiosResponse = await axios.post(
|
||||||
"https://api.firecrawl.dev/v0/scrape",
|
this.apiUrl + "/v0/scrape",
|
||||||
jsonData,
|
jsonData,
|
||||||
{ headers },
|
{ headers },
|
||||||
);
|
);
|
||||||
@ -147,7 +149,7 @@ export default class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await axios.post(
|
const response: AxiosResponse = await axios.post(
|
||||||
"https://api.firecrawl.dev/v0/search",
|
this.apiUrl + "/v0/search",
|
||||||
jsonData,
|
jsonData,
|
||||||
{ headers }
|
{ headers }
|
||||||
);
|
);
|
||||||
@ -190,7 +192,7 @@ export default class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await this.postRequest(
|
const response: AxiosResponse = await this.postRequest(
|
||||||
"https://api.firecrawl.dev/v0/crawl",
|
this.apiUrl + "/v0/crawl",
|
||||||
jsonData,
|
jsonData,
|
||||||
headers
|
headers
|
||||||
);
|
);
|
||||||
@ -220,7 +222,7 @@ export default class FirecrawlApp {
|
|||||||
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await this.getRequest(
|
const response: AxiosResponse = await this.getRequest(
|
||||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
this.apiUrl + `/v0/crawl/status/${jobId}`,
|
||||||
headers
|
headers
|
||||||
);
|
);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
@ -292,7 +294,7 @@ export default class FirecrawlApp {
|
|||||||
): Promise<any> {
|
): Promise<any> {
|
||||||
while (true) {
|
while (true) {
|
||||||
const statusResponse: AxiosResponse = await this.getRequest(
|
const statusResponse: AxiosResponse = await this.getRequest(
|
||||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
this.apiUrl + `/v0/crawl/status/${jobId}`,
|
||||||
headers
|
headers
|
||||||
);
|
);
|
||||||
if (statusResponse.status === 200) {
|
if (statusResponse.status === 200) {
|
||||||
|
38
apps/js-sdk/package-lock.json
generated
38
apps/js-sdk/package-lock.json
generated
@ -11,9 +11,10 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@mendable/firecrawl-js": "^0.0.19",
|
"@mendable/firecrawl-js": "^0.0.19",
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
"uuid": "^9.0.1",
|
"dotenv": "^16.4.5",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
"typescript": "^5.4.5",
|
"typescript": "^5.4.5",
|
||||||
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
@ -531,6 +532,17 @@
|
|||||||
"node": ">=0.3.1"
|
"node": ">=0.3.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/dotenv": {
|
||||||
|
"version": "16.4.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
|
||||||
|
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=12"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://dotenvx.com"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/esbuild": {
|
"node_modules/esbuild": {
|
||||||
"version": "0.20.2",
|
"version": "0.20.2",
|
||||||
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz",
|
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz",
|
||||||
@ -744,6 +756,18 @@
|
|||||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||||
"peer": true
|
"peer": true
|
||||||
},
|
},
|
||||||
|
"node_modules/uuid": {
|
||||||
|
"version": "9.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||||
|
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||||
|
"funding": [
|
||||||
|
"https://github.com/sponsors/broofa",
|
||||||
|
"https://github.com/sponsors/ctavan"
|
||||||
|
],
|
||||||
|
"bin": {
|
||||||
|
"uuid": "dist/bin/uuid"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/v8-compile-cache-lib": {
|
"node_modules/v8-compile-cache-lib": {
|
||||||
"version": "3.0.1",
|
"version": "3.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
|
||||||
@ -772,18 +796,6 @@
|
|||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"zod": "^3.23.3"
|
"zod": "^3.23.3"
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"node_modules/uuid": {
|
|
||||||
"version": "9.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
|
||||||
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
|
||||||
"funding": [
|
|
||||||
"https://github.com/sponsors/broofa",
|
|
||||||
"https://github.com/sponsors/ctavan"
|
|
||||||
],
|
|
||||||
"bin": {
|
|
||||||
"uuid": "dist/bin/uuid"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -11,9 +11,8 @@
|
|||||||
"author": "",
|
"author": "",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
|
||||||
"uuid": "^9.0.1",
|
|
||||||
"@mendable/firecrawl-js": "^0.0.19",
|
"@mendable/firecrawl-js": "^0.0.19",
|
||||||
|
"axios": "^1.6.8",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
"typescript": "^5.4.5",
|
"typescript": "^5.4.5",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8"
|
||||||
|
@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id)
|
|||||||
|
|
||||||
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
|
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
|
||||||
|
|
||||||
|
## Running the Tests with Pytest
|
||||||
|
|
||||||
|
To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling.
|
||||||
|
|
||||||
|
### Running the Tests
|
||||||
|
|
||||||
|
To run the tests, execute the following commands:
|
||||||
|
|
||||||
|
Install pytest:
|
||||||
|
```bash
|
||||||
|
pip install pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
Run:
|
||||||
|
```bash
|
||||||
|
pytest firecrawl/__tests__/e2e_withAuth/test.py
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
|
Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
API_URL=http://localhost:3002
|
||||||
|
ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py
|
||||||
|
TEST_API_KEY=fc-YOUR_API_KEY
|
168
apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
Normal file
168
apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
import importlib.util
|
||||||
|
import pytest
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
from uuid import uuid4
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
API_URL = "http://127.0.0.1:3002";
|
||||||
|
ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
|
||||||
|
TEST_API_KEY = os.getenv('TEST_API_KEY')
|
||||||
|
|
||||||
|
print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")
|
||||||
|
|
||||||
|
spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
|
||||||
|
firecrawl = importlib.util.module_from_spec(spec)
|
||||||
|
spec.loader.exec_module(firecrawl)
|
||||||
|
FirecrawlApp = firecrawl.FirecrawlApp
|
||||||
|
|
||||||
|
def test_no_api_key():
|
||||||
|
with pytest.raises(Exception) as excinfo:
|
||||||
|
invalid_app = FirecrawlApp(api_url=API_URL)
|
||||||
|
assert "No API key provided" in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_scrape_url_invalid_api_key():
|
||||||
|
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
||||||
|
with pytest.raises(Exception) as excinfo:
|
||||||
|
invalid_app.scrape_url('https://firecrawl.dev')
|
||||||
|
assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_blocklisted_url():
|
||||||
|
blocklisted_url = "https://facebook.com/fake-test"
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
with pytest.raises(Exception) as excinfo:
|
||||||
|
app.scrape_url(blocklisted_url)
|
||||||
|
assert "Failed to scrape URL. Status code: 403" in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_successful_response_with_valid_preview_token():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
|
||||||
|
response = app.scrape_url('https://firecrawl.dev')
|
||||||
|
assert response is not None
|
||||||
|
assert 'content' in response
|
||||||
|
assert "🔥 Firecrawl" in response['content']
|
||||||
|
|
||||||
|
def test_scrape_url_e2e():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.scrape_url('https://firecrawl.dev')
|
||||||
|
assert response is not None
|
||||||
|
assert 'content' in response
|
||||||
|
assert 'markdown' in response
|
||||||
|
assert 'metadata' in response
|
||||||
|
assert 'html' not in response
|
||||||
|
assert "🔥 Firecrawl" in response['content']
|
||||||
|
|
||||||
|
def test_successful_response_with_valid_api_key_and_include_html():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
|
||||||
|
assert response is not None
|
||||||
|
assert 'content' in response
|
||||||
|
assert 'markdown' in response
|
||||||
|
assert 'html' in response
|
||||||
|
assert 'metadata' in response
|
||||||
|
assert "🔥 Firecrawl" in response['content']
|
||||||
|
assert "🔥 Firecrawl" in response['markdown']
|
||||||
|
assert "<h1" in response['html']
|
||||||
|
|
||||||
|
def test_successful_response_for_valid_scrape_with_pdf_file():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
|
||||||
|
assert response is not None
|
||||||
|
assert 'content' in response
|
||||||
|
assert 'metadata' in response
|
||||||
|
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
|
||||||
|
|
||||||
|
def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
|
||||||
|
time.sleep(6) # wait for 6 seconds
|
||||||
|
assert response is not None
|
||||||
|
assert 'content' in response
|
||||||
|
assert 'metadata' in response
|
||||||
|
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
|
||||||
|
|
||||||
|
def test_crawl_url_invalid_api_key():
|
||||||
|
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
||||||
|
with pytest.raises(Exception) as excinfo:
|
||||||
|
invalid_app.crawl_url('https://firecrawl.dev')
|
||||||
|
assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_should_return_error_for_blocklisted_url():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
blocklisted_url = "https://twitter.com/fake-test"
|
||||||
|
with pytest.raises(Exception) as excinfo:
|
||||||
|
app.crawl_url(blocklisted_url)
|
||||||
|
assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_crawl_url_wait_for_completion_e2e():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
|
||||||
|
assert response is not None
|
||||||
|
assert len(response) > 0
|
||||||
|
assert 'content' in response[0]
|
||||||
|
assert "🔥 Firecrawl" in response[0]['content']
|
||||||
|
|
||||||
|
def test_crawl_url_with_idempotency_key_e2e():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
uniqueIdempotencyKey = str(uuid4())
|
||||||
|
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||||
|
assert response is not None
|
||||||
|
assert len(response) > 0
|
||||||
|
assert 'content' in response[0]
|
||||||
|
assert "🔥 Firecrawl" in response[0]['content']
|
||||||
|
|
||||||
|
with pytest.raises(Exception) as excinfo:
|
||||||
|
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||||
|
assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_check_crawl_status_e2e():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
|
||||||
|
assert response is not None
|
||||||
|
assert 'jobId' in response
|
||||||
|
|
||||||
|
time.sleep(30) # wait for 30 seconds
|
||||||
|
status_response = app.check_crawl_status(response['jobId'])
|
||||||
|
assert status_response is not None
|
||||||
|
assert 'status' in status_response
|
||||||
|
assert status_response['status'] == 'completed'
|
||||||
|
assert 'data' in status_response
|
||||||
|
assert len(status_response['data']) > 0
|
||||||
|
|
||||||
|
def test_search_e2e():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.search("test query")
|
||||||
|
assert response is not None
|
||||||
|
assert 'content' in response[0]
|
||||||
|
assert len(response) > 2
|
||||||
|
|
||||||
|
def test_search_invalid_api_key():
|
||||||
|
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
||||||
|
with pytest.raises(Exception) as excinfo:
|
||||||
|
invalid_app.search("test query")
|
||||||
|
assert "Failed to search. Status code: 401" in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_llm_extraction():
|
||||||
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
response = app.scrape_url("https://mendable.ai", {
|
||||||
|
'extractorOptions': {
|
||||||
|
'mode': 'llm-extraction',
|
||||||
|
'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
||||||
|
'extractionSchema': {
|
||||||
|
'type': 'object',
|
||||||
|
'properties': {
|
||||||
|
'company_mission': {'type': 'string'},
|
||||||
|
'supports_sso': {'type': 'boolean'},
|
||||||
|
'is_open_source': {'type': 'boolean'}
|
||||||
|
},
|
||||||
|
'required': ['company_mission', 'supports_sso', 'is_open_source']
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
assert response is not None
|
||||||
|
assert 'llm_extraction' in response
|
||||||
|
llm_extraction = response['llm_extraction']
|
||||||
|
assert 'company_mission' in llm_extraction
|
||||||
|
assert isinstance(llm_extraction['supports_sso'], bool)
|
||||||
|
assert isinstance(llm_extraction['is_open_source'], bool)
|
3
apps/python-sdk/requirements.txt
Normal file
3
apps/python-sdk/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
requests
|
||||||
|
pytest
|
||||||
|
python-dotenv
|
@ -16,7 +16,9 @@ setup(
|
|||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"requests",
|
'requests',
|
||||||
|
'pytest',
|
||||||
|
'python-dotenv',
|
||||||
],
|
],
|
||||||
python_requires='>=3.8',
|
python_requires='>=3.8',
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
Loading…
Reference in New Issue
Block a user