0

added github action workflow

This commit is contained in:
rafaelsideguide 2024-05-27 14:14:00 -03:00
parent 397769c7e3
commit 63772ea711
9 changed files with 354 additions and 61 deletions

31
.github/workflows/python-tests.yml vendored Normal file
View File

@ -0,0 +1,31 @@
name: Run Python SDK E2E Tests
on:
pull_request:
branches:
- main
env:
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python setup.py
working-directory: ./apps/python-sdk
- name: Test with pytest
run: |
cd apps/python-sdk
pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk

View File

@ -0,0 +1,3 @@
API_URL=http://localhost:3002
TEST_API_KEY=fc-YOUR_API_KEY

View File

@ -1,22 +1,25 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.17-beta.8", "version": "0.0.22",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.17-beta.8", "version": "0.0.22",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",
"uuid": "^9.0.1",
"zod": "^3.23.8", "zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0" "zod-to-json-schema": "^3.23.0"
}, },
"devDependencies": { "devDependencies": {
"@jest/globals": "^29.7.0", "@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0", "@types/axios": "^0.14.0",
"@types/node": "^20.12.7", "@types/jest": "^29.5.12",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
"jest": "^29.7.0", "jest": "^29.7.0",
"ts-jest": "^29.1.2", "ts-jest": "^29.1.2",
"typescript": "^5.4.5" "typescript": "^5.4.5"
@ -1046,10 +1049,20 @@
"@types/istanbul-lib-report": "*" "@types/istanbul-lib-report": "*"
} }
}, },
"node_modules/@types/jest": {
"version": "29.5.12",
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
"dev": true,
"dependencies": {
"expect": "^29.0.0",
"pretty-format": "^29.0.0"
}
},
"node_modules/@types/node": { "node_modules/@types/node": {
"version": "20.12.7", "version": "20.12.12",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz",
"integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", "integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"undici-types": "~5.26.4" "undici-types": "~5.26.4"
@ -1061,6 +1074,12 @@
"integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==",
"dev": true "dev": true
}, },
"node_modules/@types/uuid": {
"version": "9.0.8",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
"integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==",
"dev": true
},
"node_modules/@types/yargs": { "node_modules/@types/yargs": {
"version": "17.0.32", "version": "17.0.32",
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz",
@ -3641,6 +3660,18 @@
"browserslist": ">= 4.21.0" "browserslist": ">= 4.21.0"
} }
}, },
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/v8-to-istanbul": { "node_modules/v8-to-istanbul": {
"version": "9.2.0", "version": "9.2.0",
"resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz", "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz",

View File

@ -9,7 +9,7 @@
"build": "tsc", "build": "tsc",
"publish": "npm run build && npm publish --access public", "publish": "npm run build && npm publish --access public",
"publish-beta": "npm run build && npm publish --access public --tag beta", "publish-beta": "npm run build && npm publish --access public --tag beta",
"test": "jest src/**/*.test.ts" "test": "jest src/__tests__/**/*.test.ts"
}, },
"repository": { "repository": {
"type": "git", "type": "git",
@ -19,6 +19,7 @@
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",
"uuid": "^9.0.1",
"zod": "^3.23.8", "zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0" "zod-to-json-schema": "^3.23.0"
}, },
@ -29,7 +30,9 @@
"devDependencies": { "devDependencies": {
"@jest/globals": "^29.7.0", "@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0", "@types/axios": "^0.14.0",
"@types/node": "^20.12.7", "@types/jest": "^29.5.12",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
"jest": "^29.7.0", "jest": "^29.7.0",
"ts-jest": "^29.1.2", "ts-jest": "^29.1.2",
"typescript": "^5.4.5" "typescript": "^5.4.5"

View File

@ -0,0 +1,147 @@
import FirecrawlApp from '../../index';
import { v4 as uuidv4 } from 'uuid';
import dotenv from 'dotenv';
dotenv.config();
const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = process.env.API_URL;
describe('FirecrawlApp E2E Tests', () => {
test('should throw error for no API key', () => {
expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
}).toThrow("No API key provided");
});
test('should throw error for invalid API key on scrape', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
});
test('should throw error for blocklisted URL on scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://facebook.com/fake-test";
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});
test('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev');
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
}, 10000); // 10 seconds timeout
test('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev');
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data).toHaveProperty('markdown');
expect(response.data).toHaveProperty('metadata');
expect(response.data).not.toHaveProperty('html');
}, 10000); // 10 seconds timeout
test('should return successful response with valid API key and include HTML', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data.markdown).toContain("🔥 Firecrawl");
expect(response.data.html).toContain("<h1");
}, 10000); // 10 seconds timeout
test('should return successful response for valid scrape with PDF file', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
test('should throw error for invalid API key on crawl', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
});
test('should throw error for blocklisted URL on crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://twitter.com/fake-test";
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});
test('should return successful response for crawl and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true);
expect(response).not.toBeNull();
expect(response[0].content).toContain("🔥 Firecrawl");
}, 60000); // 60 seconds timeout
test('should handle idempotency key for crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const uniqueIdempotencyKey = uuidv4();
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey);
expect(response).not.toBeNull();
expect(response[0].content).toContain("🔥 Firecrawl");
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
}, 30000); // 30 seconds timeout
test('should check crawl status', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
expect(response).not.toBeNull();
expect(response.jobId).toBeDefined();
await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds
const statusResponse = await app.checkCrawlStatus(response.jobId);
expect(statusResponse).not.toBeNull();
expect(statusResponse.status).toBe('completed');
expect(statusResponse.data.length).toBeGreaterThan(0);
}, 30000); // 30 seconds timeout
test('should return successful response for search', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.search("test query");
expect(response).not.toBeNull();
expect(response.data[0].content).toBeDefined();
expect(response.data.length).toBeGreaterThan(2);
}, 30000); // 30 seconds timeout
test('should throw error for invalid API key on search', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
});
test('should perform LLM extraction', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl("https://mendable.ai", {
extractorOptions: {
mode: 'llm-extraction',
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
extractionSchema: {
type: 'object',
properties: {
company_mission: { type: 'string' },
supports_sso: { type: 'boolean' },
is_open_source: { type: 'boolean' }
},
required: ['company_mission', 'supports_sso', 'is_open_source']
}
}
});
expect(response).not.toBeNull();
expect(response.data.llm_extraction).toBeDefined();
const llmExtraction = response.data.llm_extraction;
expect(llmExtraction.company_mission).toBeDefined();
expect(typeof llmExtraction.supports_sso).toBe('boolean');
expect(typeof llmExtraction.is_open_source).toBe('boolean');
}, 30000); // 30 seconds timeout
});

View File

@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
*/ */
export interface FirecrawlAppConfig { export interface FirecrawlAppConfig {
apiKey?: string | null; apiKey?: string | null;
apiUrl?: string | null;
} }
/** /**
@ -63,6 +64,7 @@ export interface JobStatusResponse {
*/ */
export default class FirecrawlApp { export default class FirecrawlApp {
private apiKey: string; private apiKey: string;
private apiUrl: string = "https://api.firecrawl.dev";
/** /**
* Initializes a new instance of the FirecrawlApp class. * Initializes a new instance of the FirecrawlApp class.
@ -107,7 +109,7 @@ export default class FirecrawlApp {
} }
try { try {
const response: AxiosResponse = await axios.post( const response: AxiosResponse = await axios.post(
"https://api.firecrawl.dev/v0/scrape", this.apiUrl + "/v0/scrape",
jsonData, jsonData,
{ headers }, { headers },
); );
@ -147,7 +149,7 @@ export default class FirecrawlApp {
} }
try { try {
const response: AxiosResponse = await axios.post( const response: AxiosResponse = await axios.post(
"https://api.firecrawl.dev/v0/search", this.apiUrl + "/v0/search",
jsonData, jsonData,
{ headers } { headers }
); );
@ -190,7 +192,7 @@ export default class FirecrawlApp {
} }
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
"https://api.firecrawl.dev/v0/crawl", this.apiUrl + "/v0/crawl",
jsonData, jsonData,
headers headers
); );
@ -220,7 +222,7 @@ export default class FirecrawlApp {
const headers: AxiosRequestHeaders = this.prepareHeaders(); const headers: AxiosRequestHeaders = this.prepareHeaders();
try { try {
const response: AxiosResponse = await this.getRequest( const response: AxiosResponse = await this.getRequest(
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, this.apiUrl + `/v0/crawl/status/${jobId}`,
headers headers
); );
if (response.status === 200) { if (response.status === 200) {
@ -292,7 +294,7 @@ export default class FirecrawlApp {
): Promise<any> { ): Promise<any> {
while (true) { while (true) {
const statusResponse: AxiosResponse = await this.getRequest( const statusResponse: AxiosResponse = await this.getRequest(
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, this.apiUrl + `/v0/crawl/status/${jobId}`,
headers headers
); );
if (statusResponse.status === 200) { if (statusResponse.status === 200) {

View File

@ -0,0 +1,3 @@
API_URL=http://localhost:3002
ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py
TEST_API_KEY=fc-YOUR_API_KEY

View File

@ -1,78 +1,150 @@
import importlib.util
import pytest import pytest
from firecrawl import FirecrawlApp import time
import os
from uuid import uuid4
from dotenv import load_dotenv
TEST_API_KEY = "fc-YOUR_API_KEY" load_dotenv()
TEST_URL = "https://firecrawl.dev"
def test_scrape_url_e2e(): API_URL = "http://127.0.0.1:3002";
app = FirecrawlApp(api_key=TEST_API_KEY) ABSOLUTE_FIRECRAWL_PATH = "./apps/python-sdk/firecrawl/firecrawl.py"
response = app.scrape_url(TEST_URL) TEST_API_KEY = os.getenv('TEST_API_KEY')
print(response)
print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")
spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
firecrawl = importlib.util.module_from_spec(spec)
spec.loader.exec_module(firecrawl)
FirecrawlApp = firecrawl.FirecrawlApp
def test_no_api_key():
with pytest.raises(Exception) as excinfo:
invalid_app = FirecrawlApp(api_url=API_URL)
assert "No API key provided" in str(excinfo.value)
def test_scrape_url_invalid_api_key():
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
with pytest.raises(Exception) as excinfo:
invalid_app.scrape_url('https://firecrawl.dev')
assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)
def test_blocklisted_url():
blocklisted_url = "https://facebook.com/fake-test"
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
with pytest.raises(Exception) as excinfo:
app.scrape_url(blocklisted_url)
assert "Failed to scrape URL. Status code: 403" in str(excinfo.value)
def test_successful_response_with_valid_preview_token():
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
response = app.scrape_url('https://firecrawl.dev')
assert response is not None assert response is not None
assert 'content' in response assert 'content' in response
assert "🔥 Firecrawl" in response['content'] assert "🔥 Firecrawl" in response['content']
def test_scrape_url_invalid_api_key(): def test_scrape_url_e2e():
invalid_app = FirecrawlApp(api_key="invalid_api_key") app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
with pytest.raises(Exception) as excinfo: response = app.scrape_url('https://firecrawl.dev')
invalid_app.scrape_url(TEST_URL) assert response is not None
assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) assert 'content' in response
assert 'markdown' in response
assert 'metadata' in response
assert 'html' not in response
assert "🔥 Firecrawl" in response['content']
def test_crawl_url_e2e(): def test_successful_response_with_valid_api_key_and_include_html():
app = FirecrawlApp(api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.crawl_url(TEST_URL, {'crawlerOptions': {'excludes': ['blog/*']}}, True) response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
assert response is not None
assert 'content' in response
assert 'markdown' in response
assert 'html' in response
assert 'metadata' in response
assert "🔥 Firecrawl" in response['content']
assert "🔥 Firecrawl" in response['markdown']
assert "<h1" in response['html']
def test_successful_response_for_valid_scrape_with_pdf_file():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
assert response is not None
assert 'content' in response
assert 'metadata' in response
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
time.sleep(6) # wait for 6 seconds
assert response is not None
assert 'content' in response
assert 'metadata' in response
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
def test_crawl_url_invalid_api_key():
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
with pytest.raises(Exception) as excinfo:
invalid_app.crawl_url('https://firecrawl.dev')
assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value)
def test_should_return_error_for_blocklisted_url():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
blocklisted_url = "https://twitter.com/fake-test"
with pytest.raises(Exception) as excinfo:
app.crawl_url(blocklisted_url)
assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value)
def test_crawl_url_wait_for_completion_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
assert response is not None
assert len(response) > 0
assert 'content' in response[0]
assert "🔥 Firecrawl" in response[0]['content']
def test_crawl_url_with_idempotency_key_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
uniqueIdempotencyKey = str(uuid4())
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
assert response is not None assert response is not None
assert len(response) > 0 assert len(response) > 0
assert 'content' in response[0] assert 'content' in response[0]
assert "🔥 Firecrawl" in response[0]['content'] assert "🔥 Firecrawl" in response[0]['content']
def test_crawl_url_invalid_api_key():
invalid_app = FirecrawlApp(api_key="invalid_api_key")
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
invalid_app.crawl_url(TEST_URL) app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value) assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value)
def test_check_crawl_status_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
assert response is not None
assert 'jobId' in response
time.sleep(30) # wait for 30 seconds
status_response = app.check_crawl_status(response['jobId'])
assert status_response is not None
assert 'status' in status_response
assert status_response['status'] == 'completed'
assert 'data' in status_response
assert len(status_response['data']) > 0
def test_search_e2e(): def test_search_e2e():
app = FirecrawlApp(api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.search("test query") response = app.search("test query")
assert response is not None assert response is not None
assert 'content' in response[0] assert 'content' in response[0]
assert len(response) > 2 assert len(response) > 2
def test_search_invalid_api_key(): def test_search_invalid_api_key():
invalid_app = FirecrawlApp(api_key="invalid_api_key") invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
invalid_app.search("test query") invalid_app.search("test query")
assert "Failed to search. Status code: 401" in str(excinfo.value) assert "Failed to search. Status code: 401" in str(excinfo.value)
def test_crawl_with_fast_mode():
app = FirecrawlApp(api_key=TEST_API_KEY)
response = app.crawl_url(TEST_URL, {'crawlerOptions': {'mode': 'fast'}}, True)
assert response is not None
assert len(response) > 0
assert 'content' in response[0]
def test_crawl_with_html_inclusion():
app = FirecrawlApp(api_key=TEST_API_KEY)
response = app.crawl_url(TEST_URL, {'pageOptions': {'includeHtml': True}}, False)
assert response is not None
assert 'jobId' in response
def test_crawl_with_pdf_extraction():
app = FirecrawlApp(api_key=TEST_API_KEY)
response = app.crawl_url("https://arxiv.org/pdf/astro-ph/9301001",
{'crawlerOptions': {'limit': 10, 'excludes': ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']}}, False)
assert response is not None
assert 'jobId' in response
def test_timeout_during_scraping():
app = FirecrawlApp(api_key=TEST_API_KEY)
with pytest.raises(Exception) as excinfo:
app.scrape_url(TEST_URL, {'timeout': 1000})
assert 'Failed to scrape URL. Status code: 408' in str(excinfo.value)
def test_llm_extraction(): def test_llm_extraction():
app = FirecrawlApp(api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.scrape_url("https://mendable.ai", { response = app.scrape_url("https://mendable.ai", {
'extractorOptions': { 'extractorOptions': {
'mode': 'llm-extraction', 'mode': 'llm-extraction',

View File

@ -11,5 +11,6 @@ setup(
install_requires=[ install_requires=[
'requests', 'requests',
'pytest', 'pytest',
'python-dotenv',
], ],
) )