
Merge pull request #246 from mendableai/194-sdk-ci-pipeline-for-publishing-pythonnode-sdk

[Feat] CI/CD for publishing js and python SDKs

commit 4c3bfe4eb5 by Rafael Miller, 2024-06-06 16:53:42 -03:00, committed by GitHub
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
11 changed files with 359 additions and 173 deletions

View File

@@ -1,24 +1,14 @@
 """
-checks local verions against published verions.
+checks local versions against published versions.
 # Usage:
-Unix:
 python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js
-Windows:
-python .github\scripts\check_version_has_incremented.py js .\apps\js-sdk\firecrawl @mendable/firecrawl-js
 Local version: 0.0.22
 Published version: 0.0.21
 true
-Unix:
 python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py
-Windows:
-python .github\scripts\check_version_has_incremented.py python .\apps\python-sdk\firecrawl firecrawl-py
 Local version: 0.0.11
 Published version: 0.0.11
 false
@@ -88,8 +78,8 @@ if __name__ == "__main__":
 raise ValueError("Invalid package type. Use 'python' or 'js'.")
 # Print versions for debugging
-print(f"Local version: {current_version}")
-print(f"Published version: {published_version}")
+# print(f"Local version: {current_version}")
+# print(f"Published version: {published_version}")
 # Compare versions and print result
 if is_version_incremented(current_version, published_version):
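The hunk above only shows the script's docstring and its final comparison; the is_version_incremented helper itself is not part of this diff. As a rough, hypothetical sketch only (assuming the packaging library that the workflows below install), the comparison it performs amounts to something like:

# Hypothetical sketch of the local-vs-published comparison; not the actual script.
from packaging.version import parse


def is_version_incremented(local_version: str, published_version: str) -> bool:
    """Return True only if the local version is strictly newer than the published one."""
    return parse(local_version) > parse(published_version)


if __name__ == "__main__":
    # Prints "true"/"false" so a shell step can capture the result, mirroring the usage above.
    print(str(is_version_incremented("0.0.22", "0.0.21")).lower())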

View File

@@ -3,8 +3,6 @@ on:
 push:
 branches:
 - main
-schedule:
-- cron: '0 */2 * * *'
 env:
 ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -25,9 +23,12 @@
 SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
 SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
 TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
+PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
 jobs:
-pre-deploy:
+pre-deploy-e2e-tests:
 name: Pre-deploy checks
 runs-on: ubuntu-latest
 services:
@@ -61,7 +62,7 @@ jobs:
 pre-deploy-test-suite:
 name: Test Suite
-needs: pre-deploy
+needs: pre-deploy-e2e-tests
 runs-on: ubuntu-latest
 services:
 redis:
@@ -94,10 +95,17 @@
 run: |
 npm run test
 working-directory: ./apps/test-suite
-- name: Set up Python ${{ matrix.python-version }}
+python-sdk-tests:
+name: Python SDK Tests
+needs: pre-deploy-e2e-tests
+runs-on: ubuntu-latest
+steps:
+- uses: actions/checkout@v3
+- name: Set up Python
 uses: actions/setup-python@v4
 with:
-python-version: ${{ matrix.python-version }}
+python-version: '3.x'
 - name: Install Python dependencies
 run: |
 python -m pip install --upgrade pip
@@ -107,6 +115,17 @@ jobs:
 run: |
 pytest firecrawl/__tests__/e2e_withAuth/test.py
 working-directory: ./apps/python-sdk
+js-sdk-tests:
+name: JavaScript SDK Tests
+needs: pre-deploy-e2e-tests
+runs-on: ubuntu-latest
+steps:
+- uses: actions/checkout@v3
+- name: Set up Node.js
+uses: actions/setup-node@v3
+with:
+node-version: "20"
 - name: Install dependencies for JavaScript SDK
 run: pnpm install
 working-directory: ./apps/js-sdk/firecrawl
@@ -117,7 +136,7 @@ jobs:
 deploy:
 name: Deploy app
 runs-on: ubuntu-latest
-needs: pre-deploy-test-suite
+needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests]
 steps:
 - uses: actions/checkout@v3
 - name: Change directory
@@ -126,3 +145,83 @@ jobs:
 - run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
 env:
 FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
+build-and-publish-python-sdk:
+runs-on: ubuntu-latest
+needs: deploy
+steps:
+- name: Checkout repository
+uses: actions/checkout@v3
+- name: Set up Python
+uses: actions/setup-python@v4
+with:
+python-version: '3.x'
+- name: Install dependencies
+run: |
+python -m pip install --upgrade pip
+pip install setuptools wheel twine build requests packaging
+- name: Run version check script
+id: version_check_script
+run: |
+PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
+echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV
+- name: Build the package
+if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
+run: |
+python -m build
+working-directory: ./apps/python-sdk
+- name: Publish to PyPI
+if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }} # must match the variable exported by the version check step above
+env:
+TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+run: |
+twine upload dist/*
+working-directory: ./apps/python-sdk
+build-and-publish-js-sdk:
+runs-on: ubuntu-latest
+needs: deploy
+steps:
+- uses: actions/checkout@v3
+- name: Set up Node.js
+uses: actions/setup-node@v3
+with:
+node-version: '20'
+registry-url: 'https://registry.npmjs.org/'
+scope: '@mendable'
+always-auth: true
+- name: Install pnpm
+run: npm install -g pnpm
+- name: Install python for running version check script
+run: |
+python -m pip install --upgrade pip
+pip install setuptools wheel requests packaging
+- name: Install dependencies for JavaScript SDK
+run: pnpm install
+working-directory: ./apps/js-sdk/firecrawl
+- name: Run version check script
+id: version_check_script
+run: |
+VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
+echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
+- name: Build and publish to npm
+if: ${{ env.VERSION_INCREMENTED == 'true' }}
+env:
+NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+run: |
+npm run build-and-publish
+working-directory: ./apps/js-sdk/firecrawl
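Both publish jobs added above share one gating pattern: run the version check script, write its true/false output to $GITHUB_ENV, and make the build and publish steps conditional on that flag. Reduced to its bare bones (the job, step, and variable names below are placeholders, not part of this commit), the pattern looks like this:

# Illustrative sketch of the version-gated publish pattern; names are placeholders,
# only the $GITHUB_ENV / `if:` mechanics are the point.
jobs:
  gated-publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Decide whether to publish
        run: |
          SHOULD_PUBLISH=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
          echo "SHOULD_PUBLISH=$SHOULD_PUBLISH" >> $GITHUB_ENV
      - name: Publish
        if: ${{ env.SHOULD_PUBLISH == 'true' }}
        run: echo "publishing..."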

View File

@@ -1,9 +1,7 @@
 name: Run JavaScript SDK E2E Tests
-on:
-pull_request:
-branches:
-- main
+on: []
 env:
 ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
 BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}

.github/workflows/publish-js-sdk.yml (new file, 46 lines added)
View File

@@ -0,0 +1,46 @@
name: Publish JavaScript SDK
on: []
env:
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
jobs:
build-and-publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Node.js
uses: actions/setup-node@v3
with:
node-version: '20'
registry-url: 'https://registry.npmjs.org/'
scope: '@mendable'
always-auth: true
- name: Install pnpm
run: npm install -g pnpm
- name: Install python for running version check script
run: |
python -m pip install --upgrade pip
pip install setuptools wheel requests packaging
- name: Install dependencies for JavaScript SDK
run: pnpm install
working-directory: ./apps/js-sdk/firecrawl
- name: Run version check script
id: version_check_script
run: |
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
- name: Build and publish to npm
if: ${{ env.VERSION_INCREMENTED == 'true' }}
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
npm run build-and-publish
working-directory: ./apps/js-sdk/firecrawl

View File

@@ -0,0 +1,47 @@
name: Publish Python SDK
on: []
env:
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
jobs:
build-and-publish:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine build requests packaging
- name: Run version check script
id: version_check_script
run: |
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
- name: Build the package
if: ${{ env.VERSION_INCREMENTED == 'true' }}
run: |
python -m build
working-directory: ./apps/python-sdk
- name: Publish to PyPI
if: ${{ env.VERSION_INCREMENTED == 'true' }}
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
working-directory: ./apps/python-sdk
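Both standalone publish workflows (and the two SDK E2E workflows above) are checked in with on: [], which leaves them with no triggers, so they never run automatically; the publishing that does run lives in the jobs added to the main deploy workflow. Purely as an illustration of the trigger syntax, and not something this commit adds, a trigger-less workflow like this could be made manually runnable with workflow_dispatch:

# Illustrative only: how a trigger-less workflow could be exposed for manual runs.
name: Publish Python SDK
on:
  workflow_dispatch: {}  # adds a "Run workflow" button in the Actions tab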

View File

@@ -1,9 +1,7 @@
 name: Run Python SDK E2E Tests
-on:
-pull_request:
-branches:
-- main
+on: []
 env:
 ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
 BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}

View File

@@ -17,7 +17,7 @@ describe("E2E Tests for API Routes", () => {
 delete process.env.USE_DB_AUTHENTICATION;
 });
 describe("GET /", () => {
-it("should return Hello, world! message", async () => {
+it.concurrent("should return Hello, world! message", async () => {
 const response = await request(TEST_URL).get("/");
 expect(response.statusCode).toBe(200);
@@ -26,7 +26,7 @@ describe("E2E Tests for API Routes", () => {
 });
 describe("GET /test", () => {
-it("should return Hello, world! message", async () => {
+it.concurrent("should return Hello, world! message", async () => {
 const response = await request(TEST_URL).get("/test");
 expect(response.statusCode).toBe(200);
 expect(response.text).toContain("Hello, world!");
@@ -34,12 +34,12 @@ describe("E2E Tests for API Routes", () => {
 });
 describe("POST /v0/scrape", () => {
-it("should require authorization", async () => {
+it.concurrent("should require authorization", async () => {
 const response = await request(app).post("/v0/scrape");
 expect(response.statusCode).toBe(401);
 });
-it("should return an error response with an invalid API key", async () => {
+it.concurrent("should return an error response with an invalid API key", async () => {
 const response = await request(TEST_URL)
 .post("/v0/scrape")
 .set("Authorization", `Bearer invalid-api-key`)
@@ -48,7 +48,7 @@ describe("E2E Tests for API Routes", () => {
 expect(response.statusCode).toBe(401);
 });
-it("should return an error for a blocklisted URL", async () => {
+it.concurrent("should return an error for a blocklisted URL", async () => {
 const blocklistedUrl = "https://facebook.com/fake-test";
 const response = await request(TEST_URL)
 .post("/v0/scrape")
@@ -61,37 +61,38 @@ describe("E2E Tests for API Routes", () => {
 );
 });
-it("should return a successful response with a valid preview token", async () => {
-const response = await request(TEST_URL)
-.post("/v0/scrape")
-.set("Authorization", `Bearer this_is_just_a_preview_token`)
-.set("Content-Type", "application/json")
-.send({ url: "https://roastmywebsite.ai" });
-expect(response.statusCode).toBe(200);
-}, 30000); // 30 seconds timeout
+// tested on rate limit test
+// it.concurrent("should return a successful response with a valid preview token", async () => {
+// const response = await request(TEST_URL)
+// .post("/v0/scrape")
+// .set("Authorization", `Bearer this_is_just_a_preview_token`)
+// .set("Content-Type", "application/json")
+// .send({ url: "https://roastmywebsite.ai" });
+// expect(response.statusCode).toBe(200);
+// }, 30000); // 30 seconds timeout
-it("should return a successful response with a valid API key", async () => {
+it.concurrent("should return a successful response with a valid API key", async () => {
 const response = await request(TEST_URL)
 .post("/v0/scrape")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
 .set("Content-Type", "application/json")
-.send({ url: "https://firecrawl.dev" });
+.send({ url: "https://roastmywebsite.ai" });
 expect(response.statusCode).toBe(200);
 expect(response.body).toHaveProperty("data");
 expect(response.body.data).toHaveProperty("content");
 expect(response.body.data).toHaveProperty("markdown");
 expect(response.body.data).toHaveProperty("metadata");
 expect(response.body.data).not.toHaveProperty("html");
-expect(response.body.data.content).toContain("🔥 Firecrawl");
+expect(response.body.data.content).toContain("_Roast_");
 }, 30000); // 30 seconds timeout
-it("should return a successful response with a valid API key and includeHtml set to true", async () => {
+it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
 const response = await request(TEST_URL)
 .post("/v0/scrape")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
 .set("Content-Type", "application/json")
 .send({
-url: "https://firecrawl.dev",
+url: "https://roastmywebsite.ai",
 pageOptions: { includeHtml: true },
 });
 expect(response.statusCode).toBe(200);
@@ -100,12 +101,12 @@ describe("E2E Tests for API Routes", () => {
 expect(response.body.data).toHaveProperty("markdown");
 expect(response.body.data).toHaveProperty("html");
 expect(response.body.data).toHaveProperty("metadata");
-expect(response.body.data.content).toContain("🔥 Firecrawl");
-expect(response.body.data.markdown).toContain("🔥 Firecrawl");
+expect(response.body.data.content).toContain("_Roast_");
+expect(response.body.data.markdown).toContain("_Roast_");
 expect(response.body.data.html).toContain("<h1");
 }, 30000); // 30 seconds timeout
-it('should return a successful response for a valid scrape with PDF file', async () => {
+it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
 const response = await request(TEST_URL)
 .post('/v0/scrape')
 .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@@ -120,7 +121,7 @@ describe("E2E Tests for API Routes", () => {
 expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
 }, 60000); // 60 seconds
-it('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
+it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
 const response = await request(TEST_URL)
 .post('/v0/scrape')
 .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@@ -136,7 +137,7 @@ describe("E2E Tests for API Routes", () => {
 }, 60000); // 60 seconds
 // TODO: add this test back once we nail the waitFor option to be more deterministic
-// it("should return a successful response with a valid API key and waitFor option", async () => {
+// it.concurrent("should return a successful response with a valid API key and waitFor option", async () => {
 // const startTime = Date.now();
 // const response = await request(TEST_URL)
 // .post("/v0/scrape")
@@ -158,12 +159,12 @@ describe("E2E Tests for API Routes", () => {
 });
 describe("POST /v0/crawl", () => {
-it("should require authorization", async () => {
+it.concurrent("should require authorization", async () => {
 const response = await request(TEST_URL).post("/v0/crawl");
 expect(response.statusCode).toBe(401);
 });
-it("should return an error response with an invalid API key", async () => {
+it.concurrent("should return an error response with an invalid API key", async () => {
 const response = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer invalid-api-key`)
@@ -172,7 +173,7 @@ describe("E2E Tests for API Routes", () => {
 expect(response.statusCode).toBe(401);
 });
-it("should return an error for a blocklisted URL", async () => {
+it.concurrent("should return an error for a blocklisted URL", async () => {
 const blocklistedUrl = "https://twitter.com/fake-test";
 const response = await request(TEST_URL)
 .post("/v0/crawl")
@@ -185,7 +186,7 @@ describe("E2E Tests for API Routes", () => {
 );
 });
-it("should return a successful response with a valid API key for crawl", async () => {
+it.concurrent("should return a successful response with a valid API key for crawl", async () => {
 const response = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -197,7 +198,7 @@ describe("E2E Tests for API Routes", () => {
 /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
 );
 });
-it('should prevent duplicate requests using the same idempotency key', async () => {
+it.concurrent('should prevent duplicate requests using the same idempotency key', async () => {
 const uniqueIdempotencyKey = uuidv4();
 // First request with the idempotency key
@@ -222,7 +223,7 @@ describe("E2E Tests for API Routes", () => {
 expect(secondResponse.body.error).toBe('Idempotency key already used');
 });
-it("should return a successful response with a valid API key and valid includes option", async () => {
+it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -259,7 +260,6 @@ describe("E2E Tests for API Routes", () => {
 );
 expect(urls.length).toBeGreaterThan(5);
 urls.forEach((url: string) => {
-console.log({url})
 expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
 });
@@ -273,7 +273,7 @@ describe("E2E Tests for API Routes", () => {
 expect(completedResponse.body.data[0].content).toContain("Mendable");
 }, 60000); // 60 seconds
-it("should return a successful response with a valid API key and valid excludes option", async () => {
+it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -314,7 +314,7 @@ describe("E2E Tests for API Routes", () => {
 });
 }, 90000); // 90 seconds
-it("should return a successful response with a valid API key and limit to 3", async () => {
+it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -354,7 +354,7 @@ describe("E2E Tests for API Routes", () => {
 expect(completedResponse.body.data[0].content).toContain("Mendable");
 }, 60000); // 60 seconds
-it("should return a successful response with max depth option for a valid crawl job", async () => {
+it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -396,7 +396,7 @@ describe("E2E Tests for API Routes", () => {
 });
 }, 120000);
-// it("should return a successful response with a valid API key and valid limit option", async () => {
+// it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
 // const crawlResponse = await request(TEST_URL)
 // .post("/v0/crawl")
 // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -441,13 +441,13 @@ describe("E2E Tests for API Routes", () => {
 // expect(completedResponse.body.data[0].content).not.toContain("main menu");
 // }, 60000); // 60 seconds
-it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
+it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
 .set("Content-Type", "application/json")
 .send({
-url: "https://firecrawl.dev",
+url: "https://roastmywebsite.ai",
 pageOptions: { includeHtml: true },
 });
 expect(crawlResponse.statusCode).toBe(200);
@@ -486,19 +486,19 @@ describe("E2E Tests for API Routes", () => {
 // 120 seconds
 expect(completedResponse.body.data[0]).toHaveProperty("html");
 expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
-expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
+expect(completedResponse.body.data[0].content).toContain("_Roast_");
+expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
 expect(completedResponse.body.data[0].html).toContain("<h1");
 }, 60000);
 });
 describe("POST /v0/crawlWebsitePreview", () => {
-it("should require authorization", async () => {
+it.concurrent("should require authorization", async () => {
 const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
 expect(response.statusCode).toBe(401);
 });
-it("should return an error response with an invalid API key", async () => {
+it.concurrent("should return an error response with an invalid API key", async () => {
 const response = await request(TEST_URL)
 .post("/v0/crawlWebsitePreview")
 .set("Authorization", `Bearer invalid-api-key`)
@@ -507,7 +507,7 @@ describe("E2E Tests for API Routes", () => {
 expect(response.statusCode).toBe(401);
 });
-// it("should return an error for a blocklisted URL", async () => {
+// it.concurrent("should return an error for a blocklisted URL", async () => {
 // const blocklistedUrl = "https://instagram.com/fake-test";
 // const response = await request(TEST_URL)
 // .post("/v0/crawlWebsitePreview")
@@ -519,7 +519,7 @@ describe("E2E Tests for API Routes", () => {
 // expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
 // });
-it("should return a timeout error when scraping takes longer than the specified timeout", async () => {
+it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
 const response = await request(TEST_URL)
 .post("/v0/scrape")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -529,27 +529,27 @@ describe("E2E Tests for API Routes", () => {
 expect(response.statusCode).toBe(408);
 }, 3000);
-it("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
-const response = await request(TEST_URL)
-.post("/v0/crawlWebsitePreview")
-.set("Authorization", `Bearer this_is_just_a_preview_token`)
-.set("Content-Type", "application/json")
-.send({ url: "https://firecrawl.dev" });
-expect(response.statusCode).toBe(200);
-expect(response.body).toHaveProperty("jobId");
-expect(response.body.jobId).toMatch(
-/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
-);
-});
+// it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
+// const response = await request(TEST_URL)
+// .post("/v0/crawlWebsitePreview")
+// .set("Authorization", `Bearer this_is_just_a_preview_token`)
+// .set("Content-Type", "application/json")
+// .send({ url: "https://firecrawl.dev" });
+// expect(response.statusCode).toBe(200);
+// expect(response.body).toHaveProperty("jobId");
+// expect(response.body.jobId).toMatch(
+// /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+// );
+// });
 });
 describe("POST /v0/search", () => {
-it("should require authorization", async () => {
+it.concurrent("should require authorization", async () => {
 const response = await request(TEST_URL).post("/v0/search");
 expect(response.statusCode).toBe(401);
 });
-it("should return an error response with an invalid API key", async () => {
+it.concurrent("should return an error response with an invalid API key", async () => {
 const response = await request(TEST_URL)
 .post("/v0/search")
 .set("Authorization", `Bearer invalid-api-key`)
@@ -558,7 +558,7 @@ describe("E2E Tests for API Routes", () => {
 expect(response.statusCode).toBe(401);
 });
-it("should return a successful response with a valid API key for search", async () => {
+it.concurrent("should return a successful response with a valid API key for search", async () => {
 const response = await request(TEST_URL)
 .post("/v0/search")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -572,31 +572,31 @@ describe("E2E Tests for API Routes", () => {
 });
 describe("GET /v0/crawl/status/:jobId", () => {
-it("should require authorization", async () => {
+it.concurrent("should require authorization", async () => {
 const response = await request(TEST_URL).get("/v0/crawl/status/123");
 expect(response.statusCode).toBe(401);
 });
-it("should return an error response with an invalid API key", async () => {
+it.concurrent("should return an error response with an invalid API key", async () => {
 const response = await request(TEST_URL)
 .get("/v0/crawl/status/123")
 .set("Authorization", `Bearer invalid-api-key`);
 expect(response.statusCode).toBe(401);
 });
-it("should return Job not found for invalid job ID", async () => {
+it.concurrent("should return Job not found for invalid job ID", async () => {
 const response = await request(TEST_URL)
 .get("/v0/crawl/status/invalidJobId")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
 expect(response.statusCode).toBe(404);
 });
-it("should return a successful crawl status response for a valid crawl job", async () => {
+it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
 .set("Content-Type", "application/json")
-.send({ url: "https://firecrawl.dev" });
+.send({ url: "https://roastmywebsite.ai" });
 expect(crawlResponse.statusCode).toBe(200);
 let isCompleted = false;
@@ -622,10 +622,10 @@ describe("E2E Tests for API Routes", () => {
 expect(completedResponse.body.data[0]).toHaveProperty("content");
 expect(completedResponse.body.data[0]).toHaveProperty("markdown");
 expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
-}, 60000); // 60 seconds
+expect(completedResponse.body.data[0].content).toContain("_Roast_");
+}, 120000); // 120 seconds
-it('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
+it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
 const crawlResponse = await request(TEST_URL)
 .post('/v0/crawl')
 .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@@ -660,9 +660,9 @@ describe("E2E Tests for API Routes", () => {
 })
 ])
 );
-}, 60000); // 60 seconds
+}, 120000); // 120 seconds
-it("should return a successful response with max depth option for a valid crawl job", async () => {
+it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -705,15 +705,15 @@ describe("E2E Tests for API Routes", () => {
 const depth = new URL(url).pathname.split("/").filter(Boolean).length;
 expect(depth).toBeLessThanOrEqual(1);
 });
-}, 120000);
+}, 180000);
-it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
+it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
 .set("Content-Type", "application/json")
 .send({
-url: "https://firecrawl.dev",
+url: "https://roastmywebsite.ai",
 pageOptions: { includeHtml: true },
 });
 expect(crawlResponse.statusCode).toBe(200);
@@ -725,12 +725,23 @@ describe("E2E Tests for API Routes", () => {
 expect(response.body).toHaveProperty("status");
 expect(response.body.status).toBe("active");
-// wait for 30 seconds
-await new Promise((r) => setTimeout(r, 30000));
-const completedResponse = await request(TEST_URL)
+let isFinished = false;
+let completedResponse;
+while (!isFinished) {
+const response = await request(TEST_URL)
 .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+expect(response.statusCode).toBe(200);
+expect(response.body).toHaveProperty("status");
+if (response.body.status === "completed") {
+isFinished = true;
+completedResponse = response;
+} else {
+await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+}
+}
 expect(completedResponse.statusCode).toBe(200);
 expect(completedResponse.body).toHaveProperty("status");
@@ -739,17 +750,14 @@ describe("E2E Tests for API Routes", () => {
 expect(completedResponse.body.data[0]).toHaveProperty("content");
 expect(completedResponse.body.data[0]).toHaveProperty("markdown");
 expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-// 120 seconds
 expect(completedResponse.body.data[0]).toHaveProperty("html");
-expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
-expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
+expect(completedResponse.body.data[0].content).toContain("_Roast_");
+expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
 expect(completedResponse.body.data[0].html).toContain("<h1");
 }, 60000);
 }); // 60 seconds
-it("If someone cancels a crawl job, it should turn into failed status", async () => {
+it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
 const crawlResponse = await request(TEST_URL)
 .post("/v0/crawl")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -785,7 +793,7 @@ describe("E2E Tests for API Routes", () => {
 }, 60000); // 60 seconds
 describe("POST /v0/scrape with LLM Extraction", () => {
-it("should extract data using LLM extraction mode", async () => {
+it.concurrent("should extract data using LLM extraction mode", async () => {
 const response = await request(TEST_URL)
 .post("/v0/scrape")
 .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -836,7 +844,7 @@ describe("E2E Tests for API Routes", () => {
 });
 // describe("POST /v0/scrape for Top 100 Companies", () => {
-// it("should extract data for the top 100 companies", async () => {
+// it.concurrent("should extract data for the top 100 companies", async () => {
 // const response = await request(TEST_URL)
 // .post("/v0/scrape")
 // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -894,7 +902,7 @@ describe("E2E Tests for API Routes", () => {
 // });
 describe("POST /v0/crawl with fast mode", () => {
-it("should complete the crawl under 20 seconds", async () => {
+it.concurrent("should complete the crawl under 20 seconds", async () => {
 const startTime = Date.now();
 const crawlResponse = await request(TEST_URL)
@@ -927,10 +935,10 @@ describe("E2E Tests for API Routes", () => {
 }
 }
-const endTime = Date.now();
-const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
-console.log(`Time elapsed: ${timeElapsed} seconds`);
+// const endTime = Date.now();
+// const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
+// console.log(`Time elapsed: ${timeElapsed} seconds`);
 expect(statusResponse.body.status).toBe("completed");
 expect(statusResponse.body).toHaveProperty("data");
@@ -945,7 +953,7 @@ describe("E2E Tests for API Routes", () => {
 }, 20000);
-// it("should complete the crawl in more than 10 seconds", async () => {
+// it.concurrent("should complete the crawl in more than 10 seconds", async () => {
 // const startTime = Date.now();
 // const crawlResponse = await request(TEST_URL)
@@ -995,7 +1003,7 @@ describe("E2E Tests for API Routes", () => {
 });
 describe("GET /is-production", () => {
-it("should return the production status", async () => {
+it.concurrent("should return the production status", async () => {
 const response = await request(TEST_URL).get("/is-production");
 expect(response.statusCode).toBe(200);
 expect(response.body).toHaveProperty("isProduction");
@@ -1003,8 +1011,8 @@ describe("E2E Tests for API Routes", () => {
 });
 describe("Rate Limiter", () => {
-it("should return 429 when rate limit is exceeded for preview token", async () => {
-for (let i = 0; i < 4; i++) {
+it.concurrent("should return 429 when rate limit is exceeded for preview token", async () => {
+for (let i = 0; i < 5; i++) {
 const response = await request(TEST_URL)
 .post("/v0/scrape")
 .set("Authorization", `Bearer this_is_just_a_preview_token`)
@@ -1020,10 +1028,10 @@ describe("E2E Tests for API Routes", () => {
 .send({ url: "https://www.scrapethissite.com" });
 expect(response.statusCode).toBe(429);
-}, 60000);
+}, 90000);
 });
-// it("should return 429 when rate limit is exceeded for API key", async () => {
+// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
 // for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_SCRAPE); i++) {
 // const response = await request(TEST_URL)
 // .post("/v0/scrape")
@@ -1043,7 +1051,7 @@ describe("E2E Tests for API Routes", () => {
 // expect(response.statusCode).toBe(429);
 // }, 60000);
-// it("should return 429 when rate limit is exceeded for API key", async () => {
+// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
 // for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_CRAWL); i++) {
 // const response = await request(TEST_URL)
 // .post("/v0/crawl")
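Most of the churn in this test file is the switch from it(...) to it.concurrent(...), which lets Jest start the tests inside a describe block concurrently instead of one after another. A minimal, self-contained sketch of the difference (the test names and delays here are made up for illustration):

// Hypothetical example, not from the Firecrawl test suite.
describe("serial vs concurrent", () => {
  // Runs on its own before moving on; serial tests add up their wait times.
  it("serial test", async () => {
    await new Promise((r) => setTimeout(r, 1000));
    expect(true).toBe(true);
  });

  // it.concurrent tests in the same file start without waiting for each other,
  // so long-running network calls (like these E2E requests) overlap.
  it.concurrent("concurrent test A", async () => {
    await new Promise((r) => setTimeout(r, 1000));
    expect(true).toBe(true);
  });

  it.concurrent("concurrent test B", async () => {
    await new Promise((r) => setTimeout(r, 1000));
    expect(true).toBe(true);
  });
});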

View File

@@ -1,13 +1,13 @@
 {
 "name": "@mendable/firecrawl-js",
-"version": "0.0.23",
+"version": "0.0.25",
 "description": "JavaScript SDK for Firecrawl API",
 "main": "build/index.js",
 "types": "types/index.d.ts",
 "type": "module",
 "scripts": {
 "build": "tsc",
-"publish": "npm run build && npm publish --access public",
+"build-and-publish": "npm run build && npm publish --access public",
 "publish-beta": "npm run build && npm publish --access public --tag beta",
 "test": "jest src/__tests__/**/*.test.ts"
 },

View File

@@ -8,94 +8,94 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
 const API_URL = process.env.API_URL;
 describe('FirecrawlApp E2E Tests', () => {
-test('should throw error for no API key', () => {
+test.concurrent('should throw error for no API key', () => {
 expect(() => {
 new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
 }).toThrow("No API key provided");
 });
-test('should throw error for invalid API key on scrape', async () => {
+test.concurrent('should throw error for invalid API key on scrape', async () => {
 const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
+await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
 });
-test('should throw error for blocklisted URL on scrape', async () => {
+test.concurrent('should throw error for blocklisted URL on scrape', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
 const blocklistedUrl = "https://facebook.com/fake-test";
 await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
 });
-test('should return successful response with valid preview token', async () => {
+test.concurrent('should return successful response with valid preview token', async () => {
 const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
-const response = await app.scrapeUrl('https://firecrawl.dev');
+const response = await app.scrapeUrl('https://roastmywebsite.ai');
 expect(response).not.toBeNull();
-expect(response.data.content).toContain("🔥 Firecrawl");
+expect(response.data.content).toContain("_Roast_");
 }, 30000); // 30 seconds timeout
-test('should return successful response for valid scrape', async () => {
+test.concurrent('should return successful response for valid scrape', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-const response = await app.scrapeUrl('https://firecrawl.dev');
+const response = await app.scrapeUrl('https://roastmywebsite.ai');
 expect(response).not.toBeNull();
-expect(response.data.content).toContain("🔥 Firecrawl");
+expect(response.data.content).toContain("_Roast_");
 expect(response.data).toHaveProperty('markdown');
 expect(response.data).toHaveProperty('metadata');
 expect(response.data).not.toHaveProperty('html');
 }, 30000); // 30 seconds timeout
-test('should return successful response with valid API key and include HTML', async () => {
+test.concurrent('should return successful response with valid API key and include HTML', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
+const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
 expect(response).not.toBeNull();
-expect(response.data.content).toContain("🔥 Firecrawl");
-expect(response.data.markdown).toContain("🔥 Firecrawl");
+expect(response.data.content).toContain("_Roast_");
+expect(response.data.markdown).toContain("_Roast_");
 expect(response.data.html).toContain("<h1");
 }, 30000); // 30 seconds timeout
-test('should return successful response for valid scrape with PDF file', async () => {
+test.concurrent('should return successful response for valid scrape with PDF file', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
 const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
 expect(response).not.toBeNull();
 expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
 }, 30000); // 30 seconds timeout
-test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
+test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
 const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
 expect(response).not.toBeNull();
 expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
 }, 30000); // 30 seconds timeout
-test('should throw error for invalid API key on crawl', async () => {
+test.concurrent('should throw error for invalid API key on crawl', async () => {
 const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
-await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
+await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
 });
-test('should throw error for blocklisted URL on crawl', async () => {
+test.concurrent('should throw error for blocklisted URL on crawl', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
 const blocklistedUrl = "https://twitter.com/fake-test";
 await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
 });
-test('should return successful response for crawl and wait for completion', async () => {
+test.concurrent('should return successful response for crawl and wait for completion', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
+const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
 expect(response).not.toBeNull();
-expect(response[0].content).toContain("🔥 Firecrawl");
+expect(response[0].content).toContain("_Roast_");
 }, 60000); // 60 seconds timeout
-test('should handle idempotency key for crawl', async () => {
+test.concurrent('should handle idempotency key for crawl', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
 const uniqueIdempotencyKey = uuidv4();
-const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
+const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
 expect(response).not.toBeNull();
 expect(response.jobId).toBeDefined();
-await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
+await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
 });
-test('should check crawl status', async () => {
+test.concurrent('should check crawl status', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
-const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
+const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
 expect(response).not.toBeNull();
 expect(response.jobId).toBeDefined();
@@ -115,7 +115,7 @@ describe('FirecrawlApp E2E Tests', () => {
 expect(statusResponse.data.length).toBeGreaterThan(0);
 }, 35000); // 35 seconds timeout
-test('should return successful response for search', async () => {
+test.concurrent('should return successful response for search', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
 const response = await app.search("test query");
 expect(response).not.toBeNull();
@@ -123,12 +123,12 @@ describe('FirecrawlApp E2E Tests', () => {
 expect(response.data.length).toBeGreaterThan(2);
 }, 30000); // 30 seconds timeout
-test('should throw error for invalid API key on search', async () => {
+test.concurrent('should throw error for invalid API key on search', async () => {
 const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
 await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
 });
-test('should perform LLM extraction', async () => {
+test.concurrent('should perform LLM extraction', async () => {
 const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
 const response = await app.scrapeUrl("https://mendable.ai", {
 extractorOptions: {

View File

@@ -1,3 +1,3 @@
 from .firecrawl import FirecrawlApp
-__version__ = "0.0.11"
+__version__ = "0.0.13"

View File

@@ -38,31 +38,31 @@ def test_blocklisted_url():
 def test_successful_response_with_valid_preview_token():
 app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
-response = app.scrape_url('https://firecrawl.dev')
+response = app.scrape_url('https://roastmywebsite.ai')
 assert response is not None
 assert 'content' in response
-assert "🔥 Firecrawl" in response['content']
+assert "_Roast_" in response['content']
 def test_scrape_url_e2e():
 app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
-response = app.scrape_url('https://firecrawl.dev')
+response = app.scrape_url('https://roastmywebsite.ai')
 assert response is not None
 assert 'content' in response
 assert 'markdown' in response
 assert 'metadata' in response
 assert 'html' not in response
-assert "🔥 Firecrawl" in response['content']
+assert "_Roast_" in response['content']
 def test_successful_response_with_valid_api_key_and_include_html():
 app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
-response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
+response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
 assert response is not None
 assert 'content' in response
 assert 'markdown' in response
 assert 'html' in response
 assert 'metadata' in response
-assert "🔥 Firecrawl" in response['content']
-assert "🔥 Firecrawl" in response['markdown']
+assert "_Roast_" in response['content']
+assert "_Roast_" in response['markdown']
 assert "<h1" in response['html']
 def test_successful_response_for_valid_scrape_with_pdf_file():
@@ -97,20 +97,20 @@ def test_should_return_error_for_blocklisted_url():
 def test_crawl_url_wait_for_completion_e2e():
 app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
-response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
+response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
 assert response is not None
 assert len(response) > 0
 assert 'content' in response[0]
-assert "🔥 Firecrawl" in response[0]['content']
+assert "_Roast_" in response[0]['content']
 def test_crawl_url_with_idempotency_key_e2e():
 app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
 uniqueIdempotencyKey = str(uuid4())
-response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
+response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
 assert response is not None
 assert len(response) > 0
 assert 'content' in response[0]
-assert "🔥 Firecrawl" in response[0]['content']
+assert "_Roast_" in response[0]['content']
 with pytest.raises(Exception) as excinfo:
 app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)