From 8132f22c73762e8092df0fe172817397ff2f833e Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Thu, 6 Jun 2024 15:36:20 -0300
Subject: [PATCH] nice
---
.github/workflows/fly.yml | 127 +++++++++++--
.github/workflows/publish-js-sdk.yml | 5 +-
.github/workflows/publish-python-sdk.yml | 5 +-
.../src/__tests__/e2e_withAuth/index.test.ts | 170 +++++++++---------
.../src/__tests__/e2e_withAuth/index.test.ts | 60 +++----
.../firecrawl/__tests__/e2e_withAuth/test.py | 22 +--
6 files changed, 245 insertions(+), 144 deletions(-)
diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml
index 3950c2e..957a707 100644
--- a/.github/workflows/fly.yml
+++ b/.github/workflows/fly.yml
@@ -3,8 +3,6 @@ on:
push:
branches:
- main
- schedule:
- - cron: '0 */2 * * *'
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -25,9 +23,12 @@ env:
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+ PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+ NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
jobs:
- pre-deploy:
+ pre-deploy-e2e-tests:
name: Pre-deploy checks
runs-on: ubuntu-latest
services:
@@ -61,7 +62,7 @@ jobs:
pre-deploy-test-suite:
name: Test Suite
- needs: pre-deploy
+ needs: pre-deploy-e2e-tests
runs-on: ubuntu-latest
services:
redis:
@@ -94,19 +95,37 @@ jobs:
run: |
npm run test
working-directory: ./apps/test-suite
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install Python dependencies
- run: |
- python -m pip install --upgrade pip
+
+ python-sdk-tests:
+ name: Python SDK Tests
+ needs: pre-deploy-e2e-tests
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.x'
+ - name: Install Python dependencies
+ run: |
+ python -m pip install --upgrade pip
pip install -r requirements.txt
working-directory: ./apps/python-sdk
- name: Run E2E tests for Python SDK
- run: |
- pytest firecrawl/__tests__/e2e_withAuth/test.py
+ run: |
+ pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk
+
+ js-sdk-tests:
+ name: JavaScript SDK Tests
+ needs: pre-deploy-e2e-tests
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Node.js
+ uses: actions/setup-node@v3
+ with:
+ node-version: "20"
- name: Install dependencies for JavaScript SDK
- run: pnpm install
+ run: npm install -g pnpm && pnpm install  # this job starts on a fresh runner; pnpm must be installed here before use
working-directory: ./apps/js-sdk/firecrawl
@@ -117,7 +136,7 @@ jobs:
deploy:
name: Deploy app
runs-on: ubuntu-latest
- needs: pre-deploy-test-suite
+ needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests]
steps:
- uses: actions/checkout@v3
- name: Change directory
@@ -126,3 +145,83 @@ jobs:
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
+
+ build-and-publish-python-sdk:
+ runs-on: ubuntu-latest
+ needs: deploy
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.x'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install setuptools wheel twine build requests packaging
+
+ - name: Run version check script
+ id: version_check_script
+ run: |
+ PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
+ echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV
+
+ - name: Build the package
+ if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
+ run: |
+ python -m build
+ working-directory: ./apps/python-sdk
+
+ - name: Publish to PyPI
+ if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}  # must match the name the version-check step writes to GITHUB_ENV
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+ run: |
+ twine upload dist/*
+ working-directory: ./apps/python-sdk
+
+ build-and-publish-js-sdk:
+ runs-on: ubuntu-latest
+ needs: deploy
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Node.js
+ uses: actions/setup-node@v3
+ with:
+ node-version: '20'
+ registry-url: 'https://registry.npmjs.org/'
+ scope: '@mendable'
+ always-auth: true
+
+ - name: Install pnpm
+ run: npm install -g pnpm
+
+ - name: Install python for running version check script
+ run: |
+ python -m pip install --upgrade pip
+ pip install setuptools wheel requests packaging
+
+ - name: Install dependencies for JavaScript SDK
+ run: pnpm install
+ working-directory: ./apps/js-sdk/firecrawl
+
+ - name: Run version check script
+ id: version_check_script
+ run: |
+ VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
+ echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
+
+ - name: Build and publish to npm
+ if: ${{ env.VERSION_INCREMENTED == 'true' }}
+ env:
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+ run: |
+ npm run build-and-publish
+ working-directory: ./apps/js-sdk/firecrawl
+
\ No newline at end of file
diff --git a/.github/workflows/publish-js-sdk.yml b/.github/workflows/publish-js-sdk.yml
index 651d33b..c02a654 100644
--- a/.github/workflows/publish-js-sdk.yml
+++ b/.github/workflows/publish-js-sdk.yml
@@ -1,9 +1,6 @@
name: Publish JavaScript SDK
-on:
- pull_request:
- branches:
- - main
+on: []
env:
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
diff --git a/.github/workflows/publish-python-sdk.yml b/.github/workflows/publish-python-sdk.yml
index dd02787..6d86f1e 100644
--- a/.github/workflows/publish-python-sdk.yml
+++ b/.github/workflows/publish-python-sdk.yml
@@ -1,9 +1,6 @@
name: Publish Python SDK
-on:
- pull_request:
- branches:
- - main
+on: []
env:
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
index f015acd..69e5316 100644
--- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
@@ -17,7 +17,7 @@ describe("E2E Tests for API Routes", () => {
delete process.env.USE_DB_AUTHENTICATION;
});
describe("GET /", () => {
- it("should return Hello, world! message", async () => {
+ it.concurrent("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/");
expect(response.statusCode).toBe(200);
@@ -26,7 +26,7 @@ describe("E2E Tests for API Routes", () => {
});
describe("GET /test", () => {
- it("should return Hello, world! message", async () => {
+ it.concurrent("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/test");
expect(response.statusCode).toBe(200);
expect(response.text).toContain("Hello, world!");
@@ -34,12 +34,12 @@ describe("E2E Tests for API Routes", () => {
});
describe("POST /v0/scrape", () => {
- it("should require authorization", async () => {
+ it.concurrent("should require authorization", async () => {
const response = await request(app).post("/v0/scrape");
expect(response.statusCode).toBe(401);
});
- it("should return an error response with an invalid API key", async () => {
+ it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer invalid-api-key`)
@@ -48,7 +48,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- it("should return an error for a blocklisted URL", async () => {
+ it.concurrent("should return an error for a blocklisted URL", async () => {
const blocklistedUrl = "https://facebook.com/fake-test";
const response = await request(TEST_URL)
.post("/v0/scrape")
@@ -61,37 +61,38 @@ describe("E2E Tests for API Routes", () => {
);
});
- it("should return a successful response with a valid preview token", async () => {
- const response = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer this_is_just_a_preview_token`)
- .set("Content-Type", "application/json")
- .send({ url: "https://roastmywebsite.ai" });
- expect(response.statusCode).toBe(200);
- }, 30000); // 30 seconds timeout
+ // tested on rate limit test
+ // it.concurrent("should return a successful response with a valid preview token", async () => {
+ // const response = await request(TEST_URL)
+ // .post("/v0/scrape")
+ // .set("Authorization", `Bearer this_is_just_a_preview_token`)
+ // .set("Content-Type", "application/json")
+ // .send({ url: "https://roastmywebsite.ai" });
+ // expect(response.statusCode).toBe(200);
+ // }, 30000); // 30 seconds timeout
- it("should return a successful response with a valid API key", async () => {
+ it.concurrent("should return a successful response with a valid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev" });
+ .send({ url: "https://roastmywebsite.ai" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data).not.toHaveProperty("html");
- expect(response.body.data.content).toContain("🔥 Firecrawl");
+ expect(response.body.data.content).toContain("_Roast_");
}, 30000); // 30 seconds timeout
- it("should return a successful response with a valid API key and includeHtml set to true", async () => {
+ it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
- url: "https://firecrawl.dev",
+ url: "https://roastmywebsite.ai",
pageOptions: { includeHtml: true },
});
expect(response.statusCode).toBe(200);
@@ -100,12 +101,12 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("html");
expect(response.body.data).toHaveProperty("metadata");
- expect(response.body.data.content).toContain("🔥 Firecrawl");
- expect(response.body.data.markdown).toContain("🔥 Firecrawl");
+ expect(response.body.data.content).toContain("_Roast_");
+ expect(response.body.data.markdown).toContain("_Roast_");
expect(response.body.data.html).toContain("
{
+ it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
const response = await request(TEST_URL)
.post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@@ -120,7 +121,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 60000); // 60 seconds
- it('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
+ it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
const response = await request(TEST_URL)
.post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@@ -136,7 +137,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds
// TODO: add this test back once we nail the waitFor option to be more deterministic
- // it("should return a successful response with a valid API key and waitFor option", async () => {
+ // it.concurrent("should return a successful response with a valid API key and waitFor option", async () => {
// const startTime = Date.now();
// const response = await request(TEST_URL)
// .post("/v0/scrape")
@@ -158,12 +159,12 @@ describe("E2E Tests for API Routes", () => {
});
describe("POST /v0/crawl", () => {
- it("should require authorization", async () => {
+ it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawl");
expect(response.statusCode).toBe(401);
});
- it("should return an error response with an invalid API key", async () => {
+ it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer invalid-api-key`)
@@ -172,7 +173,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- it("should return an error for a blocklisted URL", async () => {
+ it.concurrent("should return an error for a blocklisted URL", async () => {
const blocklistedUrl = "https://twitter.com/fake-test";
const response = await request(TEST_URL)
.post("/v0/crawl")
@@ -185,7 +186,7 @@ describe("E2E Tests for API Routes", () => {
);
});
- it("should return a successful response with a valid API key for crawl", async () => {
+ it.concurrent("should return a successful response with a valid API key for crawl", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -197,7 +198,7 @@ describe("E2E Tests for API Routes", () => {
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
- it('should prevent duplicate requests using the same idempotency key', async () => {
+ it.concurrent('should prevent duplicate requests using the same idempotency key', async () => {
const uniqueIdempotencyKey = uuidv4();
// First request with the idempotency key
@@ -222,7 +223,7 @@ describe("E2E Tests for API Routes", () => {
expect(secondResponse.body.error).toBe('Idempotency key already used');
});
- it("should return a successful response with a valid API key and valid includes option", async () => {
+ it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -259,7 +260,6 @@ describe("E2E Tests for API Routes", () => {
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
- console.log({url})
expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
});
@@ -273,7 +273,7 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0].content).toContain("Mendable");
}, 60000); // 60 seconds
- it("should return a successful response with a valid API key and valid excludes option", async () => {
+ it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -314,7 +314,7 @@ describe("E2E Tests for API Routes", () => {
});
}, 90000); // 90 seconds
- it("should return a successful response with a valid API key and limit to 3", async () => {
+ it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -354,7 +354,7 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0].content).toContain("Mendable");
}, 60000); // 60 seconds
- it("should return a successful response with max depth option for a valid crawl job", async () => {
+ it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -396,7 +396,7 @@ describe("E2E Tests for API Routes", () => {
});
}, 120000);
- // it("should return a successful response with a valid API key and valid limit option", async () => {
+ // it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
// const crawlResponse = await request(TEST_URL)
// .post("/v0/crawl")
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -441,13 +441,13 @@ describe("E2E Tests for API Routes", () => {
// expect(completedResponse.body.data[0].content).not.toContain("main menu");
// }, 60000); // 60 seconds
- it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
+ it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
- url: "https://firecrawl.dev",
+ url: "https://roastmywebsite.ai",
pageOptions: { includeHtml: true },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -486,19 +486,19 @@ describe("E2E Tests for API Routes", () => {
// 120 seconds
expect(completedResponse.body.data[0]).toHaveProperty("html");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
- expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
+ expect(completedResponse.body.data[0].content).toContain("_Roast_");
+ expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
expect(completedResponse.body.data[0].html).toContain(" {
- it("should require authorization", async () => {
+ it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
expect(response.statusCode).toBe(401);
});
- it("should return an error response with an invalid API key", async () => {
+ it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer invalid-api-key`)
@@ -507,7 +507,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- // it("should return an error for a blocklisted URL", async () => {
+ // it.concurrent("should return an error for a blocklisted URL", async () => {
// const blocklistedUrl = "https://instagram.com/fake-test";
// const response = await request(TEST_URL)
// .post("/v0/crawlWebsitePreview")
@@ -519,7 +519,7 @@ describe("E2E Tests for API Routes", () => {
// expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
// });
- it("should return a timeout error when scraping takes longer than the specified timeout", async () => {
+ it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -529,7 +529,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(408);
}, 3000);
- it("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
+ it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
@@ -544,12 +544,12 @@ describe("E2E Tests for API Routes", () => {
});
describe("POST /v0/search", () => {
- it("should require authorization", async () => {
+ it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/search");
expect(response.statusCode).toBe(401);
});
- it("should return an error response with an invalid API key", async () => {
+ it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/search")
.set("Authorization", `Bearer invalid-api-key`)
@@ -558,7 +558,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- it("should return a successful response with a valid API key for search", async () => {
+ it.concurrent("should return a successful response with a valid API key for search", async () => {
const response = await request(TEST_URL)
.post("/v0/search")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -572,31 +572,31 @@ describe("E2E Tests for API Routes", () => {
});
describe("GET /v0/crawl/status/:jobId", () => {
- it("should require authorization", async () => {
+ it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).get("/v0/crawl/status/123");
expect(response.statusCode).toBe(401);
});
- it("should return an error response with an invalid API key", async () => {
+ it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.get("/v0/crawl/status/123")
.set("Authorization", `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
});
- it("should return Job not found for invalid job ID", async () => {
+ it.concurrent("should return Job not found for invalid job ID", async () => {
const response = await request(TEST_URL)
.get("/v0/crawl/status/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
});
- it("should return a successful crawl status response for a valid crawl job", async () => {
+ it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev" });
+ .send({ url: "https://roastmywebsite.ai" });
expect(crawlResponse.statusCode).toBe(200);
let isCompleted = false;
@@ -622,10 +622,10 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
- }, 60000); // 60 seconds
+ expect(completedResponse.body.data[0].content).toContain("_Roast_");
+ }, 120000); // 120 seconds
- it('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
+ it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
const crawlResponse = await request(TEST_URL)
.post('/v0/crawl')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@@ -660,9 +660,9 @@ describe("E2E Tests for API Routes", () => {
})
])
);
- }, 60000); // 60 seconds
+ }, 120000); // 120 seconds
- it("should return a successful response with max depth option for a valid crawl job", async () => {
+ it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -707,13 +707,13 @@ describe("E2E Tests for API Routes", () => {
});
}, 120000);
- it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
+ it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
- url: "https://firecrawl.dev",
+ url: "https://roastmywebsite.ai",
pageOptions: { includeHtml: true },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -725,12 +725,23 @@ describe("E2E Tests for API Routes", () => {
expect(response.body).toHaveProperty("status");
expect(response.body.status).toBe("active");
- // wait for 30 seconds
- await new Promise((r) => setTimeout(r, 30000));
+ let isFinished = false;
+ let completedResponse;
- const completedResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ while (!isFinished) {
+ const response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+
+ if (response.body.status === "completed") {
+ isFinished = true;
+ completedResponse = response;
+ } else {
+ await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+ }
+ }
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
@@ -739,17 +750,14 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-
- // 120 seconds
expect(completedResponse.body.data[0]).toHaveProperty("html");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
- expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
+ expect(completedResponse.body.data[0].content).toContain("_Roast_");
+ expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
expect(completedResponse.body.data[0].html).toContain(" {
+ it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -785,7 +793,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds
describe("POST /v0/scrape with LLM Extraction", () => {
- it("should extract data using LLM extraction mode", async () => {
+ it.concurrent("should extract data using LLM extraction mode", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -836,7 +844,7 @@ describe("E2E Tests for API Routes", () => {
});
// describe("POST /v0/scrape for Top 100 Companies", () => {
- // it("should extract data for the top 100 companies", async () => {
+ // it.concurrent("should extract data for the top 100 companies", async () => {
// const response = await request(TEST_URL)
// .post("/v0/scrape")
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -894,7 +902,7 @@ describe("E2E Tests for API Routes", () => {
// });
describe("POST /v0/crawl with fast mode", () => {
- it("should complete the crawl under 20 seconds", async () => {
+ it.concurrent("should complete the crawl under 20 seconds", async () => {
const startTime = Date.now();
const crawlResponse = await request(TEST_URL)
@@ -927,10 +935,10 @@ describe("E2E Tests for API Routes", () => {
}
}
- const endTime = Date.now();
- const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
+ // const endTime = Date.now();
+ // const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
- console.log(`Time elapsed: ${timeElapsed} seconds`);
+ // console.log(`Time elapsed: ${timeElapsed} seconds`);
expect(statusResponse.body.status).toBe("completed");
expect(statusResponse.body).toHaveProperty("data");
@@ -945,7 +953,7 @@ describe("E2E Tests for API Routes", () => {
}, 20000);
- // it("should complete the crawl in more than 10 seconds", async () => {
+ // it.concurrent("should complete the crawl in more than 10 seconds", async () => {
// const startTime = Date.now();
// const crawlResponse = await request(TEST_URL)
@@ -995,7 +1003,7 @@ describe("E2E Tests for API Routes", () => {
});
describe("GET /is-production", () => {
- it("should return the production status", async () => {
+ it.concurrent("should return the production status", async () => {
const response = await request(TEST_URL).get("/is-production");
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("isProduction");
@@ -1003,8 +1011,8 @@ describe("E2E Tests for API Routes", () => {
});
describe("Rate Limiter", () => {
- it("should return 429 when rate limit is exceeded for preview token", async () => {
- for (let i = 0; i < 4; i++) {
+ it.concurrent("should return 429 when rate limit is exceeded for preview token", async () => {
+ for (let i = 0; i < 5; i++) {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
@@ -1023,7 +1031,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000);
});
- // it("should return 429 when rate limit is exceeded for API key", async () => {
+ // it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_SCRAPE); i++) {
// const response = await request(TEST_URL)
// .post("/v0/scrape")
@@ -1043,7 +1051,7 @@ describe("E2E Tests for API Routes", () => {
// expect(response.statusCode).toBe(429);
// }, 60000);
- // it("should return 429 when rate limit is exceeded for API key", async () => {
+ // it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_CRAWL); i++) {
// const response = await request(TEST_URL)
// .post("/v0/crawl")
diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
index 16b7ab7..c7dde69 100644
--- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
@@ -8,94 +8,94 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = process.env.API_URL;
describe('FirecrawlApp E2E Tests', () => {
- test('should throw error for no API key', () => {
+ test.concurrent('should throw error for no API key', () => {
expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
}).toThrow("No API key provided");
});
- test('should throw error for invalid API key on scrape', async () => {
+ test.concurrent('should throw error for invalid API key on scrape', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
- await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
+ await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
});
- test('should throw error for blocklisted URL on scrape', async () => {
+ test.concurrent('should throw error for blocklisted URL on scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://facebook.com/fake-test";
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});
- test('should return successful response with valid preview token', async () => {
+ test.concurrent('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
- const response = await app.scrapeUrl('https://firecrawl.dev');
+ const response = await app.scrapeUrl('https://roastmywebsite.ai');
expect(response).not.toBeNull();
- expect(response.data.content).toContain("🔥 Firecrawl");
+ expect(response.data.content).toContain("_Roast_");
}, 30000); // 30 seconds timeout
- test('should return successful response for valid scrape', async () => {
+ test.concurrent('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
- const response = await app.scrapeUrl('https://firecrawl.dev');
+ const response = await app.scrapeUrl('https://roastmywebsite.ai');
expect(response).not.toBeNull();
- expect(response.data.content).toContain("🔥 Firecrawl");
+ expect(response.data.content).toContain("_Roast_");
expect(response.data).toHaveProperty('markdown');
expect(response.data).toHaveProperty('metadata');
expect(response.data).not.toHaveProperty('html');
}, 30000); // 30 seconds timeout
- test('should return successful response with valid API key and include HTML', async () => {
+ test.concurrent('should return successful response with valid API key and include HTML', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
- const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
+ const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
expect(response).not.toBeNull();
- expect(response.data.content).toContain("🔥 Firecrawl");
- expect(response.data.markdown).toContain("🔥 Firecrawl");
+ expect(response.data.content).toContain("_Roast_");
+ expect(response.data.markdown).toContain("_Roast_");
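expect(response.data.html).toContain("<h1");
}, 30000); // 30 seconds timeout
- test('should return successful response for valid scrape with PDF file', async () => {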
+ test.concurrent('should return successful response for valid scrape with PDF file', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
- test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
+ test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
- test('should throw error for invalid API key on crawl', async () => {
+ test.concurrent('should throw error for invalid API key on crawl', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
- await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
+ await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
});
- test('should throw error for blocklisted URL on crawl', async () => {
+ test.concurrent('should throw error for blocklisted URL on crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://twitter.com/fake-test";
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});
- test('should return successful response for crawl and wait for completion', async () => {
+ test.concurrent('should return successful response for crawl and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
- const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
+ const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
expect(response).not.toBeNull();
- expect(response[0].content).toContain("🔥 Firecrawl");
+ expect(response[0].content).toContain("_Roast_");
}, 60000); // 60 seconds timeout
- test('should handle idempotency key for crawl', async () => {
+ test.concurrent('should handle idempotency key for crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const uniqueIdempotencyKey = uuidv4();
- const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
+ const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
expect(response).not.toBeNull();
expect(response.jobId).toBeDefined();
- await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
+ await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
});
- test('should check crawl status', async () => {
+ test.concurrent('should check crawl status', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
- const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
+ const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
expect(response).not.toBeNull();
expect(response.jobId).toBeDefined();
@@ -115,7 +115,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(statusResponse.data.length).toBeGreaterThan(0);
}, 35000); // 35 seconds timeout
- test('should return successful response for search', async () => {
+ test.concurrent('should return successful response for search', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.search("test query");
expect(response).not.toBeNull();
@@ -123,12 +123,12 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.data.length).toBeGreaterThan(2);
}, 30000); // 30 seconds timeout
- test('should throw error for invalid API key on search', async () => {
+ test.concurrent('should throw error for invalid API key on search', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
});
- test('should perform LLM extraction', async () => {
+ test.concurrent('should perform LLM extraction', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl("https://mendable.ai", {
extractorOptions: {
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
index 86ce1f9..90a6498 100644
--- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
+++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
@@ -38,31 +38,31 @@ def test_blocklisted_url():
def test_successful_response_with_valid_preview_token():
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
- response = app.scrape_url('https://firecrawl.dev')
+ response = app.scrape_url('https://roastmywebsite.ai')
assert response is not None
assert 'content' in response
- assert "🔥 Firecrawl" in response['content']
+ assert "_Roast_" in response['content']
def test_scrape_url_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
- response = app.scrape_url('https://firecrawl.dev')
+ response = app.scrape_url('https://roastmywebsite.ai')
assert response is not None
assert 'content' in response
assert 'markdown' in response
assert 'metadata' in response
assert 'html' not in response
- assert "🔥 Firecrawl" in response['content']
+ assert "_Roast_" in response['content']
def test_successful_response_with_valid_api_key_and_include_html():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
- response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
+ response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
assert response is not None
assert 'content' in response
assert 'markdown' in response
assert 'html' in response
assert 'metadata' in response
- assert "🔥 Firecrawl" in response['content']
- assert "🔥 Firecrawl" in response['markdown']
+ assert "_Roast_" in response['content']
+ assert "_Roast_" in response['markdown']
assert " 0
assert 'content' in response[0]
- assert "🔥 Firecrawl" in response[0]['content']
+ assert "_Roast_" in response[0]['content']
def test_crawl_url_with_idempotency_key_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
uniqueIdempotencyKey = str(uuid4())
- response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
+ response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
assert response is not None
assert len(response) > 0
assert 'content' in response[0]
- assert "🔥 Firecrawl" in response[0]['content']
+ assert "_Roast_" in response[0]['content']
with pytest.raises(Exception) as excinfo:
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)