
Merge pull request #246 from mendableai/194-sdk-ci-pipeline-for-publishing-pythonnode-sdk

[Feat] CI/CD for publishing js and python SDKs
Rafael Miller 2024-06-06 16:53:42 -03:00 committed by GitHub
commit 4c3bfe4eb5
11 changed files with 359 additions and 173 deletions

View File

@ -1,24 +1,14 @@
"""
checks local verions against published verions.
checks local versions against published versions.
# Usage:
Unix:
python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js
Windows:
python .github\scripts\check_version_has_incremented.py js .\apps\js-sdk\firecrawl @mendable/firecrawl-js
Local version: 0.0.22
Published version: 0.0.21
true
Unix:
python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py
Windows:
python .github\scripts\check_version_has_incremented.py python .\apps\python-sdk\firecrawl firecrawl-py
Local version: 0.0.11
Published version: 0.0.11
false
@ -88,8 +78,8 @@ if __name__ == "__main__":
raise ValueError("Invalid package type. Use 'python' or 'js'.")
# Print versions for debugging
print(f"Local version: {current_version}")
print(f"Published version: {published_version}")
# print(f"Local version: {current_version}")
# print(f"Published version: {published_version}")
# Compare versions and print result
if is_version_incremented(current_version, published_version):
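For context, the increment check itself can be reproduced with the packaging library that the workflows install. The sketch below is an illustration only: the function name is_version_incremented appears in the diff above, but its body here is assumed rather than copied from the repository.

from packaging.version import parse

def is_version_incremented(local_version: str, published_version: str) -> bool:
    # "Incremented" means strictly greater than the version already published.
    return parse(local_version) > parse(published_version)

if __name__ == "__main__":
    # Mirrors the examples in the docstring: 0.0.22 vs 0.0.21 -> true, 0.0.11 vs 0.0.11 -> false.
    print(str(is_version_incremented("0.0.22", "0.0.21")).lower())
    print(str(is_version_incremented("0.0.11", "0.0.11")).lower())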

View File

@ -3,8 +3,6 @@ on:
push:
branches:
- main
schedule:
- cron: '0 */2 * * *'
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@ -25,9 +23,12 @@ env:
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
jobs:
pre-deploy:
pre-deploy-e2e-tests:
name: Pre-deploy checks
runs-on: ubuntu-latest
services:
@ -61,7 +62,7 @@ jobs:
pre-deploy-test-suite:
name: Test Suite
needs: pre-deploy
needs: pre-deploy-e2e-tests
runs-on: ubuntu-latest
services:
redis:
@ -94,10 +95,17 @@ jobs:
run: |
npm run test
working-directory: ./apps/test-suite
- name: Set up Python ${{ matrix.python-version }}
python-sdk-tests:
name: Python SDK Tests
needs: pre-deploy-e2e-tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
python-version: '3.x'
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
@ -107,6 +115,17 @@ jobs:
run: |
pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk
js-sdk-tests:
name: JavaScript SDK Tests
needs: pre-deploy-e2e-tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Node.js
uses: actions/setup-node@v3
with:
node-version: "20"
- name: Install dependencies for JavaScript SDK
run: pnpm install
working-directory: ./apps/js-sdk/firecrawl
@ -117,7 +136,7 @@ jobs:
deploy:
name: Deploy app
runs-on: ubuntu-latest
needs: pre-deploy-test-suite
needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests]
steps:
- uses: actions/checkout@v3
- name: Change directory
@ -126,3 +145,83 @@ jobs:
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
build-and-publish-python-sdk:
runs-on: ubuntu-latest
needs: deploy
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine build requests packaging
- name: Run version check script
id: version_check_script
run: |
PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV
- name: Build the package
if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
run: |
python -m build
working-directory: ./apps/python-sdk
- name: Publish to PyPI
if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }}
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
working-directory: ./apps/python-sdk
build-and-publish-js-sdk:
runs-on: ubuntu-latest
needs: deploy
steps:
- uses: actions/checkout@v3
- name: Set up Node.js
uses: actions/setup-node@v3
with:
node-version: '20'
registry-url: 'https://registry.npmjs.org/'
scope: '@mendable'
always-auth: true
- name: Install pnpm
run: npm install -g pnpm
- name: Install python for running version check script
run: |
python -m pip install --upgrade pip
pip install setuptools wheel requests packaging
- name: Install dependencies for JavaScript SDK
run: pnpm install
working-directory: ./apps/js-sdk/firecrawl
- name: Run version check script
id: version_check_script
run: |
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
- name: Build and publish to npm
if: ${{ env.VERSION_INCREMENTED == 'true' }}
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
npm run build-and-publish
working-directory: ./apps/js-sdk/firecrawl
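Note how the publish jobs consume the check script: its output is captured with command substitution (VERSION_INCREMENTED=$(python ...)) and written to $GITHUB_ENV, so the script's stdout must be exactly "true" or "false", which is why the debug print statements were commented out in the script diff above. A minimal self-contained sketch of that stdout contract follows; the simplified command-line arguments here are hypothetical, for illustration only.

import sys
from packaging.version import parse

# Hypothetical simplified CLI: local and published versions passed directly.
local, published = sys.argv[1], sys.argv[2]

# Emit nothing but "true" or "false" so the workflow's $(...) capture stays clean.
print("true" if parse(local) > parse(published) else "false")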

View File

@ -1,9 +1,7 @@
name: Run JavaScript SDK E2E Tests
on:
pull_request:
branches:
- main
on: []
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}

.github/workflows/publish-js-sdk.yml (new file)
View File

@ -0,0 +1,46 @@
name: Publish JavaScript SDK
on: []
env:
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
jobs:
build-and-publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Node.js
uses: actions/setup-node@v3
with:
node-version: '20'
registry-url: 'https://registry.npmjs.org/'
scope: '@mendable'
always-auth: true
- name: Install pnpm
run: npm install -g pnpm
- name: Install python for running version check script
run: |
python -m pip install --upgrade pip
pip install setuptools wheel requests packaging
- name: Install dependencies for JavaScript SDK
run: pnpm install
working-directory: ./apps/js-sdk/firecrawl
- name: Run version check script
id: version_check_script
run: |
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js)
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
- name: Build and publish to npm
if: ${{ env.VERSION_INCREMENTED == 'true' }}
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
npm run build-and-publish
working-directory: ./apps/js-sdk/firecrawl

View File

@ -0,0 +1,47 @@
name: Publish Python SDK
on: []
env:
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
jobs:
build-and-publish:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine build requests packaging
- name: Run version check script
id: version_check_script
run: |
VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py)
echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV
- name: Build the package
if: ${{ env.VERSION_INCREMENTED == 'true' }}
run: |
python -m build
working-directory: ./apps/python-sdk
- name: Publish to PyPI
if: ${{ env.VERSION_INCREMENTED == 'true' }}
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
working-directory: ./apps/python-sdk

View File

@ -1,9 +1,7 @@
name: Run Python SDK E2E Tests
on:
pull_request:
branches:
- main
on: []
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}

View File

@ -17,7 +17,7 @@ describe("E2E Tests for API Routes", () => {
delete process.env.USE_DB_AUTHENTICATION;
});
describe("GET /", () => {
it("should return Hello, world! message", async () => {
it.concurrent("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/");
expect(response.statusCode).toBe(200);
@ -26,7 +26,7 @@ describe("E2E Tests for API Routes", () => {
});
describe("GET /test", () => {
it("should return Hello, world! message", async () => {
it.concurrent("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/test");
expect(response.statusCode).toBe(200);
expect(response.text).toContain("Hello, world!");
@ -34,12 +34,12 @@ describe("E2E Tests for API Routes", () => {
});
describe("POST /v0/scrape", () => {
it("should require authorization", async () => {
it.concurrent("should require authorization", async () => {
const response = await request(app).post("/v0/scrape");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer invalid-api-key`)
@ -48,7 +48,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
it("should return an error for a blocklisted URL", async () => {
it.concurrent("should return an error for a blocklisted URL", async () => {
const blocklistedUrl = "https://facebook.com/fake-test";
const response = await request(TEST_URL)
.post("/v0/scrape")
@ -61,37 +61,38 @@ describe("E2E Tests for API Routes", () => {
);
});
it("should return a successful response with a valid preview token", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
.set("Content-Type", "application/json")
.send({ url: "https://roastmywebsite.ai" });
expect(response.statusCode).toBe(200);
}, 30000); // 30 seconds timeout
// tested on rate limit test
// it.concurrent("should return a successful response with a valid preview token", async () => {
// const response = await request(TEST_URL)
// .post("/v0/scrape")
// .set("Authorization", `Bearer this_is_just_a_preview_token`)
// .set("Content-Type", "application/json")
// .send({ url: "https://roastmywebsite.ai" });
// expect(response.statusCode).toBe(200);
// }, 30000); // 30 seconds timeout
it("should return a successful response with a valid API key", async () => {
it.concurrent("should return a successful response with a valid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
.send({ url: "https://roastmywebsite.ai" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data).not.toHaveProperty("html");
expect(response.body.data.content).toContain("🔥 Firecrawl");
expect(response.body.data.content).toContain("_Roast_");
}, 30000); // 30 seconds timeout
it("should return a successful response with a valid API key and includeHtml set to true", async () => {
it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://firecrawl.dev",
url: "https://roastmywebsite.ai",
pageOptions: { includeHtml: true },
});
expect(response.statusCode).toBe(200);
@ -100,12 +101,12 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("html");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.content).toContain("🔥 Firecrawl");
expect(response.body.data.markdown).toContain("🔥 Firecrawl");
expect(response.body.data.content).toContain("_Roast_");
expect(response.body.data.markdown).toContain("_Roast_");
expect(response.body.data.html).toContain("<h1");
}, 30000); // 30 seconds timeout
it('should return a successful response for a valid scrape with PDF file', async () => {
it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
const response = await request(TEST_URL)
.post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@ -120,7 +121,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 60000); // 60 seconds
it('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
const response = await request(TEST_URL)
.post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@ -136,7 +137,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds
// TODO: add this test back once we nail the waitFor option to be more deterministic
// it("should return a successful response with a valid API key and waitFor option", async () => {
// it.concurrent("should return a successful response with a valid API key and waitFor option", async () => {
// const startTime = Date.now();
// const response = await request(TEST_URL)
// .post("/v0/scrape")
@ -158,12 +159,12 @@ describe("E2E Tests for API Routes", () => {
});
describe("POST /v0/crawl", () => {
it("should require authorization", async () => {
it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawl");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer invalid-api-key`)
@ -172,7 +173,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
it("should return an error for a blocklisted URL", async () => {
it.concurrent("should return an error for a blocklisted URL", async () => {
const blocklistedUrl = "https://twitter.com/fake-test";
const response = await request(TEST_URL)
.post("/v0/crawl")
@ -185,7 +186,7 @@ describe("E2E Tests for API Routes", () => {
);
});
it("should return a successful response with a valid API key for crawl", async () => {
it.concurrent("should return a successful response with a valid API key for crawl", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -197,7 +198,7 @@ describe("E2E Tests for API Routes", () => {
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
it('should prevent duplicate requests using the same idempotency key', async () => {
it.concurrent('should prevent duplicate requests using the same idempotency key', async () => {
const uniqueIdempotencyKey = uuidv4();
// First request with the idempotency key
@ -222,7 +223,7 @@ describe("E2E Tests for API Routes", () => {
expect(secondResponse.body.error).toBe('Idempotency key already used');
});
it("should return a successful response with a valid API key and valid includes option", async () => {
it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -259,7 +260,6 @@ describe("E2E Tests for API Routes", () => {
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
console.log({url})
expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
});
@ -273,7 +273,7 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0].content).toContain("Mendable");
}, 60000); // 60 seconds
it("should return a successful response with a valid API key and valid excludes option", async () => {
it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -314,7 +314,7 @@ describe("E2E Tests for API Routes", () => {
});
}, 90000); // 90 seconds
it("should return a successful response with a valid API key and limit to 3", async () => {
it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -354,7 +354,7 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0].content).toContain("Mendable");
}, 60000); // 60 seconds
it("should return a successful response with max depth option for a valid crawl job", async () => {
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -396,7 +396,7 @@ describe("E2E Tests for API Routes", () => {
});
}, 120000);
// it("should return a successful response with a valid API key and valid limit option", async () => {
// it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
// const crawlResponse = await request(TEST_URL)
// .post("/v0/crawl")
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -441,13 +441,13 @@ describe("E2E Tests for API Routes", () => {
// expect(completedResponse.body.data[0].content).not.toContain("main menu");
// }, 60000); // 60 seconds
it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://firecrawl.dev",
url: "https://roastmywebsite.ai",
pageOptions: { includeHtml: true },
});
expect(crawlResponse.statusCode).toBe(200);
@ -486,19 +486,19 @@ describe("E2E Tests for API Routes", () => {
// 120 seconds
expect(completedResponse.body.data[0]).toHaveProperty("html");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
expect(completedResponse.body.data[0].content).toContain("_Roast_");
expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
expect(completedResponse.body.data[0].html).toContain("<h1");
}, 60000);
});
describe("POST /v0/crawlWebsitePreview", () => {
it("should require authorization", async () => {
it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer invalid-api-key`)
@ -507,7 +507,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
// it("should return an error for a blocklisted URL", async () => {
// it.concurrent("should return an error for a blocklisted URL", async () => {
// const blocklistedUrl = "https://instagram.com/fake-test";
// const response = await request(TEST_URL)
// .post("/v0/crawlWebsitePreview")
@ -519,7 +519,7 @@ describe("E2E Tests for API Routes", () => {
// expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
// });
it("should return a timeout error when scraping takes longer than the specified timeout", async () => {
it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -529,27 +529,27 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(408);
}, 3000);
it("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
// it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
// const response = await request(TEST_URL)
// .post("/v0/crawlWebsitePreview")
// .set("Authorization", `Bearer this_is_just_a_preview_token`)
// .set("Content-Type", "application/json")
// .send({ url: "https://firecrawl.dev" });
// expect(response.statusCode).toBe(200);
// expect(response.body).toHaveProperty("jobId");
// expect(response.body.jobId).toMatch(
// /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
// );
// });
});
describe("POST /v0/search", () => {
it("should require authorization", async () => {
it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/search");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/search")
.set("Authorization", `Bearer invalid-api-key`)
@ -558,7 +558,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
it("should return a successful response with a valid API key for search", async () => {
it.concurrent("should return a successful response with a valid API key for search", async () => {
const response = await request(TEST_URL)
.post("/v0/search")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -572,31 +572,31 @@ describe("E2E Tests for API Routes", () => {
});
describe("GET /v0/crawl/status/:jobId", () => {
it("should require authorization", async () => {
it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).get("/v0/crawl/status/123");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.get("/v0/crawl/status/123")
.set("Authorization", `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
});
it("should return Job not found for invalid job ID", async () => {
it.concurrent("should return Job not found for invalid job ID", async () => {
const response = await request(TEST_URL)
.get("/v0/crawl/status/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
});
it("should return a successful crawl status response for a valid crawl job", async () => {
it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
.send({ url: "https://roastmywebsite.ai" });
expect(crawlResponse.statusCode).toBe(200);
let isCompleted = false;
@ -622,10 +622,10 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
}, 60000); // 60 seconds
expect(completedResponse.body.data[0].content).toContain("_Roast_");
}, 120000); // 120 seconds
it('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension', async () => {
const crawlResponse = await request(TEST_URL)
.post('/v0/crawl')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
@ -660,9 +660,9 @@ describe("E2E Tests for API Routes", () => {
})
])
);
}, 60000); // 60 seconds
}, 120000); // 120 seconds
it("should return a successful response with max depth option for a valid crawl job", async () => {
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -705,15 +705,15 @@ describe("E2E Tests for API Routes", () => {
const depth = new URL(url).pathname.split("/").filter(Boolean).length;
expect(depth).toBeLessThanOrEqual(1);
});
}, 120000);
}, 180000);
it("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://firecrawl.dev",
url: "https://roastmywebsite.ai",
pageOptions: { includeHtml: true },
});
expect(crawlResponse.statusCode).toBe(200);
@ -725,12 +725,23 @@ describe("E2E Tests for API Routes", () => {
expect(response.body).toHaveProperty("status");
expect(response.body.status).toBe("active");
// wait for 30 seconds
await new Promise((r) => setTimeout(r, 30000));
let isFinished = false;
let completedResponse;
const completedResponse = await request(TEST_URL)
while (!isFinished) {
const response = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
if (response.body.status === "completed") {
isFinished = true;
completedResponse = response;
} else {
await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
}
}
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
@ -739,17 +750,14 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
// 120 seconds
expect(completedResponse.body.data[0]).toHaveProperty("html");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("🔥 Firecrawl");
expect(completedResponse.body.data[0].markdown).toContain("Firecrawl");
expect(completedResponse.body.data[0].content).toContain("_Roast_");
expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
expect(completedResponse.body.data[0].html).toContain("<h1");
}, 60000);
}); // 60 seconds
it("If someone cancels a crawl job, it should turn into failed status", async () => {
it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -785,7 +793,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds
describe("POST /v0/scrape with LLM Extraction", () => {
it("should extract data using LLM extraction mode", async () => {
it.concurrent("should extract data using LLM extraction mode", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -836,7 +844,7 @@ describe("E2E Tests for API Routes", () => {
});
// describe("POST /v0/scrape for Top 100 Companies", () => {
// it("should extract data for the top 100 companies", async () => {
// it.concurrent("should extract data for the top 100 companies", async () => {
// const response = await request(TEST_URL)
// .post("/v0/scrape")
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -894,7 +902,7 @@ describe("E2E Tests for API Routes", () => {
// });
describe("POST /v0/crawl with fast mode", () => {
it("should complete the crawl under 20 seconds", async () => {
it.concurrent("should complete the crawl under 20 seconds", async () => {
const startTime = Date.now();
const crawlResponse = await request(TEST_URL)
@ -927,10 +935,10 @@ describe("E2E Tests for API Routes", () => {
}
}
const endTime = Date.now();
const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
// const endTime = Date.now();
// const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
console.log(`Time elapsed: ${timeElapsed} seconds`);
// console.log(`Time elapsed: ${timeElapsed} seconds`);
expect(statusResponse.body.status).toBe("completed");
expect(statusResponse.body).toHaveProperty("data");
@ -945,7 +953,7 @@ describe("E2E Tests for API Routes", () => {
}, 20000);
// it("should complete the crawl in more than 10 seconds", async () => {
// it.concurrent("should complete the crawl in more than 10 seconds", async () => {
// const startTime = Date.now();
// const crawlResponse = await request(TEST_URL)
@ -995,7 +1003,7 @@ describe("E2E Tests for API Routes", () => {
});
describe("GET /is-production", () => {
it("should return the production status", async () => {
it.concurrent("should return the production status", async () => {
const response = await request(TEST_URL).get("/is-production");
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("isProduction");
@ -1003,8 +1011,8 @@ describe("E2E Tests for API Routes", () => {
});
describe("Rate Limiter", () => {
it("should return 429 when rate limit is exceeded for preview token", async () => {
for (let i = 0; i < 4; i++) {
it.concurrent("should return 429 when rate limit is exceeded for preview token", async () => {
for (let i = 0; i < 5; i++) {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
@ -1020,10 +1028,10 @@ describe("E2E Tests for API Routes", () => {
.send({ url: "https://www.scrapethissite.com" });
expect(response.statusCode).toBe(429);
}, 60000);
}, 90000);
});
// it("should return 429 when rate limit is exceeded for API key", async () => {
// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_SCRAPE); i++) {
// const response = await request(TEST_URL)
// .post("/v0/scrape")
@ -1043,7 +1051,7 @@ describe("E2E Tests for API Routes", () => {
// expect(response.statusCode).toBe(429);
// }, 60000);
// it("should return 429 when rate limit is exceeded for API key", async () => {
// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
// for (let i = 0; i < parseInt(process.env.RATE_LIMIT_TEST_API_KEY_CRAWL); i++) {
// const response = await request(TEST_URL)
// .post("/v0/crawl")

View File

@ -1,13 +1,13 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.23",
"version": "0.0.25",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js",
"types": "types/index.d.ts",
"type": "module",
"scripts": {
"build": "tsc",
"publish": "npm run build && npm publish --access public",
"build-and-publish": "npm run build && npm publish --access public",
"publish-beta": "npm run build && npm publish --access public --tag beta",
"test": "jest src/__tests__/**/*.test.ts"
},

View File

@ -8,94 +8,94 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = process.env.API_URL;
describe('FirecrawlApp E2E Tests', () => {
test('should throw error for no API key', () => {
test.concurrent('should throw error for no API key', () => {
expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
}).toThrow("No API key provided");
});
test('should throw error for invalid API key on scrape', async () => {
test.concurrent('should throw error for invalid API key on scrape', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
});
test('should throw error for blocklisted URL on scrape', async () => {
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://facebook.com/fake-test";
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});
test('should return successful response with valid preview token', async () => {
test.concurrent('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev');
const response = await app.scrapeUrl('https://roastmywebsite.ai');
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data.content).toContain("_Roast_");
}, 30000); // 30 seconds timeout
test('should return successful response for valid scrape', async () => {
test.concurrent('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev');
const response = await app.scrapeUrl('https://roastmywebsite.ai');
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data.content).toContain("_Roast_");
expect(response.data).toHaveProperty('markdown');
expect(response.data).toHaveProperty('metadata');
expect(response.data).not.toHaveProperty('html');
}, 30000); // 30 seconds timeout
test('should return successful response with valid API key and include HTML', async () => {
test.concurrent('should return successful response with valid API key and include HTML', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data.markdown).toContain("🔥 Firecrawl");
expect(response.data.content).toContain("_Roast_");
expect(response.data.markdown).toContain("_Roast_");
expect(response.data.html).toContain("<h1");
}, 30000); // 30 seconds timeout
test('should return successful response for valid scrape with PDF file', async () => {
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
test('should throw error for invalid API key on crawl', async () => {
test.concurrent('should throw error for invalid API key on crawl', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
});
test('should throw error for blocklisted URL on crawl', async () => {
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://twitter.com/fake-test";
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});
test('should return successful response for crawl and wait for completion', async () => {
test.concurrent('should return successful response for crawl and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
expect(response).not.toBeNull();
expect(response[0].content).toContain("🔥 Firecrawl");
expect(response[0].content).toContain("_Roast_");
}, 60000); // 60 seconds timeout
test('should handle idempotency key for crawl', async () => {
test.concurrent('should handle idempotency key for crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const uniqueIdempotencyKey = uuidv4();
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
expect(response).not.toBeNull();
expect(response.jobId).toBeDefined();
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
});
test('should check crawl status', async () => {
test.concurrent('should check crawl status', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
expect(response).not.toBeNull();
expect(response.jobId).toBeDefined();
@ -115,7 +115,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(statusResponse.data.length).toBeGreaterThan(0);
}, 35000); // 35 seconds timeout
test('should return successful response for search', async () => {
test.concurrent('should return successful response for search', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.search("test query");
expect(response).not.toBeNull();
@ -123,12 +123,12 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.data.length).toBeGreaterThan(2);
}, 30000); // 30 seconds timeout
test('should throw error for invalid API key on search', async () => {
test.concurrent('should throw error for invalid API key on search', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
});
test('should perform LLM extraction', async () => {
test.concurrent('should perform LLM extraction', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl("https://mendable.ai", {
extractorOptions: {

View File

@ -1,3 +1,3 @@
from .firecrawl import FirecrawlApp
__version__ = "0.0.11"
__version__ = "0.0.13"

View File

@ -38,31 +38,31 @@ def test_blocklisted_url():
def test_successful_response_with_valid_preview_token():
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
response = app.scrape_url('https://firecrawl.dev')
response = app.scrape_url('https://roastmywebsite.ai')
assert response is not None
assert 'content' in response
assert "🔥 Firecrawl" in response['content']
assert "_Roast_" in response['content']
def test_scrape_url_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.scrape_url('https://firecrawl.dev')
response = app.scrape_url('https://roastmywebsite.ai')
assert response is not None
assert 'content' in response
assert 'markdown' in response
assert 'metadata' in response
assert 'html' not in response
assert "🔥 Firecrawl" in response['content']
assert "_Roast_" in response['content']
def test_successful_response_with_valid_api_key_and_include_html():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
assert response is not None
assert 'content' in response
assert 'markdown' in response
assert 'html' in response
assert 'metadata' in response
assert "🔥 Firecrawl" in response['content']
assert "🔥 Firecrawl" in response['markdown']
assert "_Roast_" in response['content']
assert "_Roast_" in response['markdown']
assert "<h1" in response['html']
def test_successful_response_for_valid_scrape_with_pdf_file():
@ -97,20 +97,20 @@ def test_should_return_error_for_blocklisted_url():
def test_crawl_url_wait_for_completion_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
assert response is not None
assert len(response) > 0
assert 'content' in response[0]
assert "🔥 Firecrawl" in response[0]['content']
assert "_Roast_" in response[0]['content']
def test_crawl_url_with_idempotency_key_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
uniqueIdempotencyKey = str(uuid4())
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
assert response is not None
assert len(response) > 0
assert 'content' in response[0]
assert "🔥 Firecrawl" in response[0]['content']
assert "_Roast_" in response[0]['content']
with pytest.raises(Exception) as excinfo:
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)