0

Merge pull request #40 from mendableai/cjp/contributors-guide-and

WIP: Cjp/making db auth optional <> Running project locally
This commit is contained in:
Nicolas 2024-04-21 11:57:54 -07:00 committed by GitHub
commit cbff1b4f73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 630 additions and 231 deletions

View File

@ -54,5 +54,5 @@ jobs:
id: start_workers id: start_workers
- name: Run E2E tests - name: Run E2E tests
run: | run: |
npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false npm run test:prod
working-directory: ./apps/api working-directory: ./apps/api

2
.gitignore vendored
View File

@ -6,3 +6,5 @@
dump.rdb dump.rdb
/mongo-data /mongo-data
apps/js-sdk/node_modules/ apps/js-sdk/node_modules/
apps/api/.env.local

View File

@ -1,8 +1,114 @@
# Contributing # Contributors guide:
We love contributions! Our contribution guide will be coming soon! Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute)
<!-- Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. --> If you're contributing, note that the process is similar to other open source repos i.e. (fork firecrawl, make changes, run tests, PR). If you have any questions, and would like help gettin on board, reach out to hello@mendable.ai for more or submit an issue!
## Running the project locally
First, start by installing dependencies
1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs)
2. pnpm [instructions](https://pnpm.io/installation)
3. redis [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/)
Set environment variables in a .env in the /apps/api/ directoryyou can copy over the template in .env.example.
To start, we wont set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features )
.env:
```
# ===== Required ENVS ======
NUM_WORKERS_PER_QUEUE=8
PORT=3002
HOST=0.0.0.0
REDIS_URL=redis://localhost:6379
## To turn on DB authentication, you need to set up supabase.
USE_DB_AUTHENTICATION=false
# ===== Optional ENVS ======
# Supabase Setup (used to support DB authentication, advanced logging, etc.)
SUPABASE_ANON_TOKEN=
SUPABASE_URL=
SUPABASE_SERVICE_TOKEN=
# Other Optionals
TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
BULL_AUTH_KEY= #
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
```
### Installing dependencies
First, install the dependencies using pnpm.
```bash
pnpm install
```
### Running the project
You're going to need to open 3 terminals.
### Terminal 1 - setting up redis
Run the command anywhere within your project
```bash
redis-server
```
### Terminal 2 - setting up workers
Now, navigate to the apps/api/ directory and run:
```bash
pnpm run workers
```
This will start the workers who are responsible for processing crawl jobs.
### Terminal 3 - setting up the main server
To do this, navigate to the apps/api/ directory and run if you dont have this already, install pnpm here: https://pnpm.io/installation
Next, run your server with:
```bash
pnpm run start
```
### Terminal 3 - sending our first request.
Alright: now lets send our first request.
```curl
curl -X GET http://localhost:3002/test
```
This should return the response Hello, world!
If youd like to test the crawl endpoint, you can run this
```curl
curl -X POST http://localhost:3002/v0/crawl \
-H 'Content-Type: application/json' \
-d '{
"url": "https://mendable.ai"
}'
```
## Tests:
The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication.
If you'd like to run the tests with authentication, run `npm run test:prod`

View File

@ -21,7 +21,7 @@ We provide an easy to use API with our hosted version. You can find the playgrou
- [ ] LangchainJS - Coming Soon - [ ] LangchainJS - Coming Soon
Self-host. To self-host refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/SELF_HOST.md). To run locally, refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md).
### API Key ### API Key

View File

@ -1,6 +1,6 @@
# Self-hosting Firecrawl # Self-hosting Firecrawl
Guide coming soon. Refer to [CONTRIBUTING.md](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md) for instructions on how to run it locally.
*This repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository. The primary objective is to enhance the accuracy of LLM responses by utilizing clean data. It is not ready for full self-host yet - we're working on it* *This repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository. The primary objective is to enhance the accuracy of LLM responses by utilizing clean data. It is not ready for full self-host yet - we're working on it*

24
apps/api/.env.example Normal file
View File

@ -0,0 +1,24 @@
# ===== Required ENVS ======
NUM_WORKERS_PER_QUEUE=8
PORT=3002
HOST=0.0.0.0
REDIS_URL=redis://localhost:6379
## To turn on DB authentication, you need to set up supabase.
USE_DB_AUTHENTICATION=true
# ===== Optional ENVS ======
# Supabase Setup (used to support DB authentication, advanced logging, etc.)
SUPABASE_ANON_TOKEN=
SUPABASE_URL=
SUPABASE_SERVICE_TOKEN=
# Other Optionals
TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
BULL_AUTH_KEY= #
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs

View File

@ -1,15 +0,0 @@
ENV=
NUM_WORKERS_PER_QUEUE=8
PORT=
HOST=
SUPABASE_ANON_TOKEN=
SUPABASE_URL=
SUPABASE_SERVICE_TOKEN=
REDIS_URL=
SCRAPING_BEE_API_KEY=
OPENAI_API_KEY=
BULL_AUTH_KEY=
LOGTAIL_KEY=
PLAYWRIGHT_MICROSERVICE_URL=
LLAMAPARSE_API_KEY=
TEST_API_KEY=

View File

@ -11,6 +11,8 @@
"start:dev": "nodemon --exec ts-node src/index.ts", "start:dev": "nodemon --exec ts-node src/index.ts",
"build": "tsc", "build": "tsc",
"test": "jest --verbose", "test": "jest --verbose",
"test:local-no-auth":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
"test:prod":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
"workers": "nodemon --exec ts-node src/services/queue-worker.ts", "workers": "nodemon --exec ts-node src/services/queue-worker.ts",
"worker:production": "node dist/src/services/queue-worker.js", "worker:production": "node dist/src/services/queue-worker.js",
"mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest", "mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest",

View File

@ -1,179 +0,0 @@
import request from 'supertest';
import { app } from '../../index';
import dotenv from 'dotenv';
dotenv.config();
const TEST_URL = 'http://localhost:3002'
describe('E2E Tests for API Routes', () => {
describe('GET /', () => {
it('should return Hello, world! message', async () => {
const response = await request(TEST_URL).get('/');
expect(response.statusCode).toBe(200);
expect(response.text).toContain('SCRAPERS-JS: Hello, world! Fly.io');
});
});
describe('GET /test', () => {
it('should return Hello, world! message', async () => {
const response = await request(TEST_URL).get('/test');
expect(response.statusCode).toBe(200);
expect(response.text).toContain('Hello, world!');
});
});
describe('POST /v0/scrape', () => {
it('should require authorization', async () => {
const response = await request(app).post('/v0/scrape');
expect(response.statusCode).toBe(401);
});
it('should return an error response with an invalid API key', async () => {
const response = await request(TEST_URL)
.post('/v0/scrape')
.set('Authorization', `Bearer invalid-api-key`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(response.statusCode).toBe(401);
});
it('should return a successful response with a valid preview token', async () => {
const response = await request(TEST_URL)
.post('/v0/scrape')
.set('Authorization', `Bearer this_is_just_a_preview_token`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(response.statusCode).toBe(200);
}, 10000); // 10 seconds timeout
it('should return a successful response with a valid API key', async () => {
const response = await request(TEST_URL)
.post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
expect(response.body.data).toHaveProperty('content');
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.content).toContain('🔥 FireCrawl');
}, 30000); // 30 seconds timeout
});
describe('POST /v0/crawl', () => {
it('should require authorization', async () => {
const response = await request(TEST_URL).post('/v0/crawl');
expect(response.statusCode).toBe(401);
});
it('should return an error response with an invalid API key', async () => {
const response = await request(TEST_URL)
.post('/v0/crawl')
.set('Authorization', `Bearer invalid-api-key`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(response.statusCode).toBe(401);
});
it('should return a successful response with a valid API key', async () => {
const response = await request(TEST_URL)
.post('/v0/crawl')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('jobId');
expect(response.body.jobId).toMatch(/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/);
});
// Additional tests for insufficient credits?
});
describe('POST /v0/crawlWebsitePreview', () => {
it('should require authorization', async () => {
const response = await request(TEST_URL).post('/v0/crawlWebsitePreview');
expect(response.statusCode).toBe(401);
});
it('should return an error response with an invalid API key', async () => {
const response = await request(TEST_URL)
.post('/v0/crawlWebsitePreview')
.set('Authorization', `Bearer invalid-api-key`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(response.statusCode).toBe(401);
});
it('should return a successful response with a valid API key', async () => {
const response = await request(TEST_URL)
.post('/v0/crawlWebsitePreview')
.set('Authorization', `Bearer this_is_just_a_preview_token`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('jobId');
expect(response.body.jobId).toMatch(/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/);
});
});
describe('GET /v0/crawl/status/:jobId', () => {
it('should require authorization', async () => {
const response = await request(TEST_URL).get('/v0/crawl/status/123');
expect(response.statusCode).toBe(401);
});
it('should return an error response with an invalid API key', async () => {
const response = await request(TEST_URL)
.get('/v0/crawl/status/123')
.set('Authorization', `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
});
it('should return Job not found for invalid job ID', async () => {
const response = await request(TEST_URL)
.get('/v0/crawl/status/invalidJobId')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
});
it('should return a successful response for a valid crawl job', async () => {
const crawlResponse = await request(TEST_URL)
.post('/v0/crawl')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://firecrawl.dev' });
expect(crawlResponse.statusCode).toBe(200);
const response = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('status');
expect(response.body.status).toBe('active');
// wait for 30 seconds
await new Promise((r) => setTimeout(r, 30000));
const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty('status');
expect(completedResponse.body.status).toBe('completed');
expect(completedResponse.body).toHaveProperty('data');
expect(completedResponse.body.data[0]).toHaveProperty('content');
expect(completedResponse.body.data[0]).toHaveProperty('markdown');
expect(completedResponse.body.data[0]).toHaveProperty('metadata');
expect(completedResponse.body.data[0].content).toContain('🔥 FireCrawl');
}, 60000); // 60 seconds
});
describe('GET /is-production', () => {
it('should return the production status', async () => {
const response = await request(TEST_URL).get('/is-production');
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('isProduction');
});
});
});

View File

@ -0,0 +1,156 @@
import request from "supertest";
import { app } from "../../index";
import dotenv from "dotenv";
const fs = require("fs");
const path = require("path");
dotenv.config();
const TEST_URL = "http://127.0.0.1:3002";
describe("E2E Tests for API Routes with No Authentication", () => {
let originalEnv: NodeJS.ProcessEnv;
// save original process.env
beforeAll(() => {
originalEnv = { ...process.env };
process.env.USE_DB_AUTHENTICATION = "false";
process.env.SUPABASE_ANON_TOKEN = "";
process.env.SUPABASE_URL = "";
process.env.SUPABASE_SERVICE_TOKEN = "";
process.env.SCRAPING_BEE_API_KEY = "";
process.env.OPENAI_API_KEY = "";
process.env.BULL_AUTH_KEY = "";
process.env.LOGTAIL_KEY = "";
process.env.PLAYWRIGHT_MICROSERVICE_URL = "";
process.env.LLAMAPARSE_API_KEY = "";
process.env.TEST_API_KEY = "";
});
// restore original process.env
afterAll(() => {
process.env = originalEnv;
});
describe("GET /", () => {
it("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/");
expect(response.statusCode).toBe(200);
expect(response.text).toContain("SCRAPERS-JS: Hello, world! Fly.io");
});
});
describe("GET /test", () => {
it("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/test");
expect(response.statusCode).toBe(200);
expect(response.text).toContain("Hello, world!");
});
});
describe("POST /v0/scrape", () => {
it("should not require authorization", async () => {
const response = await request(TEST_URL).post("/v0/scrape");
expect(response.statusCode).not.toBe(401);
});
it("should return a successful response", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
}, 10000); // 10 seconds timeout
});
describe("POST /v0/crawl", () => {
it("should not require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawl");
expect(response.statusCode).not.toBe(401);
});
it("should return a successful response", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
});
describe("POST /v0/crawlWebsitePreview", () => {
it("should not require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview");
expect(response.statusCode).not.toBe(401);
});
it("should return a successful response", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
});
describe("GET /v0/crawl/status/:jobId", () => {
it("should not require authorization", async () => {
const response = await request(TEST_URL).get("/v0/crawl/status/123");
expect(response.statusCode).not.toBe(401);
});
it("should return Job not found for invalid job ID", async () => {
const response = await request(TEST_URL).get(
"/v0/crawl/status/invalidJobId"
);
expect(response.statusCode).toBe(404);
});
it("should return a successful response for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(crawlResponse.statusCode).toBe(200);
const response = await request(TEST_URL).get(
`/v0/crawl/status/${crawlResponse.body.jobId}`
);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
expect(response.body.status).toBe("active");
// wait for 30 seconds
await new Promise((r) => setTimeout(r, 30000));
const completedResponse = await request(TEST_URL).get(
`/v0/crawl/status/${crawlResponse.body.jobId}`
);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("completed");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data[0]).toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("🔥 FireCrawl");
}, 60000); // 60 seconds
});
describe("GET /is-production", () => {
it("should return the production status", async () => {
const response = await request(TEST_URL).get("/is-production");
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("isProduction");
});
});
});

View File

@ -0,0 +1,197 @@
import request from "supertest";
import { app } from "../../index";
import dotenv from "dotenv";
dotenv.config();
// const TEST_URL = 'http://localhost:3002'
const TEST_URL = "http://127.0.0.1:3002";
describe("E2E Tests for API Routes", () => {
beforeAll(() => {
process.env.USE_DB_AUTHENTICATION = "true";
});
afterAll(() => {
delete process.env.USE_DB_AUTHENTICATION;
});
describe("GET /", () => {
it("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/");
expect(response.statusCode).toBe(200);
expect(response.text).toContain("SCRAPERS-JS: Hello, world! Fly.io");
});
});
describe("GET /test", () => {
it("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/test");
expect(response.statusCode).toBe(200);
expect(response.text).toContain("Hello, world!");
});
});
describe("POST /v0/scrape", () => {
it("should require authorization", async () => {
const response = await request(app).post("/v0/scrape");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
});
it("should return a successful response with a valid preview token", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
}, 10000); // 10 seconds timeout
it("should return a successful response with a valid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.content).toContain("🔥 FireCrawl");
}, 30000); // 30 seconds timeout
});
describe("POST /v0/crawl", () => {
it("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawl");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
});
it("should return a successful response with a valid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
// Additional tests for insufficient credits?
});
describe("POST /v0/crawlWebsitePreview", () => {
it("should require authorization", async () => {
const response = await request(TEST_URL).post(
"/v0/crawlWebsitePreview"
);
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
});
it("should return a successful response with a valid API key", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
});
describe("GET /v0/crawl/status/:jobId", () => {
it("should require authorization", async () => {
const response = await request(TEST_URL).get("/v0/crawl/status/123");
expect(response.statusCode).toBe(401);
});
it("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL)
.get("/v0/crawl/status/123")
.set("Authorization", `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
});
it("should return Job not found for invalid job ID", async () => {
const response = await request(TEST_URL)
.get("/v0/crawl/status/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
});
it("should return a successful response for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(crawlResponse.statusCode).toBe(200);
const response = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
expect(response.body.status).toBe("active");
// wait for 30 seconds
await new Promise((r) => setTimeout(r, 30000));
const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("completed");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data[0]).toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain(
"🔥 FireCrawl"
);
}, 60000); // 60 seconds
});
describe("GET /is-production", () => {
it("should return the production status", async () => {
const response = await request(TEST_URL).get("/is-production");
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("isProduction");
});
});
});

View File

@ -1,9 +1,15 @@
import { parseApi } from "../../src/lib/parseApi"; import { parseApi } from "../../src/lib/parseApi";
import { getRateLimiter } from "../../src/services/rate-limiter"; import { getRateLimiter } from "../../src/services/rate-limiter";
import { RateLimiterMode } from "../../src/types"; import { AuthResponse, RateLimiterMode } from "../../src/types";
import { supabase_service } from "../../src/services/supabase"; import { supabase_service } from "../../src/services/supabase";
import { withAuth } from "../../src/lib/withAuth";
export async function authenticateUser(
export async function authenticateUser(req, res, mode?: RateLimiterMode) : Promise<AuthResponse> {
return withAuth(supaAuthenticateUser)(req, res, mode);
}
export async function supaAuthenticateUser(
req, req,
res, res,
mode?: RateLimiterMode mode?: RateLimiterMode
@ -13,6 +19,7 @@ export async function authenticateUser(
error?: string; error?: string;
status?: number; status?: number;
}> { }> {
const authHeader = req.headers.authorization; const authHeader = req.headers.authorization;
if (!authHeader) { if (!authHeader) {
return { success: false, error: "Unauthorized", status: 401 }; return { success: false, error: "Unauthorized", status: 401 };

View File

@ -23,7 +23,6 @@ export async function crawlController(req: Request, res: Response) {
return res.status(402).json({ error: "Insufficient credits" }); return res.status(402).json({ error: "Insufficient credits" });
} }
// authenticate on supabase
const url = req.body.url; const url = req.body.url;
if (!url) { if (!url) {
return res.status(400).json({ error: "Url is required" }); return res.status(400).json({ error: "Url is required" });
@ -42,7 +41,6 @@ export async function crawlController(req: Request, res: Response) {
returnOnlyUrls: true, returnOnlyUrls: true,
}, },
pageOptions: pageOptions, pageOptions: pageOptions,
}); });
const docs = await a.getDocuments(false, (progress) => { const docs = await a.getDocuments(false, (progress) => {

View File

@ -40,6 +40,7 @@ export async function scrapeHelper(
if (filteredDocs.length === 0) { if (filteredDocs.length === 0) {
return { success: true, error: "No page found", returnCode: 200 }; return { success: true, error: "No page found", returnCode: 200 };
} }
const { success, credit_usage } = await billTeam( const { success, credit_usage } = await billTeam(
team_id, team_id,
filteredDocs.length filteredDocs.length
@ -52,6 +53,7 @@ export async function scrapeHelper(
returnCode: 402, returnCode: 402,
}; };
} }
return { return {
success: true, success: true,
data: filteredDocs[0], data: filteredDocs[0],

View File

@ -5,7 +5,6 @@ import "dotenv/config";
import { getWebScraperQueue } from "./services/queue-service"; import { getWebScraperQueue } from "./services/queue-service";
import { redisClient } from "./services/rate-limiter"; import { redisClient } from "./services/rate-limiter";
import { v0Router } from "./routes/v0"; import { v0Router } from "./routes/v0";
const { createBullBoard } = require("@bull-board/api"); const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { BullAdapter } = require("@bull-board/api/bullAdapter");
const { ExpressAdapter } = require("@bull-board/express"); const { ExpressAdapter } = require("@bull-board/express");
@ -48,6 +47,7 @@ const DEFAULT_PORT = process.env.PORT ?? 3002;
const HOST = process.env.HOST ?? "localhost"; const HOST = process.env.HOST ?? "localhost";
redisClient.connect(); redisClient.connect();
export function startServer(port = DEFAULT_PORT) { export function startServer(port = DEFAULT_PORT) {
const server = app.listen(Number(port), HOST, () => { const server = app.listen(Number(port), HOST, () => {
console.log(`Server listening on port ${port}`); console.log(`Server listening on port ${port}`);

View File

@ -0,0 +1,24 @@
import { AuthResponse } from "../../src/types";
let warningCount = 0;
export function withAuth<T extends AuthResponse, U extends any[]>(
originalFunction: (...args: U) => Promise<T>
) {
return async function (...args: U): Promise<T> {
if (process.env.USE_DB_AUTHENTICATION === "false") {
if (warningCount < 5) {
console.warn("WARNING - You're bypassing authentication");
warningCount++;
}
return { success: true } as T;
} else {
try {
return await originalFunction(...args);
} catch (error) {
console.error("Error in withAuth function: ", error);
return { success: false, error: error.message } as T;
}
}
};
}

View File

@ -1,7 +1,12 @@
import { withAuth } from "../../lib/withAuth";
import { supabase_service } from "../supabase"; import { supabase_service } from "../supabase";
const FREE_CREDITS = 100; const FREE_CREDITS = 100;
export async function billTeam(team_id: string, credits: number) { export async function billTeam(team_id: string, credits: number) {
return withAuth(supaBillTeam)(team_id, credits);
}
export async function supaBillTeam(team_id: string, credits: number) {
if (team_id === "preview") { if (team_id === "preview") {
return { success: true, message: "Preview team, no credits used" }; return { success: true, message: "Preview team, no credits used" };
} }
@ -52,8 +57,11 @@ export async function billTeam(team_id: string, credits: number) {
return { success: true, credit_usage }; return { success: true, credit_usage };
} }
// if team has enough credits for the operation, return true, else return false
export async function checkTeamCredits(team_id: string, credits: number) { export async function checkTeamCredits(team_id: string, credits: number) {
return withAuth(supaCheckTeamCredits)(team_id, credits);
}
// if team has enough credits for the operation, return true, else return false
export async function supaCheckTeamCredits(team_id: string, credits: number) {
if (team_id === "preview") { if (team_id === "preview") {
return { success: true, message: "Preview team, no credits used" }; return { success: true, message: "Preview team, no credits used" };
} }

View File

@ -1,4 +1,19 @@
const { Logtail } = require("@logtail/node"); import { Logtail } from "@logtail/node";
//dot env import "dotenv/config";
require("dotenv").config();
export const logtail = new Logtail(process.env.LOGTAIL_KEY); // A mock Logtail class to handle cases where LOGTAIL_KEY is not provided
class MockLogtail {
info(message: string, context?: Record<string, any>): void {
console.log(message, context);
}
error(message: string, context: Record<string, any> = {}): void {
console.error(message, context);
}
}
// Using the actual Logtail class if LOGTAIL_KEY exists, otherwise using the mock class
// Additionally, print a warning to the terminal if LOGTAIL_KEY is not provided
export const logtail = process.env.LOGTAIL_KEY ? new Logtail(process.env.LOGTAIL_KEY) : (() => {
console.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more.");
return new MockLogtail();
})();

View File

@ -1,6 +1,56 @@
import { createClient } from "@supabase/supabase-js"; import { createClient, SupabaseClient } from "@supabase/supabase-js";
export const supabase_service = createClient<any>( // SupabaseService class initializes the Supabase client conditionally based on environment variables.
process.env.SUPABASE_URL, class SupabaseService {
process.env.SUPABASE_SERVICE_TOKEN, private client: SupabaseClient | null = null;
);
constructor() {
const supabaseUrl = process.env.SUPABASE_URL;
const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN;
// Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") {
// Warn the user that Authentication is disabled by setting the client to null
console.warn(
"\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"
);
this.client = null;
} else if (!supabaseUrl || !supabaseServiceToken) {
console.error(
"\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"
);
} else {
this.client = createClient(supabaseUrl, supabaseServiceToken);
}
}
// Provides access to the initialized Supabase client, if available.
getClient(): SupabaseClient | null {
return this.client;
}
}
// Using a Proxy to handle dynamic access to the Supabase client or service methods.
// This approach ensures that if Supabase is not configured, any attempt to use it will result in a clear error.
export const supabase_service: SupabaseClient = new Proxy(
new SupabaseService(),
{
get: function (target, prop, receiver) {
const client = target.getClient();
// If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
if (client === null) {
console.error(
"Attempted to access Supabase client when it's not configured."
);
return () => {
throw new Error("Supabase client is not configured.");
};
}
// Direct access to SupabaseService properties takes precedence.
if (prop in target) {
return Reflect.get(target, prop, receiver);
}
// Otherwise, delegate access to the Supabase client.
return Reflect.get(client, prop, receiver);
},
}
) as unknown as SupabaseClient;

View File

@ -25,7 +25,6 @@ export interface WebScraperOptions {
origin?: string; origin?: string;
} }
export interface FirecrawlJob { export interface FirecrawlJob {
success: boolean; success: boolean;
message: string; message: string;
@ -40,8 +39,6 @@ export interface FirecrawlJob {
origin: string; origin: string;
} }
export enum RateLimiterMode { export enum RateLimiterMode {
Crawl = "crawl", Crawl = "crawl",
CrawlStatus = "crawl-status", CrawlStatus = "crawl-status",
@ -49,4 +46,9 @@ export enum RateLimiterMode {
Preview = "preview", Preview = "preview",
} }
export interface AuthResponse {
success: boolean;
team_id?: string;
error?: string;
status?: number;
}