0

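Added response types (FirecrawlScrapeResponse, FirecrawlCrawlResponse, FirecrawlCrawlStatusResponse) and used them to annotate the E2E API route tests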

This commit is contained in:
rafaelsideguide 2024-06-18 10:55:07 -03:00
parent b2bd562bb2
commit 20f14bcf7f
2 changed files with 60 additions and 31 deletions

View File

@ -1,5 +1,6 @@
import request from "supertest"; import request from "supertest";
import dotenv from "dotenv"; import dotenv from "dotenv";
import { FirecrawlCrawlResponse, FirecrawlCrawlStatusResponse, FirecrawlScrapeResponse } from "../../types";
dotenv.config(); dotenv.config();
const TEST_URL = "http://127.0.0.1:3002"; const TEST_URL = "http://127.0.0.1:3002";
@ -23,12 +24,12 @@ describe("E2E Tests for API Routes", () => {
describe("POST /v0/scrape", () => { describe("POST /v0/scrape", () => {
it.concurrent("should require authorization", async () => { it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/scrape"); const response: FirecrawlScrapeResponse = await request(TEST_URL).post("/v0/scrape");
expect(response.statusCode).toBe(401); expect(response.statusCode).toBe(401);
}); });
it.concurrent("should return an error response with an invalid API key", async () => { it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post("/v0/scrape") .post("/v0/scrape")
.set("Authorization", `Bearer invalid-api-key`) .set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -37,7 +38,7 @@ describe("E2E Tests for API Routes", () => {
}); });
it.concurrent("should return a successful response with a valid API key", async () => { it.concurrent("should return a successful response with a valid API key", async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post("/v0/scrape") .post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -54,7 +55,7 @@ describe("E2E Tests for API Routes", () => {
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => { it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post("/v0/scrape") .post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -76,7 +77,7 @@ describe("E2E Tests for API Routes", () => {
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
it.concurrent('should return a successful response for a valid scrape with PDF file', async () => { it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post('/v0/scrape') .post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json') .set('Content-Type', 'application/json')
@ -93,7 +94,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => { it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post('/v0/scrape') .post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json') .set('Content-Type', 'application/json')
@ -110,7 +111,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent("should return a successful response with a valid API key with removeTags option", async () => { it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
const responseWithoutRemoveTags = await request(TEST_URL) const responseWithoutRemoveTags: FirecrawlScrapeResponse = await request(TEST_URL)
.post("/v0/scrape") .post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -126,7 +127,7 @@ describe("E2E Tests for API Routes", () => {
expect(responseWithoutRemoveTags.body.data.content).toContain("[Sandbox]("); // .nav expect(responseWithoutRemoveTags.body.data.content).toContain("[Sandbox]("); // .nav
expect(responseWithoutRemoveTags.body.data.content).toContain("web scraping"); // strong expect(responseWithoutRemoveTags.body.data.content).toContain("web scraping"); // strong
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post("/v0/scrape") .post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -144,7 +145,7 @@ describe("E2E Tests for API Routes", () => {
}, 30000); // 30 seconds timeout }, 30000); // 30 seconds timeout
it.concurrent('should return a successful response for a scrape with 400 page', async () => { it.concurrent('should return a successful response for a scrape with 400 page', async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post('/v0/scrape') .post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json') .set('Content-Type', 'application/json')
@ -160,7 +161,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent('should return a successful response for a scrape with 401 page', async () => { it.concurrent('should return a successful response for a scrape with 401 page', async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post('/v0/scrape') .post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json') .set('Content-Type', 'application/json')
@ -176,7 +177,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent("should return a successful response for a scrape with 403 page", async () => { it.concurrent("should return a successful response for a scrape with 403 page", async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post('/v0/scrape') .post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json') .set('Content-Type', 'application/json')
@ -192,7 +193,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent('should return a successful response for a scrape with 404 page', async () => { it.concurrent('should return a successful response for a scrape with 404 page', async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post('/v0/scrape') .post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json') .set('Content-Type', 'application/json')
@ -224,7 +225,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent('should return a successful response for a scrape with 500 page', async () => { it.concurrent('should return a successful response for a scrape with 500 page', async () => {
const response = await request(TEST_URL) const response: FirecrawlScrapeResponse = await request(TEST_URL)
.post('/v0/scrape') .post('/v0/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json') .set('Content-Type', 'application/json')
@ -242,12 +243,12 @@ describe("E2E Tests for API Routes", () => {
describe("POST /v0/crawl", () => { describe("POST /v0/crawl", () => {
it.concurrent("should require authorization", async () => { it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawl"); const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawl");
expect(response.statusCode).toBe(401); expect(response.statusCode).toBe(401);
}); });
it.concurrent("should return an error response with an invalid API key", async () => { it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL) const response: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/crawl") .post("/v0/crawl")
.set("Authorization", `Bearer invalid-api-key`) .set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -256,7 +257,7 @@ describe("E2E Tests for API Routes", () => {
}); });
it.concurrent("should return a successful response with a valid API key for crawl", async () => { it.concurrent("should return a successful response with a valid API key for crawl", async () => {
const response = await request(TEST_URL) const response: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/crawl") .post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -269,7 +270,7 @@ describe("E2E Tests for API Routes", () => {
}); });
it.concurrent("should return a successful response with a valid API key and valid includes option", async () => { it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
const crawlResponse = await request(TEST_URL) const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/crawl") .post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -281,7 +282,7 @@ describe("E2E Tests for API Routes", () => {
}, },
}); });
let response; let response: FirecrawlCrawlStatusResponse;
let isFinished = false; let isFinished = false;
while (!isFinished) { while (!isFinished) {
@ -321,7 +322,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => { it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
const crawlResponse = await request(TEST_URL) const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/crawl") .post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -334,7 +335,7 @@ describe("E2E Tests for API Routes", () => {
}); });
let isFinished = false; let isFinished = false;
let response; let response: FirecrawlCrawlStatusResponse;
while (!isFinished) { while (!isFinished) {
response = await request(TEST_URL) response = await request(TEST_URL)
@ -350,7 +351,7 @@ describe("E2E Tests for API Routes", () => {
} }
} }
const completedResponse = response; const completedResponse: FirecrawlCrawlStatusResponse = response;
const urls = completedResponse.body.data.map( const urls = completedResponse.body.data.map(
(item: any) => item.metadata?.sourceURL (item: any) => item.metadata?.sourceURL
@ -362,7 +363,7 @@ describe("E2E Tests for API Routes", () => {
}, 90000); // 90 seconds }, 90000); // 90 seconds
it.concurrent("should return a successful response with a valid API key and limit to 3", async () => { it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
const crawlResponse = await request(TEST_URL) const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/crawl") .post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -372,7 +373,7 @@ describe("E2E Tests for API Routes", () => {
}); });
let isFinished = false; let isFinished = false;
let response; let response: FirecrawlCrawlStatusResponse;
while (!isFinished) { while (!isFinished) {
response = await request(TEST_URL) response = await request(TEST_URL)
@ -388,7 +389,7 @@ describe("E2E Tests for API Routes", () => {
} }
} }
const completedResponse = response; const completedResponse: FirecrawlCrawlStatusResponse = response;
expect(completedResponse.statusCode).toBe(200); expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status"); expect(completedResponse.body).toHaveProperty("status");
@ -404,7 +405,7 @@ describe("E2E Tests for API Routes", () => {
}, 60000); // 60 seconds }, 60000); // 60 seconds
it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => { it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
const crawlResponse = await request(TEST_URL) const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/crawl") .post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -414,7 +415,7 @@ describe("E2E Tests for API Routes", () => {
}); });
expect(crawlResponse.statusCode).toBe(200); expect(crawlResponse.statusCode).toBe(200);
const response = await request(TEST_URL) const response: FirecrawlCrawlStatusResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`) .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200); expect(response.statusCode).toBe(200);
@ -432,7 +433,7 @@ describe("E2E Tests for API Routes", () => {
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
} }
} }
const completedResponse = await request(TEST_URL) const completedResponse: FirecrawlCrawlStatusResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`) .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
@ -461,12 +462,12 @@ describe("E2E Tests for API Routes", () => {
describe("POST /v0/crawlWebsitePreview", () => { describe("POST /v0/crawlWebsitePreview", () => {
it.concurrent("should require authorization", async () => { it.concurrent("should require authorization", async () => {
const response = await request(TEST_URL).post("/v0/crawlWebsitePreview"); const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawlWebsitePreview");
expect(response.statusCode).toBe(401); expect(response.statusCode).toBe(401);
}); });
it.concurrent("should return an error response with an invalid API key", async () => { it.concurrent("should return an error response with an invalid API key", async () => {
const response = await request(TEST_URL) const response: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/crawlWebsitePreview") .post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer invalid-api-key`) .set("Authorization", `Bearer invalid-api-key`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")
@ -475,7 +476,7 @@ describe("E2E Tests for API Routes", () => {
}); });
it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => { it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
const response = await request(TEST_URL) const response: FirecrawlCrawlResponse = await request(TEST_URL)
.post("/v0/scrape") .post("/v0/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json") .set("Content-Type", "application/json")

View File

@ -1,4 +1,4 @@
import { ExtractorOptions } from "./lib/entities"; import { ExtractorOptions, Document } from "./lib/entities";
export interface CrawlResult { export interface CrawlResult {
source: string; source: string;
@ -43,6 +43,34 @@ export interface FirecrawlJob {
num_tokens?: number, num_tokens?: number,
} }
/**
 * Shape the E2E tests use for the response to `POST /v0/scrape`.
 */
export interface FirecrawlScrapeResponse {
  /** HTTP status code of the response (the tests compare it with values such as 401). */
  statusCode: number;
  /**
   * Optional error text.
   * NOTE(review): declared next to `body`, not inside it — confirm this is
   * where the API actually reports errors.
   */
  error?: string;
  /** Response payload. */
  body: {
    /** Status string carried in the payload. */
    status: string;
    /** The single document returned for the scrape request. */
    data: Document;
  };
}
/**
 * Shape the E2E tests use for the response to `POST /v0/crawl`.
 */
export interface FirecrawlCrawlResponse {
  /** HTTP status code of the response. */
  statusCode: number;
  /**
   * Optional error text.
   * NOTE(review): declared next to `body`, not inside it — confirm this is
   * where the API actually reports errors.
   */
  error?: string;
  /** Response payload. */
  body: {
    /** Status string carried in the payload. */
    status: string;
    /** Job identifier; the tests interpolate it into `/v0/crawl/status/<jobId>`. */
    jobId: string;
  };
}
/**
 * Shape the E2E tests use for the response to `GET /v0/crawl/status/<jobId>`.
 */
export interface FirecrawlCrawlStatusResponse {
  /** HTTP status code of the response. */
  statusCode: number;
  /**
   * Optional error text.
   * NOTE(review): declared next to `body`, not inside it — confirm this is
   * where the API actually reports errors.
   */
  error?: string;
  /** Response payload. */
  body: {
    /** Status string of the crawl job. */
    status: string;
    /** Documents returned for the crawl job; the tests read `metadata?.sourceURL` from each item. */
    data: Document[];
  };
}
export enum RateLimiterMode { export enum RateLimiterMode {
Crawl = "crawl", Crawl = "crawl",
CrawlStatus = "crawlStatus", CrawlStatus = "crawlStatus",