Add tests
This commit is contained in:
parent
9b254c1cd0
commit
59451754f5
@ -482,6 +482,49 @@ describe("E2E Tests for API Routes", () => {
|
||||
});
|
||||
}, 120000);
|
||||
|
||||
it.concurrent("should return a successful response with relative max depth option for a valid crawl job different depths (2)", async () => {
|
||||
const crawlResponse = await request(TEST_URL)
|
||||
.post("/v0/crawl")
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||
.set("Content-Type", "application/json")
|
||||
.send({
|
||||
url: "https://www.scrapethissite.com",
|
||||
crawlerOptions: { maxDepth: 2 },
|
||||
});
|
||||
expect(crawlResponse.statusCode).toBe(200);
|
||||
|
||||
const response = await request(TEST_URL)
|
||||
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body).toHaveProperty("status");
|
||||
expect(response.body.status).toBe("active");
|
||||
// wait for 60 seconds
|
||||
await new Promise((r) => setTimeout(r, 60000));
|
||||
const completedResponse = await request(TEST_URL)
|
||||
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
|
||||
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||
|
||||
expect(completedResponse.statusCode).toBe(200);
|
||||
expect(completedResponse.body).toHaveProperty("status");
|
||||
expect(completedResponse.body.status).toBe("completed");
|
||||
expect(completedResponse.body).toHaveProperty("data");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("content");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
||||
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||
const urls = completedResponse.body.data.map(
|
||||
(item: any) => item.metadata?.sourceURL
|
||||
);
|
||||
expect(urls.length).toBeGreaterThanOrEqual(1);
|
||||
|
||||
// Check that all URLs have an absolute maximum depth of 3, since the base URL depth is 1 and the maxDepth is 2
|
||||
urls.forEach((url: string) => {
|
||||
const pathSplits = new URL(url).pathname.split('/');
|
||||
const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
|
||||
expect(depth).toBeLessThanOrEqual(3);
|
||||
});
|
||||
}, 120000);
|
||||
|
||||
// it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
|
||||
// const crawlResponse = await request(TEST_URL)
|
||||
// .post("/v0/crawl")
|
||||
|
@ -166,8 +166,7 @@ export class WebScraperDataProvider {
|
||||
const pathSplits = new URL(this.urls[0]).pathname.split('/');
|
||||
const baseURLDepth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
|
||||
const adjustedMaxDepth = this.maxCrawledDepth + baseURLDepth;
|
||||
|
||||
|
||||
|
||||
const crawler = new WebCrawler({
|
||||
initialUrl: this.urls[0],
|
||||
includes: this.includes,
|
||||
|
Loading…
x
Reference in New Issue
Block a user