commit 01c9f071fa
parent dc6acbf1f0
Author: rafaelsideguide
Date:   2024-06-12 11:27:06 -03:00
2 changed files with 14 additions and 26 deletions

@@ -625,7 +625,7 @@ describe("E2E Tests for API Routes", () => {
       expect(completedResponse.body.data[0].content).toContain("Mendable");
       const childrenLinks = completedResponse.body.data.filter(doc =>
-        doc.sourceURL && doc.sourceURL.startsWith("https://mendable.ai/blog")
+        doc.metadata && doc.metadata.sourceURL && doc.metadata.sourceURL.includes("mendable.ai/blog")
       );
       expect(childrenLinks.length).toBe(completedResponse.body.data.length);
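The updated assertion reads sourceURL from each document's metadata object and matches a substring instead of a URL prefix. A minimal sketch of the shape that filter assumes (the CrawledDocument type and the sample documents below are illustrative, not taken from the repo):

// Illustrative document shape: sourceURL lives under metadata, as the updated test expects.
type CrawledDocument = {
  content: string;
  metadata?: { sourceURL?: string };
};

const docs: CrawledDocument[] = [
  { content: "post one", metadata: { sourceURL: "https://mendable.ai/blog/post-1" } },
  { content: "post two", metadata: { sourceURL: "https://www.mendable.ai/blog/post-2" } },
];

// Same predicate as the updated test: every crawled document must come from the blog subtree.
const childrenLinks = docs.filter(
  (doc) => doc.metadata && doc.metadata.sourceURL && doc.metadata.sourceURL.includes("mendable.ai/blog")
);
console.assert(childrenLinks.length === docs.length);

Switching from startsWith("https://mendable.ai/blog") to includes("mendable.ai/blog") also keeps documents whose sourceURL carries a www. prefix or a different scheme, which the old prefix check would have rejected.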
@@ -816,35 +816,23 @@ describe("E2E Tests for API Routes", () => {
       .post("/v0/crawl")
       .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
       .set("Content-Type", "application/json")
-      .send({ url: "https://scrapethissite.com" });
+      .send({ url: "https://jestjs.io" });
     expect(crawlResponse.statusCode).toBe(200);
-    await new Promise((r) => setTimeout(r, 2000)); // Wait for 1 seconds before cancelling the job
+    await new Promise((r) => setTimeout(r, 20000));
     const responseCancel = await request(TEST_URL)
       .delete(`/v0/crawl/cancel/${crawlResponse.body.jobId}`)
       .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
     expect(responseCancel.statusCode).toBe(200);
-    let isFinished = false;
-    let completedResponse;
-    while (!isFinished) {
-      const response = await request(TEST_URL)
+    expect(responseCancel.body).toHaveProperty("status");
+    expect(responseCancel.body.status).toBe("cancelled");
+    await new Promise((r) => setTimeout(r, 10000));
+    const completedResponse = await request(TEST_URL)
       .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
       .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-      expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty("status");
-      console.log(response.body.status)
-      if (response.body.status === "failed") {
-        isFinished = true;
-        completedResponse = response;
-      } else {
-        await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
-      }
-    }
     expect(completedResponse.statusCode).toBe(200);
     expect(completedResponse.body).toHaveProperty("status");
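Assembled, the cancel test now uses fixed waits instead of polling until the job reports "failed". The sketch below shows the resulting flow; it assumes supertest's request, a TEST_URL pointing at the locally running API, and the /v0 endpoints used above, and the test name and 60-second timeout are illustrative rather than copied from the file:

import request from "supertest";

const TEST_URL = "http://localhost:3002"; // assumed base URL for the e2e suite

it("cancels a crawl job and later reports it as failed", async () => {
  const crawlResponse = await request(TEST_URL)
    .post("/v0/crawl")
    .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
    .set("Content-Type", "application/json")
    .send({ url: "https://jestjs.io" });
  expect(crawlResponse.statusCode).toBe(200);

  await new Promise((r) => setTimeout(r, 20000)); // let the crawl get under way before cancelling

  const responseCancel = await request(TEST_URL)
    .delete(`/v0/crawl/cancel/${crawlResponse.body.jobId}`)
    .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
  expect(responseCancel.statusCode).toBe(200);
  expect(responseCancel.body).toHaveProperty("status");
  expect(responseCancel.body.status).toBe("cancelled");

  await new Promise((r) => setTimeout(r, 10000)); // give the cancelled job time to settle

  const completedResponse = await request(TEST_URL)
    .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
    .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
  expect(completedResponse.statusCode).toBe(200);
  expect(completedResponse.body).toHaveProperty("status");
}, 60000); // illustrative timeout, large enough to cover the two fixed waits

The fixed waits trade the old loop's early exit for a simpler, bounded test; the Jest timeout just has to stay above the roughly 30 seconds of sleeping.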

@@ -55,7 +55,7 @@ export async function crawlController(req: Request, res: Response) {
   }
   const mode = req.body.mode ?? "crawl";
-  const crawlerOptions = req.body.crawlerOptions ?? { allowBackwardCrawling: false, returnOnlyUrls: true };
+  const crawlerOptions = req.body.crawlerOptions ?? { allowBackwardCrawling: false };
   const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false };
   if (mode === "single_urls" && !url.includes(",")) {
@@ -64,7 +64,7 @@ export async function crawlController(req: Request, res: Response) {
       await a.setOptions({
         mode: "single_urls",
         urls: [url],
-        crawlerOptions: crawlerOptions,
+        crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
         pageOptions: pageOptions,
       });
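Net effect of the two controller changes: returnOnlyUrls is no longer part of the default crawlerOptions, but is forced on the options handed to the crawler for the single_urls path. A minimal sketch of that merge with plain object literals (not the controller's real types):

// Caller-supplied options, or the new default when the request body omits them.
const crawlerOptions = { allowBackwardCrawling: false };

// What the single_urls branch now passes to setOptions: the caller's options,
// with returnOnlyUrls switched on unconditionally.
const effectiveOptions = { ...crawlerOptions, returnOnlyUrls: true };

console.log(effectiveOptions); // { allowBackwardCrawling: false, returnOnlyUrls: true }

Because returnOnlyUrls appears after the spread, it wins even if a caller explicitly sent returnOnlyUrls: false, while the defaults applied earlier in the controller no longer include it.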