0

Update crawl.test.ts

This commit is contained in:
Nicolas 2024-05-15 12:50:13 -07:00
parent 58053eb423
commit 499671c87f

View File

@ -27,8 +27,6 @@ describe("Crawling Checkup (E2E)", () => {
describe("Crawling website tests with a dataset", () => { describe("Crawling website tests with a dataset", () => {
it("Should crawl the website and verify the response", async () => { it("Should crawl the website and verify the response", async () => {
let passedTests = 0; let passedTests = 0;
const batchSize = 15;
const batchPromises = [];
const startTime = new Date().getTime(); const startTime = new Date().getTime();
const date = new Date(); const date = new Date();
const logsDir = `logs/${date.getMonth() + 1}-${date.getDate()}-${date.getFullYear()}`; const logsDir = `logs/${date.getMonth() + 1}-${date.getDate()}-${date.getFullYear()}`;
@ -36,12 +34,9 @@ describe("Crawling Checkup (E2E)", () => {
let errorLogFileName = `${logsDir}/run.log_${new Date().toTimeString().split(' ')[0]}`; let errorLogFileName = `${logsDir}/run.log_${new Date().toTimeString().split(' ')[0]}`;
const errorLog: WebsiteScrapeError[] = []; const errorLog: WebsiteScrapeError[] = [];
for (let i = 0; i < websitesData.length; i += batchSize) { for (const websiteData of websitesData) {
await new Promise(resolve => setTimeout(resolve, 10000)); await new Promise(resolve => setTimeout(resolve, 10000));
const batch = websitesData.slice(i, i + batchSize);
const batchPromise = Promise.all(
batch.map(async (websiteData: WebsiteData) => {
try { try {
const crawlResponse = await request(TEST_URL || "") const crawlResponse = await request(TEST_URL || "")
.post("/v0/crawl") .post("/v0/crawl")
@ -70,7 +65,7 @@ describe("Crawling Checkup (E2E)", () => {
if(!completedResponse) { if(!completedResponse) {
// fail the test // fail the test
console.log('No response'); console.log('No response');
return null; continue;
} }
if (!completedResponse.body.data) { if (!completedResponse.body.data) {
@ -85,47 +80,7 @@ describe("Crawling Checkup (E2E)", () => {
console.log('-------------------') console.log('-------------------')
// if (!completedResponse.body || completedResponse.body.status !== "completed") {
// errorLog.push({
// website: websiteData.website,
// prompt: 'CRAWL',
// expected_output: 'SUCCESS',
// actual_output: 'FAILURE',
// error: `Crawl job did not complete successfully.`
// });
// return null;
// }
// // check how many webpages were crawled successfully
// // compares with expected_num_of_pages
// if (completedResponse.body.data.length < websiteData.expected_min_num_of_pages) {
// errorLog.push({
// website: websiteData.website,
// prompt: 'CRAWL',
// expected_output: `SUCCESS: ${websiteData.expected_min_num_of_pages}`,
// actual_output: `FAILURE: ${completedResponse.body.data.length}`,
// error: `Expected at least ${websiteData.expected_min_num_of_pages} webpages, but got ${completedResponse.body.data.length}`
// });
// return null;
// }
// // checks if crawled pages contain expected_crawled_pages
// if (websiteData.expected_crawled_pages.some(page => !completedResponse.body.data.includes(page))) {
// errorLog.push({
// website: websiteData.website,
// prompt: 'CRAWL',
// expected_output: `SUCCESS: ${websiteData.expected_crawled_pages}`,
// actual_output: `FAILURE: ${completedResponse.body.data}`,
// error: `Expected crawled pages to contain ${websiteData.expected_crawled_pages}, but got ${completedResponse.body.data}`
// });
// return null;
// }
passedTests++; passedTests++;
return {
website: websiteData.website,
statusCode: completedResponse.statusCode,
};
} catch (error) { } catch (error) {
console.error(`Error processing ${websiteData.website}: ${error}`); console.error(`Error processing ${websiteData.website}: ${error}`);
errorLog.push({ errorLog.push({
@ -135,14 +90,9 @@ describe("Crawling Checkup (E2E)", () => {
actual_output: 'FAILURE', actual_output: 'FAILURE',
error: `Error processing ${websiteData.website}: ${error}` error: `Error processing ${websiteData.website}: ${error}`
}); });
return null;
} }
})
);
batchPromises.push(batchPromise);
} }
(await Promise.all(batchPromises)).flat();
const score = (passedTests / websitesData.length) * 100; const score = (passedTests / websitesData.length) * 100;
const endTime = new Date().getTime(); const endTime = new Date().getTime();
const timeTaken = (endTime - startTime) / 1000; const timeTaken = (endTime - startTime) / 1000;