From 87570bdfa1dab843710352098d19bd687acdf3c0 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 15 May 2024 11:06:03 -0700 Subject: [PATCH] Update index.ts --- apps/api/src/scraper/WebScraper/index.ts | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index 13f39c2..bdc7483 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -155,22 +155,16 @@ export class WebScraperDataProvider { limit: this.limit, generateImgAltText: this.generateImgAltText, }); - let start = Date.now(); + let links = await crawler.start(inProgress, 5, this.limit, this.maxCrawledDepth); - console.log(links.length) - let end = Date.now(); - console.log("Crawl end in seconds ", (end - start) / 1000); + const allLinks = links.map((e) => e.url); const allHtmls = links.map((e)=> e.html); - console.log("All links", allLinks.length); - console.log("All htmls", allHtmls.length); if (this.returnOnlyUrls) { return this.returnOnlyUrlsResponse(allLinks , inProgress); } - - let fastDocs = [] let documents = []; // check if fast mode is enabled and there is html inside the links if (this.crawlerMode === "fast" && links.some((link) => link.html)) {