0

Update index.ts: remove crawl timing/debug console.log calls and the fastDocs declaration

This commit is contained in:
Nicolas 2024-05-15 11:06:03 -07:00
parent 27e1e22a0a
commit 87570bdfa1

View File

@@ -155,22 +155,16 @@ export class WebScraperDataProvider {
limit: this.limit, limit: this.limit,
generateImgAltText: this.generateImgAltText, generateImgAltText: this.generateImgAltText,
}); });
let start = Date.now();
let links = await crawler.start(inProgress, 5, this.limit, this.maxCrawledDepth); let links = await crawler.start(inProgress, 5, this.limit, this.maxCrawledDepth);
console.log(links.length)
let end = Date.now();
console.log("Crawl end in seconds ", (end - start) / 1000);
const allLinks = links.map((e) => e.url); const allLinks = links.map((e) => e.url);
const allHtmls = links.map((e)=> e.html); const allHtmls = links.map((e)=> e.html);
console.log("All links", allLinks.length);
console.log("All htmls", allHtmls.length);
if (this.returnOnlyUrls) { if (this.returnOnlyUrls) {
return this.returnOnlyUrlsResponse(allLinks , inProgress); return this.returnOnlyUrlsResponse(allLinks , inProgress);
} }
let fastDocs = []
let documents = []; let documents = [];
// check if fast mode is enabled and there is html inside the links // check if fast mode is enabled and there is html inside the links
if (this.crawlerMode === "fast" && links.some((link) => link.html)) { if (this.crawlerMode === "fast" && links.some((link) => link.html)) {