Update index.ts
This commit is contained in:
parent
27e1e22a0a
commit
87570bdfa1
@ -155,22 +155,16 @@ export class WebScraperDataProvider {
|
|||||||
limit: this.limit,
|
limit: this.limit,
|
||||||
generateImgAltText: this.generateImgAltText,
|
generateImgAltText: this.generateImgAltText,
|
||||||
});
|
});
|
||||||
let start = Date.now();
|
|
||||||
let links = await crawler.start(inProgress, 5, this.limit, this.maxCrawledDepth);
|
let links = await crawler.start(inProgress, 5, this.limit, this.maxCrawledDepth);
|
||||||
console.log(links.length)
|
|
||||||
let end = Date.now();
|
|
||||||
console.log("Crawl end in seconds ", (end - start) / 1000);
|
|
||||||
const allLinks = links.map((e) => e.url);
|
const allLinks = links.map((e) => e.url);
|
||||||
const allHtmls = links.map((e)=> e.html);
|
const allHtmls = links.map((e)=> e.html);
|
||||||
console.log("All links", allLinks.length);
|
|
||||||
console.log("All htmls", allHtmls.length);
|
|
||||||
|
|
||||||
if (this.returnOnlyUrls) {
|
if (this.returnOnlyUrls) {
|
||||||
return this.returnOnlyUrlsResponse(allLinks , inProgress);
|
return this.returnOnlyUrlsResponse(allLinks , inProgress);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let fastDocs = []
|
|
||||||
let documents = [];
|
let documents = [];
|
||||||
// check if fast mode is enabled and there is html inside the links
|
// check if fast mode is enabled and there is html inside the links
|
||||||
if (this.crawlerMode === "fast" && links.some((link) => link.html)) {
|
if (this.crawlerMode === "fast" && links.some((link) => link.html)) {
|
||||||
|
Loading…
Reference in New Issue
Block a user