0

[Bug] Fixing /crawl limit

This commit is contained in:
rafaelsideguide 2024-05-10 12:15:54 -03:00
parent 66bd1e4020
commit bc6b929b43

View File

@@ -25,7 +25,7 @@ export class WebCrawler {
initialUrl, initialUrl,
includes, includes,
excludes, excludes,
maxCrawledLinks, maxCrawledLinks = 10000,
limit = 10000, limit = 10000,
generateImgAltText = false, generateImgAltText = false,
maxCrawledDepth = 10, maxCrawledDepth = 10,
@@ -136,7 +136,7 @@ export class WebCrawler {
inProgress?: (progress: Progress) => void inProgress?: (progress: Progress) => void
): Promise<string[]> { ): Promise<string[]> {
const queue = async.queue(async (task: string, callback) => { const queue = async.queue(async (task: string, callback) => {
if (this.crawledUrls.size >= this.maxCrawledLinks) { if (this.crawledUrls.size >= Math.min(this.maxCrawledLinks, this.limit)) {
if (callback && typeof callback === "function") { if (callback && typeof callback === "function") {
callback(); callback();
} }
@@ -147,14 +147,14 @@ export class WebCrawler {
if (inProgress && newUrls.length > 0) { if (inProgress && newUrls.length > 0) {
inProgress({ inProgress({
current: this.crawledUrls.size, current: this.crawledUrls.size,
total: this.maxCrawledLinks, total: Math.min(this.maxCrawledLinks, this.limit),
status: "SCRAPING", status: "SCRAPING",
currentDocumentUrl: newUrls[newUrls.length - 1], currentDocumentUrl: newUrls[newUrls.length - 1],
}); });
} else if (inProgress) { } else if (inProgress) {
inProgress({ inProgress({
current: this.crawledUrls.size, current: this.crawledUrls.size,
total: this.maxCrawledLinks, total: Math.min(this.maxCrawledLinks, this.limit),
status: "SCRAPING", status: "SCRAPING",
currentDocumentUrl: task, currentDocumentUrl: task,
}); });