Merge pull request #143 from mendableai/bug/crawl-limit
[Bug] Fixing /crawl limit
This commit is contained in:
commit
df0550d2f6
@@ -266,7 +266,7 @@ describe("E2E Tests for API Routes", () => {
|
|||||||
urls.forEach((url: string) => {
|
urls.forEach((url: string) => {
|
||||||
expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
|
expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
|
||||||
});
|
});
|
||||||
}, 60000); // 60 seconds
|
}, 90000); // 90 seconds
|
||||||
|
|
||||||
it("should return a successful response with a valid API key and limit to 3", async () => {
|
it("should return a successful response with a valid API key and limit to 3", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
|
@@ -25,7 +25,7 @@ export class WebCrawler {
|
|||||||
initialUrl,
|
initialUrl,
|
||||||
includes,
|
includes,
|
||||||
excludes,
|
excludes,
|
||||||
maxCrawledLinks,
|
maxCrawledLinks = 10000,
|
||||||
limit = 10000,
|
limit = 10000,
|
||||||
generateImgAltText = false,
|
generateImgAltText = false,
|
||||||
maxCrawledDepth = 10,
|
maxCrawledDepth = 10,
|
||||||
@@ -152,7 +152,7 @@ export class WebCrawler {
|
|||||||
inProgress?: (progress: Progress) => void,
|
inProgress?: (progress: Progress) => void,
|
||||||
): Promise<{ url: string, html: string }[]> {
|
): Promise<{ url: string, html: string }[]> {
|
||||||
const queue = async.queue(async (task: string, callback) => {
|
const queue = async.queue(async (task: string, callback) => {
|
||||||
if (this.crawledUrls.size >= this.maxCrawledLinks) {
|
if (this.crawledUrls.size >= Math.min(this.maxCrawledLinks, this.limit)) {
|
||||||
if (callback && typeof callback === "function") {
|
if (callback && typeof callback === "function") {
|
||||||
callback();
|
callback();
|
||||||
}
|
}
|
||||||
@@ -176,14 +176,14 @@ export class WebCrawler {
|
|||||||
if (inProgress && newUrls.length > 0) {
|
if (inProgress && newUrls.length > 0) {
|
||||||
inProgress({
|
inProgress({
|
||||||
current: this.crawledUrls.size,
|
current: this.crawledUrls.size,
|
||||||
total: this.maxCrawledLinks,
|
total: Math.min(this.maxCrawledLinks, this.limit),
|
||||||
status: "SCRAPING",
|
status: "SCRAPING",
|
||||||
currentDocumentUrl: newUrls[newUrls.length - 1].url,
|
currentDocumentUrl: newUrls[newUrls.length - 1].url,
|
||||||
});
|
});
|
||||||
} else if (inProgress) {
|
} else if (inProgress) {
|
||||||
inProgress({
|
inProgress({
|
||||||
current: this.crawledUrls.size,
|
current: this.crawledUrls.size,
|
||||||
total: this.maxCrawledLinks,
|
total: Math.min(this.maxCrawledLinks, this.limit),
|
||||||
status: "SCRAPING",
|
status: "SCRAPING",
|
||||||
currentDocumentUrl: task,
|
currentDocumentUrl: task,
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user