From 32e814bedcc63350e63aa4c7ed3288eb29e368d6 Mon Sep 17 00:00:00 2001 From: Eric Ciarla Date: Thu, 13 Jun 2024 17:02:30 -0400 Subject: [PATCH] Update index.ts --- apps/api/src/scraper/WebScraper/index.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index f432f43..dfb52c4 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -163,12 +163,15 @@ export class WebScraperDataProvider { inProgress?: (progress: Progress) => void ): Promise { + const baseURLDepth = new URL(this.urls[0]).pathname.split('/').length - 1; + const adjustedMaxDepth = this.maxCrawledDepth + baseURLDepth; + const crawler = new WebCrawler({ initialUrl: this.urls[0], includes: this.includes, excludes: this.excludes, maxCrawledLinks: this.maxCrawledLinks, - maxCrawledDepth: this.maxCrawledDepth, + maxCrawledDepth: adjustedMaxDepth, limit: this.limit, generateImgAltText: this.generateImgAltText, allowBackwardCrawling: this.allowBackwardCrawling,