0

Update index.ts

This commit is contained in:
Eric Ciarla 2024-06-13 17:02:30 -04:00
parent d48c0df6c5
commit 32e814bedc

View File

@@ -163,12 +163,15 @@ export class WebScraperDataProvider {
inProgress?: (progress: Progress) => void inProgress?: (progress: Progress) => void
): Promise<Document[]> { ): Promise<Document[]> {
const baseURLDepth = new URL(this.urls[0]).pathname.split('/').length - 1;
const adjustedMaxDepth = this.maxCrawledDepth + baseURLDepth;
const crawler = new WebCrawler({ const crawler = new WebCrawler({
initialUrl: this.urls[0], initialUrl: this.urls[0],
includes: this.includes, includes: this.includes,
excludes: this.excludes, excludes: this.excludes,
maxCrawledLinks: this.maxCrawledLinks, maxCrawledLinks: this.maxCrawledLinks,
maxCrawledDepth: this.maxCrawledDepth, maxCrawledDepth: adjustedMaxDepth,
limit: this.limit, limit: this.limit,
generateImgAltText: this.generateImgAltText, generateImgAltText: this.generateImgAltText,
allowBackwardCrawling: this.allowBackwardCrawling, allowBackwardCrawling: this.allowBackwardCrawling,