Update logic

Eric Ciarla 2024-06-13 18:00:52 -04:00
parent 095951aa4d
commit 71c98d8b80
2 changed files with 3 additions and 4 deletions


@@ -430,7 +430,7 @@ describe("E2E Tests for API Routes", () => {
       );
       expect(urls.length).toBeGreaterThan(1);
-      // Check if all URLs have a maximum depth of 1
+      // Check if all URLs have an absolute maximum depth of 3 after the base URL depth was 2 and the maxDepth was 1
       urls.forEach((url: string) => {
        const depth = new URL(url).pathname.split("/").filter(Boolean).length;
        expect(depth).toBeLessThanOrEqual(3);
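As a rough illustration (not part of the commit), the depth measure this test uses counts non-empty path segments, so a base URL two segments deep crawled with a maxDepth of 1 yields the absolute cap of 3 asserted above. A minimal sketch, assuming a hypothetical base URL of example.com/a/b:

const depthOf = (url: string): number =>
  new URL(url).pathname.split("/").filter(Boolean).length;

depthOf("https://example.com/a/b");    // 2: the base URL's own depth
depthOf("https://example.com/a/b/c");  // 3: base depth 2 + maxDepth 1, still allowed
depthOf("https://example.com/a/b/c/"); // 3: the trailing empty segment is filtered out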


@@ -163,11 +163,10 @@ export class WebScraperDataProvider {
     inProgress?: (progress: Progress) => void
   ): Promise<Document[]> {
-    const baseURLDepth = new URL(this.urls[0]).pathname.split('/').length - 2;
+    const pathSplits = new URL(this.urls[0]).pathname.split('/');
+    const baseURLDepth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
     const adjustedMaxDepth = this.maxCrawledDepth + baseURLDepth;
     const crawler = new WebCrawler({
       initialUrl: this.urls[0],
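For context (not part of the commit), the rewritten baseURLDepth calculation replaces the hard-coded `- 2` with a ternary that subtracts 1 only when both the first and last split segments are empty, which happens for a trailing slash or the bare root path, so base URLs with and without a trailing slash normalize to the same depth. A standalone sketch with hypothetical URLs:

function baseURLDepth(url: string): number {
  const pathSplits = new URL(url).pathname.split("/");
  // "/a/b".split("/") -> ["", "a", "b"]; "/a/b/".split("/") -> ["", "a", "b", ""]
  const leadingAndTrailingEmpty =
    pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0;
  return pathSplits.length - (leadingAndTrailingEmpty ? 1 : 0);
}

baseURLDepth("https://example.com/docs/guide");  // ["", "docs", "guide"] -> 3
baseURLDepth("https://example.com/docs/guide/"); // ["", "docs", "guide", ""] -> 3
baseURLDepth("https://example.com/");            // ["", ""] -> 1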