From 71c98d8b80a9487dff3e5e8ac2e40967fd9a8a0c Mon Sep 17 00:00:00 2001 From: Eric Ciarla Date: Thu, 13 Jun 2024 18:00:52 -0400 Subject: [PATCH] Update logic --- apps/api/src/__tests__/e2e_withAuth/index.test.ts | 2 +- apps/api/src/scraper/WebScraper/index.ts | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index b0642cf..4ac57e6 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -430,7 +430,7 @@ describe("E2E Tests for API Routes", () => { ); expect(urls.length).toBeGreaterThan(1); - // Check if all URLs have a maximum depth of 1 + // Check that every URL has an absolute depth of at most 3 (base URL depth of 2 plus a maxDepth of 1) urls.forEach((url: string) => { const depth = new URL(url).pathname.split("/").filter(Boolean).length; expect(depth).toBeLessThanOrEqual(3); diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index e8ce813..67d73d4 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -163,11 +163,10 @@ export class WebScraperDataProvider { inProgress?: (progress: Progress) => void ): Promise { - const baseURLDepth = new URL(this.urls[0]).pathname.split('/').length - 2; + const pathSplits = new URL(this.urls[0]).pathname.split('/'); + const baseURLDepth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0); const adjustedMaxDepth = this.maxCrawledDepth + baseURLDepth; - - const crawler = new WebCrawler({ initialUrl: this.urls[0],