From ab9de0f5ab3e0d0056ed5c5e3228b841c34afa64 Mon Sep 17 00:00:00 2001 From: Eric Ciarla Date: Thu, 13 Jun 2024 18:46:30 -0400 Subject: [PATCH] Update maxDepth tests --- apps/api/src/__tests__/e2e_withAuth/index.test.ts | 9 ++++++--- apps/api/src/scraper/WebScraper/crawler.ts | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index 0ae5ad9..ef4f3e1 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -390,7 +390,8 @@ describe("E2E Tests for API Routes", () => { // Check if all URLs have a maximum depth of 1 urls.forEach((url: string) => { - const depth = new URL(url).pathname.split("/").filter(Boolean).length; + const pathSplits = new URL(url).pathname.split('/'); + const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0); expect(depth).toBeLessThanOrEqual(2); }); }, 120000); @@ -432,7 +433,8 @@ describe("E2E Tests for API Routes", () => { // Check if all URLs have an absolute maximum depth of 3 after the base URL depth was 2 and the maxDepth was 1 urls.forEach((url: string) => { - const depth = new URL(url).pathname.split("/").filter(Boolean).length; + const pathSplits = new URL(url).pathname.split('/'); + const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0); expect(depth).toBeLessThanOrEqual(3); }); }, 120000); @@ -749,7 +751,8 @@ describe("E2E Tests for API Routes", () => { // Check if all URLs have a maximum depth of 1 urls.forEach((url) => { - const depth = new URL(url).pathname.split("/").filter(Boolean).length; + const pathSplits = new URL(url).pathname.split('/'); + const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0); expect(depth).toBeLessThanOrEqual(2); }); }, 180000); diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 7720991..33a643b 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -158,7 +158,6 @@ export class WebCrawler { // make sure to run include exclude here again const filteredUrls = this.filterLinks(urls.map(urlObj => urlObj.url), limit, this.maxCrawledDepth); - return filteredUrls.map(url => ({ url, html: urls.find(urlObj => urlObj.url === url)?.html || "" })); }