diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index 0ae5ad9..ef4f3e1 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -390,7 +390,8 @@ describe("E2E Tests for API Routes", () => { // Check if all URLs have a maximum depth of 1 urls.forEach((url: string) => { - const depth = new URL(url).pathname.split("/").filter(Boolean).length; + const pathSplits = new URL(url).pathname.split('/'); + const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0); expect(depth).toBeLessThanOrEqual(2); }); }, 120000); @@ -432,7 +433,8 @@ describe("E2E Tests for API Routes", () => { // Check if all URLs have an absolute maximum depth of 3 after the base URL depth was 2 and the maxDepth was 1 urls.forEach((url: string) => { - const depth = new URL(url).pathname.split("/").filter(Boolean).length; + const pathSplits = new URL(url).pathname.split('/'); + const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0); expect(depth).toBeLessThanOrEqual(3); }); }, 120000); @@ -749,7 +751,8 @@ describe("E2E Tests for API Routes", () => { // Check if all URLs have a maximum depth of 1 urls.forEach((url) => { - const depth = new URL(url).pathname.split("/").filter(Boolean).length; + const pathSplits = new URL(url).pathname.split('/'); + const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0); expect(depth).toBeLessThanOrEqual(2); }); }, 180000); diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 7720991..33a643b 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -158,7 +158,6 @@ export class WebCrawler { // make sure to run include exclude here again const filteredUrls = this.filterLinks(urls.map(urlObj => urlObj.url), limit, this.maxCrawledDepth); - return filteredUrls.map(url => ({ url, html: urls.find(urlObj => urlObj.url === url)?.html || "" })); }