From a5d38039f2d55a8dfbe2872e2a61ca92120cf959 Mon Sep 17 00:00:00 2001 From: tractorjuice <129532814+tractorjuice@users.noreply.github.com> Date: Sat, 27 Apr 2024 11:03:27 +0100 Subject: [PATCH 1/3] Add additional file extensions to crawler.ts Add additional file extensions. --- apps/api/src/scraper/WebScraper/crawler.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 23cb629..3c7aefa 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -267,6 +267,13 @@ export class WebCrawler { ".docx", ".xlsx", ".xml", + ".pptx", + ".avi", + ".flv", + ".woff", + ".ttf", + ".woff2", + ".webp", ]; return fileExtensions.some((ext) => url.endsWith(ext)); } From 0f694e06082de99384dd89abfbb7fed25018f4d3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 27 Apr 2024 11:14:52 -0700 Subject: [PATCH 2/3] Update crawler.ts --- apps/api/src/scraper/WebScraper/crawler.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 3c7aefa..adc71c5 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -267,7 +267,6 @@ export class WebCrawler { ".docx", ".xlsx", ".xml", - ".pptx", ".avi", ".flv", ".woff", From 1dc6458c6a668a79a277ac161604bbfef18d17e1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 27 Apr 2024 11:17:10 -0700 Subject: [PATCH 3/3] Update crawler.ts --- apps/api/src/scraper/WebScraper/crawler.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index adc71c5..7bf9988 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -272,7 +272,7 @@ export class WebCrawler { ".woff", ".ttf", ".woff2", - ".webp", + ".webp" ]; return fileExtensions.some((ext) => url.endsWith(ext)); }