From 756f54466d37f00850343cc8ed57979a0d587c50 Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Mon, 20 May 2024 17:24:21 -0700
Subject: [PATCH] Nick: allowed keywords for now

---
 apps/api/src/scraper/WebScraper/utils/blocklist.ts | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
index a50e42e..ededfc7 100644
--- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts
+++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
@@ -1,6 +1,7 @@
 const socialMediaBlocklist = [
   'facebook.com',
   'twitter.com',
+  'x.com',
   'instagram.com',
   'linkedin.com',
   'pinterest.com',
@@ -14,12 +15,18 @@ const socialMediaBlocklist = [
   'telegram.org',
 ];
 
-const allowedUrls = [
-  'linkedin.com/pulse'
+const allowedKeywords = [
+  'pulse',
+  'privacy',
+  'terms',
+  'policy',
+  'user-agreement',
+  'legal',
+  'help'
 ];
 
 export function isUrlBlocked(url: string): boolean {
-  if (allowedUrls.some(allowedUrl => url.includes(allowedUrl))) {
+  if (allowedKeywords.some(keyword => url.includes(keyword))) {
     return false;
   }
 