diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index a50e42e..c3d37c4 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -1,6 +1,7 @@ const socialMediaBlocklist = [ 'facebook.com', 'twitter.com', + 'x.com', 'instagram.com', 'linkedin.com', 'pinterest.com', @@ -14,12 +15,25 @@ const socialMediaBlocklist = [ 'telegram.org', ]; -const allowedUrls = [ - 'linkedin.com/pulse' +const allowedKeywords = [ + 'pulse', + 'privacy', + 'terms', + 'policy', + 'user-agreement', + 'legal', + 'help', + 'support', + 'contact', + 'about', + 'careers', + 'blog', + 'press', + 'conditions', ]; export function isUrlBlocked(url: string): boolean { - if (allowedUrls.some(allowedUrl => url.includes(allowedUrl))) { + if (allowedKeywords.some(keyword => url.includes(keyword))) { return false; }