From 66bd1e402067f3c78cbcade824cf808d5d773276 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 9 May 2024 18:41:15 -0700 Subject: [PATCH] Update website_params.ts --- .../WebScraper/utils/custom/website_params.ts | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts index 17036f4..5f8be9f 100644 --- a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts +++ b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts @@ -22,9 +22,12 @@ export const urlSpecificParams = { }, }, "support.greenpay.me":{ + defaultScraper: "playwright", params: { wait_browser: "networkidle2", block_resources: false, + wait: 2000, + }, headers: { "User-Agent": @@ -78,5 +81,45 @@ export const urlSpecificParams = { accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", }, + }, + "developers.notion.com":{ + defaultScraper: "playwright", + params: { + wait_browser: "networkidle2", + block_resources: false, + wait: 2000, + }, + headers: { + "User-Agent": + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "sec-fetch-site": "same-origin", + "sec-fetch-mode": "cors", + "sec-fetch-dest": "empty", + referer: "https://www.google.com/", + "accept-language": "en-US,en;q=0.9", + "accept-encoding": "gzip, deflate, br", + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + }, + }, + "docs2.hubitat.com":{ + defaultScraper: "playwright", + params: { + wait_browser: "networkidle2", + block_resources: false, + wait: 2000, + }, + headers: { + "User-Agent": + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "sec-fetch-site": "same-origin", + "sec-fetch-mode": "cors", + "sec-fetch-dest": "empty", + referer: "https://www.google.com/", + "accept-language": "en-US,en;q=0.9", + "accept-encoding": "gzip, deflate, br", + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + }, } };