From ccc55127d68e64572f34d7ba3a373d70d0b4a4d0 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Wed, 5 Jun 2024 11:48:41 -0300
Subject: [PATCH] Added scroll XPaths on fire-engine for handling Readme Docs
---
.../scraper/WebScraper/custom/handleCustomScraping.ts | 11 +++++++----
apps/api/src/scraper/WebScraper/single_url.ts | 4 +++-
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
index 5f6c34f..c4a9c26 100644
--- a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
+++ b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
@@ -1,7 +1,7 @@
export async function handleCustomScraping(
text: string,
url: string
-): Promise<{ scraper: string; url: string; wait_after_load: number } | null> {
+): Promise<{ scraper: string; url: string; waitAfterLoad: number, pageOptions?: { scrollXPaths?: string[] } } | null> {
// Check for Readme Docs special case
if (text.includes(',
options?: any
): Promise {
@@ -68,6 +69,7 @@ export async function scrapWithFireEngine(
wait: waitParam,
screenshot: screenshotParam,
headers: headers,
+ pageOptions: pageOptions
}),
});
@@ -332,7 +334,7 @@ export async function scrapSingleUrl(
const customScraperResult = await handleCustomScraping(text, url);
if(customScraperResult){
- customScrapedContent = await scrapWithFireEngine(customScraperResult.url, customScraperResult.wait_after_load)
+ customScrapedContent = await scrapWithFireEngine(customScraperResult.url, customScraperResult.waitAfterLoad, false, customScraperResult.pageOptions)
}
if (customScrapedContent) {