From ccc55127d68e64572f34d7ba3a373d70d0b4a4d0 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 5 Jun 2024 11:48:41 -0300 Subject: [PATCH] Added scroll xpaths on fire-engine for handling readme docs --- .../scraper/WebScraper/custom/handleCustomScraping.ts | 11 +++++++---- apps/api/src/scraper/WebScraper/single_url.ts | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts index 5f6c34f..c4a9c26 100644 --- a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts +++ b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts @@ -1,7 +1,7 @@ export async function handleCustomScraping( text: string, url: string -): Promise<{ scraper: string; url: string; wait_after_load: number } | null> { +): Promise<{ scraper: string; url: string; waitAfterLoad: number, pageOptions?: { scrollXPaths?: string[] } } | null> { // Check for Readme Docs special case if (text.includes(', options?: any ): Promise { @@ -68,6 +69,7 @@ export async function scrapWithFireEngine( wait: waitParam, screenshot: screenshotParam, headers: headers, + pageOptions: pageOptions }), }); @@ -332,7 +334,7 @@ export async function scrapSingleUrl( const customScraperResult = await handleCustomScraping(text, url); if(customScraperResult){ - customScrapedContent = await scrapWithFireEngine(customScraperResult.url, customScraperResult.wait_after_load) + customScrapedContent = await scrapWithFireEngine(customScraperResult.url, customScraperResult.waitAfterLoad, false, customScraperResult.pageOptions) } if (customScrapedContent) {