From b5045d1661741eda6d137bdb172b185ce748fd62 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 4 Jun 2024 17:47:28 -0300 Subject: [PATCH] [feat] improved the scrape for gdrive pdfs --- .../WebScraper/custom/handleCustomScraping.ts | 17 +++++++++++------ apps/api/src/scraper/WebScraper/single_url.ts | 9 +++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts index 5f6c34f..1301757 100644 --- a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts +++ b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts @@ -1,7 +1,9 @@ +import { fetchAndProcessPdf } from "../utils/pdfProcessor"; + export async function handleCustomScraping( text: string, url: string -): Promise<{ scraper: string; url: string; wait_after_load: number } | null> { +): Promise<{ scraper: string; url: string; wait_after_load?: number } | null> { // Check for Readme Docs special case if (text.includes('