From d21091bb063964e6c3d7fedcfbb226b8889b8332 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 9 May 2024 17:52:46 -0700 Subject: [PATCH] Update single_url.ts --- apps/api/src/scraper/WebScraper/single_url.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index fee126a..c43ea40 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -202,14 +202,13 @@ export async function scrapSingleUrl( console.log(`Falling back to ${scraper}`); } - if (!text || text.length < 100) { + if (!text) { throw new Error(`All scraping methods failed for URL: ${urlToScrap}`); } const soup = cheerio.load(html); const metadata = extractMetadata(soup, urlToScrap); const document: Document = { - url: urlToScrap, content: text, markdown: text, html: pageOptions.includeHtml ? html : undefined,