From fdb2789eaa302b2f90bed7f1dad6dcc95613cb1f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 23 Apr 2024 17:14:34 -0700 Subject: [PATCH] Nick: added url as return param --- apps/api/src/lib/entities.ts | 1 + apps/api/src/scraper/WebScraper/single_url.ts | 2 ++ 2 files changed, 3 insertions(+) diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 062212b..fdc1c61 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -40,6 +40,7 @@ export type WebScraperOptions = { export class Document { id?: string; + url?: string; // Used only in /search for now content: string; markdown?: string; createdAt?: Date; diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index e110b0e..6ab3003 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -154,10 +154,12 @@ export async function scrapSingleUrl( // } let [text, html] = await attemptScraping(urlToScrap, "scrapingBee"); + // Basically means that it is using /search endpoint if(pageOptions.fallback === false){ const soup = cheerio.load(html); const metadata = extractMetadata(soup, urlToScrap); return { + url: urlToScrap, content: text, markdown: text, metadata: { ...metadata, sourceURL: urlToScrap },