diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 062212b..fdc1c61 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -40,6 +40,7 @@ export type WebScraperOptions = { export class Document { id?: string; + url?: string; // Used only in /search for now content: string; markdown?: string; createdAt?: Date; diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index e110b0e..6ab3003 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -154,10 +154,12 @@ export async function scrapSingleUrl( // } let [text, html] = await attemptScraping(urlToScrap, "scrapingBee"); + // Basically means that it is using /search endpoint if(pageOptions.fallback === false){ const soup = cheerio.load(html); const metadata = extractMetadata(soup, urlToScrap); return { + url: urlToScrap, content: text, markdown: text, metadata: { ...metadata, sourceURL: urlToScrap },