0

Nick: added url as a return param

This commit is contained in:
Nicolas 2024-04-23 17:14:34 -07:00
parent 3abfd6b4c1
commit fdb2789eaa
2 changed files with 3 additions and 0 deletions

View File

@ -40,6 +40,7 @@ export type WebScraperOptions = {
export class Document { export class Document {
id?: string; id?: string;
url?: string; // Used only in /search for now
content: string; content: string;
markdown?: string; markdown?: string;
createdAt?: Date; createdAt?: Date;

View File

@ -154,10 +154,12 @@ export async function scrapSingleUrl(
// } // }
let [text, html] = await attemptScraping(urlToScrap, "scrapingBee"); let [text, html] = await attemptScraping(urlToScrap, "scrapingBee");
// fallback === false basically means that the /search endpoint is being used
if(pageOptions.fallback === false){ if(pageOptions.fallback === false){
const soup = cheerio.load(html); const soup = cheerio.load(html);
const metadata = extractMetadata(soup, urlToScrap); const metadata = extractMetadata(soup, urlToScrap);
return { return {
url: urlToScrap,
content: text, content: text,
markdown: text, markdown: text,
metadata: { ...metadata, sourceURL: urlToScrap }, metadata: { ...metadata, sourceURL: urlToScrap },