Nick: added url as return param
This commit is contained in:
parent
3abfd6b4c1
commit
fdb2789eaa
@ -40,6 +40,7 @@ export type WebScraperOptions = {
|
|||||||
|
|
||||||
export class Document {
|
export class Document {
|
||||||
id?: string;
|
id?: string;
|
||||||
|
url?: string; // Used only in /search for now
|
||||||
content: string;
|
content: string;
|
||||||
markdown?: string;
|
markdown?: string;
|
||||||
createdAt?: Date;
|
createdAt?: Date;
|
||||||
|
@ -154,10 +154,12 @@ export async function scrapSingleUrl(
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
let [text, html] = await attemptScraping(urlToScrap, "scrapingBee");
|
let [text, html] = await attemptScraping(urlToScrap, "scrapingBee");
|
||||||
|
// Basically means that it is using /search endpoint
|
||||||
if(pageOptions.fallback === false){
|
if(pageOptions.fallback === false){
|
||||||
const soup = cheerio.load(html);
|
const soup = cheerio.load(html);
|
||||||
const metadata = extractMetadata(soup, urlToScrap);
|
const metadata = extractMetadata(soup, urlToScrap);
|
||||||
return {
|
return {
|
||||||
|
url: urlToScrap,
|
||||||
content: text,
|
content: text,
|
||||||
markdown: text,
|
markdown: text,
|
||||||
metadata: { ...metadata, sourceURL: urlToScrap },
|
metadata: { ...metadata, sourceURL: urlToScrap },
|
||||||
|
Loading…
Reference in New Issue
Block a user