Update single_url.ts
This commit is contained in:
parent
be85008622
commit
d21091bb06
@@ -202,14 +202,13 @@ export async function scrapSingleUrl(
|
|||||||
console.log(`Falling back to ${scraper}`);
|
console.log(`Falling back to ${scraper}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!text || text.length < 100) {
|
if (!text) {
|
||||||
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
|
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const soup = cheerio.load(html);
|
const soup = cheerio.load(html);
|
||||||
const metadata = extractMetadata(soup, urlToScrap);
|
const metadata = extractMetadata(soup, urlToScrap);
|
||||||
const document: Document = {
|
const document: Document = {
|
||||||
url: urlToScrap,
|
|
||||||
content: text,
|
content: text,
|
||||||
markdown: text,
|
markdown: text,
|
||||||
html: pageOptions.includeHtml ? html : undefined,
|
html: pageOptions.includeHtml ? html : undefined,
|
||||||
|
Loading…
Reference in New Issue
Block a user