0

Update single_url.ts

This commit is contained in:
Nicolas 2024-05-09 17:52:46 -07:00
parent be85008622
commit d21091bb06

View File

@@ -202,14 +202,13 @@ export async function scrapSingleUrl(
console.log(`Falling back to ${scraper}`); console.log(`Falling back to ${scraper}`);
} }
if (!text || text.length < 100) { if (!text) {
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`); throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
} }
const soup = cheerio.load(html); const soup = cheerio.load(html);
const metadata = extractMetadata(soup, urlToScrap); const metadata = extractMetadata(soup, urlToScrap);
const document: Document = { const document: Document = {
url: urlToScrap,
content: text, content: text,
markdown: text, markdown: text,
html: pageOptions.includeHtml ? html : undefined, html: pageOptions.includeHtml ? html : undefined,