0

Nick: a bit faster

This commit is contained in:
Nicolas 2024-04-19 15:13:17 -07:00
parent c5cb268b61
commit 5b93799149

View File

@@ -157,19 +157,23 @@ export class WebScraperDataProvider {
} }
if (this.mode === "single_urls") { if (this.mode === "single_urls") {
let pdfLinks = this.urls.filter((link) => isUrlAPdf({url: link, fastMode: false}));
let pdfDocuments: Document[] = []; let pdfDocuments: Document[] = [];
for (let pdfLink of pdfLinks) { let nonPdfUrls: string[] = [];
const pdfContent = await fetchAndProcessPdf(pdfLink); for (let url of this.urls) {
pdfDocuments.push({ if (isUrlAPdf({url: url, fastMode: false})) {
content: pdfContent, const pdfContent = await fetchAndProcessPdf(url);
metadata: { sourceURL: pdfLink }, pdfDocuments.push({
provider: "web-scraper" content: pdfContent,
}); metadata: { sourceURL: url },
provider: "web-scraper"
});
} else {
nonPdfUrls.push(url);
}
} }
let documents = await this.convertUrlsToDocuments( let documents = await this.convertUrlsToDocuments(
this.urls.filter((link) => !isUrlAPdf({url: link, fastMode: true})), nonPdfUrls,
inProgress inProgress
); );