0

Nick: a bit faster

This commit is contained in:
Nicolas 2024-04-19 15:13:17 -07:00
parent c5cb268b61
commit 5b93799149

View File

@@ -157,19 +157,23 @@ export class WebScraperDataProvider {
}
if (this.mode === "single_urls") {
let pdfLinks = this.urls.filter((link) => isUrlAPdf({url: link, fastMode: false}));
let pdfDocuments: Document[] = [];
for (let pdfLink of pdfLinks) {
const pdfContent = await fetchAndProcessPdf(pdfLink);
pdfDocuments.push({
content: pdfContent,
metadata: { sourceURL: pdfLink },
provider: "web-scraper"
});
let nonPdfUrls: string[] = [];
for (let url of this.urls) {
if (isUrlAPdf({url: url, fastMode: false})) {
const pdfContent = await fetchAndProcessPdf(url);
pdfDocuments.push({
content: pdfContent,
metadata: { sourceURL: url },
provider: "web-scraper"
});
} else {
nonPdfUrls.push(url);
}
}
let documents = await this.convertUrlsToDocuments(
this.urls.filter((link) => !isUrlAPdf({url: link, fastMode: true})),
nonPdfUrls,
inProgress
);