Nick: a bit faster
This commit is contained in:
parent
c5cb268b61
commit
5b93799149
@ -157,19 +157,23 @@ export class WebScraperDataProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this.mode === "single_urls") {
|
if (this.mode === "single_urls") {
|
||||||
let pdfLinks = this.urls.filter((link) => isUrlAPdf({url: link, fastMode: false}));
|
|
||||||
let pdfDocuments: Document[] = [];
|
let pdfDocuments: Document[] = [];
|
||||||
for (let pdfLink of pdfLinks) {
|
let nonPdfUrls: string[] = [];
|
||||||
const pdfContent = await fetchAndProcessPdf(pdfLink);
|
for (let url of this.urls) {
|
||||||
|
if (isUrlAPdf({url: url, fastMode: false})) {
|
||||||
|
const pdfContent = await fetchAndProcessPdf(url);
|
||||||
pdfDocuments.push({
|
pdfDocuments.push({
|
||||||
content: pdfContent,
|
content: pdfContent,
|
||||||
metadata: { sourceURL: pdfLink },
|
metadata: { sourceURL: url },
|
||||||
provider: "web-scraper"
|
provider: "web-scraper"
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
nonPdfUrls.push(url);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let documents = await this.convertUrlsToDocuments(
|
let documents = await this.convertUrlsToDocuments(
|
||||||
this.urls.filter((link) => !isUrlAPdf({url: link, fastMode: true})),
|
nonPdfUrls,
|
||||||
inProgress
|
inProgress
|
||||||
);
|
);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user