0
This commit is contained in:
Nicolas 2024-06-05 11:11:09 -07:00
parent 0d51b11dcd
commit b4c6819a54
3 changed files with 6 additions and 1 deletions

View File

@@ -77,6 +77,8 @@ export class Document {
provider?: string; provider?: string;
warning?: string; warning?: string;
index?: number;
constructor(data: Partial<Document>) { constructor(data: Partial<Document>) {
if (!data.content) { if (!data.content) {
throw new Error("Missing required fields"); throw new Error("Missing required fields");

View File

@@ -19,6 +19,9 @@ export async function startWebScraperPipeline({
inProgress: (progress) => { inProgress: (progress) => {
if (progress.currentDocument) { if (progress.currentDocument) {
partialDocs.push(progress.currentDocument); partialDocs.push(progress.currentDocument);
if (partialDocs.length > 50) {
partialDocs = partialDocs.slice(-50);
}
job.progress({ ...progress, partialDocs: partialDocs }); job.progress({ ...progress, partialDocs: partialDocs });
} }
}, },

View File

@@ -72,7 +72,7 @@ export class WebScraperDataProvider {
total: totalUrls, total: totalUrls,
status: "SCRAPING", status: "SCRAPING",
currentDocumentUrl: url, currentDocumentUrl: url,
currentDocument: result, currentDocument: { ...result, index: processedUrls },
}); });
} }