0

Merge pull request #242 from mendableai/nsc/partial-data-changes

Partial Data Sliding window of 50
This commit is contained in:
Nicolas 2024-06-05 11:17:21 -07:00 committed by GitHub
commit 88a5286495
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 6 additions and 1 deletions

View File

@@ -77,6 +77,8 @@ export class Document {
provider?: string;
warning?: string;
index?: number;
constructor(data: Partial<Document>) {
if (!data.content) {
throw new Error("Missing required fields");

View File

@@ -19,6 +19,9 @@ export async function startWebScraperPipeline({
inProgress: (progress) => {
if (progress.currentDocument) {
partialDocs.push(progress.currentDocument);
+if (partialDocs.length > 50) {
+partialDocs = partialDocs.slice(-50);
+}
job.progress({ ...progress, partialDocs: partialDocs });
}
},

View File

@@ -72,7 +72,7 @@ export class WebScraperDataProvider {
total: totalUrls,
status: "SCRAPING",
currentDocumentUrl: url,
-currentDocument: result,
+currentDocument: { ...result, index: processedUrls },
});
}