Merge pull request #242 from mendableai/nsc/partial-data-changes
Partial Data Sliding window of 50
This commit is contained in:
commit
88a5286495
@@ -77,6 +77,8 @@ export class Document {
|
||||
provider?: string;
|
||||
warning?: string;
|
||||
|
||||
index?: number;
|
||||
|
||||
constructor(data: Partial<Document>) {
|
||||
if (!data.content) {
|
||||
throw new Error("Missing required fields");
|
||||
|
@@ -19,6 +19,9 @@ export async function startWebScraperPipeline({
|
||||
inProgress: (progress) => {
|
||||
if (progress.currentDocument) {
|
||||
partialDocs.push(progress.currentDocument);
|
||||
+ if (partialDocs.length > 50) {
|
||||
+ partialDocs = partialDocs.slice(-50);
|
||||
+ }
|
||||
job.progress({ ...progress, partialDocs: partialDocs });
|
||||
}
|
||||
},
|
||||
|
@@ -72,7 +72,7 @@ export class WebScraperDataProvider {
|
||||
total: totalUrls,
|
||||
status: "SCRAPING",
|
||||
currentDocumentUrl: url,
|
||||
- currentDocument: result,
|
||||
+ currentDocument: { ...result, index: processedUrls },
|
||||
});
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user