Merge pull request #242 from mendableai/nsc/partial-data-changes
Partial data: keep a sliding window of the 50 most recent documents
This commit is contained in:
commit
88a5286495
@ -77,6 +77,8 @@ export class Document {
|
|||||||
provider?: string;
|
provider?: string;
|
||||||
warning?: string;
|
warning?: string;
|
||||||
|
|
||||||
|
index?: number;
|
||||||
|
|
||||||
constructor(data: Partial<Document>) {
|
constructor(data: Partial<Document>) {
|
||||||
if (!data.content) {
|
if (!data.content) {
|
||||||
throw new Error("Missing required fields");
|
throw new Error("Missing required fields");
|
||||||
|
@ -19,6 +19,9 @@ export async function startWebScraperPipeline({
|
|||||||
inProgress: (progress) => {
|
inProgress: (progress) => {
|
||||||
if (progress.currentDocument) {
|
if (progress.currentDocument) {
|
||||||
partialDocs.push(progress.currentDocument);
|
partialDocs.push(progress.currentDocument);
|
||||||
|
if (partialDocs.length > 50) {
|
||||||
|
partialDocs = partialDocs.slice(-50);
|
||||||
|
}
|
||||||
job.progress({ ...progress, partialDocs: partialDocs });
|
job.progress({ ...progress, partialDocs: partialDocs });
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -72,7 +72,7 @@ export class WebScraperDataProvider {
|
|||||||
total: totalUrls,
|
total: totalUrls,
|
||||||
status: "SCRAPING",
|
status: "SCRAPING",
|
||||||
currentDocumentUrl: url,
|
currentDocumentUrl: url,
|
||||||
currentDocument: result,
|
currentDocument: { ...result, index: processedUrls },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user