diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index dd5fc72..5511623 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -77,6 +77,8 @@ export class Document { provider?: string; warning?: string; + index?: number; + constructor(data: Partial<Document>) { if (!data.content) { throw new Error("Missing required fields"); diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 632d110..dee89bc 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -19,6 +19,9 @@ export async function startWebScraperPipeline({ inProgress: (progress) => { if (progress.currentDocument) { partialDocs.push(progress.currentDocument); + if (partialDocs.length > 50) { + partialDocs = partialDocs.slice(-50); + } job.progress({ ...progress, partialDocs: partialDocs }); } }, diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index 04586b2..e3a3cc6 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -72,7 +72,7 @@ export class WebScraperDataProvider { total: totalUrls, status: "SCRAPING", currentDocumentUrl: url, - currentDocument: result, + currentDocument: { ...result, index: processedUrls }, }); }