From b4c6819a5441a69dcbf70d77b58f9f70671674ad Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 5 Jun 2024 11:11:09 -0700 Subject: [PATCH] Nick: --- apps/api/src/lib/entities.ts | 2 ++ apps/api/src/main/runWebScraper.ts | 3 +++ apps/api/src/scraper/WebScraper/index.ts | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index dd5fc72..5511623 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -77,6 +77,8 @@ export class Document { provider?: string; warning?: string; + index?: number; + constructor(data: Partial<Document>) { if (!data.content) { throw new Error("Missing required fields"); diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 632d110..dee89bc 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -19,6 +19,9 @@ export async function startWebScraperPipeline({ inProgress: (progress) => { if (progress.currentDocument) { partialDocs.push(progress.currentDocument); + if (partialDocs.length > 50) { + partialDocs = partialDocs.slice(-50); + } job.progress({ ...progress, partialDocs: partialDocs }); } }, diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index 04586b2..e3a3cc6 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -72,7 +72,7 @@ export class WebScraperDataProvider { total: totalUrls, status: "SCRAPING", currentDocumentUrl: url, - currentDocument: result, + currentDocument: { ...result, index: processedUrls }, }); }