From 5b8aed26dd85a9d5f23e0fd865882dfd5b14a865 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 18:55:39 -0700 Subject: [PATCH] Update scrape.ts --- apps/api/src/controllers/scrape.ts | 57 +++++++++++++++++------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 9173533..04fe525 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -5,17 +5,22 @@ import { checkTeamCredits } from "../../src/services/billing/credit_billing"; import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../../src/types"; import { logJob } from "../../src/services/logging/log_job"; -import { Document } from "../../src/lib/entities"; +import { Document } from "../../src/lib/entities"; export async function scrapeHelper( req: Request, team_id: string, crawlerOptions: any, pageOptions: any -) : Promise<{ success: boolean; error?: string; data?: Document }> { +): Promise<{ + success: boolean; + error?: string; + data?: Document; + returnCode?: number; +}> { const url = req.body.url; if (!url) { - throw new Error("Url is required"); + return { success: false, error: "Url is required", returnCode: 400 }; } const a = new WebScraperDataProvider(); @@ -34,7 +39,7 @@ export async function scrapeHelper( (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 ); if (filteredDocs.length === 0) { - return { success: true, error: "No pages found" }; + return { success: true, error: "No page found", returnCode: 200 }; } const { success, credit_usage } = await billTeam( team_id, @@ -43,12 +48,15 @@ export async function scrapeHelper( if (!success) { return { success: false, - error: "Failed to bill team. Insufficient credits or subscription not found.", + error: + "Failed to bill team. Insufficient credits or subscription not found.", + returnCode: 402, }; } return { success: true, data: filteredDocs[0], + returnCode: 200, }; } @@ -77,26 +85,25 @@ export async function scrapeController(req: Request, res: Response) { return res.status(500).json({ error: "Internal server error" }); } - const result = await scrapeHelper( - req, - team_id, - crawlerOptions, - pageOptions - ); - logJob({ - success: result.success, - message: result.error, - num_docs: result.data.length, - docs: result.data, - time_taken: 0, - team_id: team_id, - mode: "scrape", - url: req.body.url, - crawlerOptions: crawlerOptions, - pageOptions: pageOptions, - }); - return res.json(result); - + const result = await scrapeHelper( + req, + team_id, + crawlerOptions, + pageOptions + ); + logJob({ + success: result.success, + message: result.error, + num_docs: 1, + docs: [result.data], + time_taken: 0, + team_id: team_id, + mode: "scrape", + url: req.body.url, + crawlerOptions: crawlerOptions, + pageOptions: pageOptions, + }); + return res.json(result); } catch (error) { console.error(error); return res.status(500).json({ error: error.message });