From 0db0874b00742e7e7a6439a975501a397da5d6b8 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sat, 20 Apr 2024 19:37:45 -0700 Subject: [PATCH] Nick: --- apps/api/src/controllers/crawl.ts | 2 ++ apps/api/src/controllers/crawlPreview.ts | 2 ++ apps/api/src/controllers/scrape.ts | 8 ++++++-- apps/api/src/main/runWebScraper.ts | 10 +++++++--- apps/api/src/services/logging/log_job.ts | 3 ++- apps/api/src/services/queue-worker.ts | 6 ++++-- apps/api/src/services/webhook.ts | 9 +++++++-- apps/api/src/types.ts | 2 ++ 8 files changed, 32 insertions(+), 10 deletions(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 2f7f842..17cfa62 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -42,6 +42,7 @@ export async function crawlController(req: Request, res: Response) { returnOnlyUrls: true, }, pageOptions: pageOptions, + }); const docs = await a.getDocuments(false, (progress) => { @@ -67,6 +68,7 @@ export async function crawlController(req: Request, res: Response) { crawlerOptions: { ...crawlerOptions }, team_id: team_id, pageOptions: pageOptions, + origin: req.body.origin ?? "api", }); res.json({ jobId: job.id }); diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts index 641468c..3f28ef6 100644 --- a/apps/api/src/controllers/crawlPreview.ts +++ b/apps/api/src/controllers/crawlPreview.ts @@ -21,12 +21,14 @@ export async function crawlPreviewController(req: Request, res: Response) { const mode = req.body.mode ?? "crawl"; const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; + const job = await addWebScraperJob({ url: url, mode: mode ?? "crawl", // fix for single urls not working crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 }, team_id: "preview", pageOptions: pageOptions, + origin: "website-preview", }); res.json({ jobId: job.id }); diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 51d14f2..632fff5 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -72,6 +72,7 @@ export async function scrapeController(req: Request, res: Response) { } const crawlerOptions = req.body.crawlerOptions ?? {}; const pageOptions = req.body.pageOptions ?? { onlyMainContent: false }; + const origin = req.body.origin ?? "api"; try { const { success: creditsCheckSuccess, message: creditsCheckMessage } = @@ -83,24 +84,27 @@ export async function scrapeController(req: Request, res: Response) { console.error(error); return res.status(500).json({ error: "Internal server error" }); } - + const startTime = new Date().getTime(); const result = await scrapeHelper( req, team_id, crawlerOptions, pageOptions ); + const endTime = new Date().getTime(); + const timeTakenInSeconds = (endTime - startTime) / 1000; logJob({ success: result.success, message: result.error, num_docs: 1, docs: [result.data], - time_taken: 0, + time_taken: timeTakenInSeconds, team_id: team_id, mode: "scrape", url: req.body.url, crawlerOptions: crawlerOptions, pageOptions: pageOptions, + origin: origin, }); return res.status(result.returnCode).json(result); } catch (error) { diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 0f562a0..d943429 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -44,7 +44,11 @@ export async function runWebScraper({ onSuccess: (result: any) => void; onError: (error: any) => void; team_id: string; -}): Promise<{ success: boolean; message: string; docs: CrawlResult[] }> { +}): Promise<{ + success: boolean; + message: string; + docs: CrawlResult[]; +}> { try { const provider = new WebScraperDataProvider(); if (mode === "crawl") { @@ -70,7 +74,7 @@ export async function runWebScraper({ return { success: true, message: "No pages found", - docs: [], + docs: [] }; } @@ -87,7 +91,7 @@ export async function runWebScraper({ return { success: false, message: "Failed to bill team, no subscription was found", - docs: [], + docs: [] }; } diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts index cb7e648..639b3a8 100644 --- a/apps/api/src/services/logging/log_job.ts +++ b/apps/api/src/services/logging/log_job.ts @@ -17,11 +17,12 @@ export async function logJob(job: FirecrawlJob) { num_docs: job.num_docs, docs: job.docs, time_taken: job.time_taken, - team_id: job.team_id, + team_id: job.team_id === "preview" ? null : job.team_id, mode: job.mode, url: job.url, crawler_options: job.crawlerOptions, page_options: job.pageOptions, + origin: job.origin, }, ]); if (error) { diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index d436401..dda876a 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -17,10 +17,11 @@ getWebScraperQueue().process( current_url: "", }); const start = Date.now(); + console.log("Processing job", job.data); const { success, message, docs } = await startWebScraperPipeline({ job }); const end = Date.now(); const timeTakenInSeconds = (end - start) / 1000; - + const data = { success: success, result: { @@ -33,7 +34,7 @@ getWebScraperQueue().process( }; await callWebhook(job.data.team_id, data); - + await logJob({ success: success, message: message, @@ -45,6 +46,7 @@ getWebScraperQueue().process( url: job.data.url, crawlerOptions: job.data.crawlerOptions, pageOptions: job.data.pageOptions, + origin: job.data.origin, }); done(null, data); } catch (error) { diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts index a086425..ab1f90e 100644 --- a/apps/api/src/services/webhook.ts +++ b/apps/api/src/services/webhook.ts @@ -1,6 +1,7 @@ import { supabase_service } from "./supabase"; export const callWebhook = async (teamId: string, data: any) => { + try { const { data: webhooksData, error } = await supabase_service .from('webhooks') .select('url') @@ -37,5 +38,9 @@ export const callWebhook = async (teamId: string, data: any) => { data: dataToSend, error: data.error || undefined, }), - }); -} \ No newline at end of file + }); + } catch (error) { + console.error(`Error sending webhook for team ID: ${teamId}`, error.message); + } +}; + diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index e3fc5dc..f9e5c73 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -22,6 +22,7 @@ export interface WebScraperOptions { crawlerOptions: any; pageOptions: any; team_id: string; + origin?: string; } @@ -36,6 +37,7 @@ export interface FirecrawlJob { url: string; crawlerOptions?: any; pageOptions?: any; + origin: string; }