From a680c7ce84985863607d1c10eacae481c28bd29a Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:46:29 -0300 Subject: [PATCH 1/2] [Feat] Server health check + slack message --- apps/api/.env.example | 3 +- apps/api/requests.http | 11 ++++++- apps/api/src/index.ts | 70 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/apps/api/.env.example b/apps/api/.env.example index 34e24b1..3cd40c1 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -21,4 +21,5 @@ OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) BULL_AUTH_KEY= # LOGTAIL_KEY= # Use if you're configuring basic logging with logtail PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback -LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs \ No newline at end of file +LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs +SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages \ No newline at end of file diff --git a/apps/api/requests.http b/apps/api/requests.http index 2350136..751ba5e 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -49,4 +49,13 @@ content-type: application/json ### Check Job Status GET https://api.firecrawl.dev/v0/crawl/status/cfcb71ac-23a3-4da5-bd85-d4e58b871d66 -Authorization: Bearer \ No newline at end of file +Authorization: Bearer + +### Get Active Jobs Count +GET http://localhost:3002/serverHealthCheck +content-type: application/json + +### Notify Server Health Check +GET http://localhost:3002/serverHealthCheck/notify +content-type: application/json + diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index a2e5c51..6417f36 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -87,6 +87,76 @@ app.get(`/admin/${process.env.BULL_AUTH_KEY}/queues`, async (req, res) => { 
} }); +app.get(`/serverHealthCheck`, async (req, res) => { + try { + const webScraperQueue = getWebScraperQueue(); + const [activeJobs] = await Promise.all([ + webScraperQueue.getActiveCount(), + ]); + + const noActiveJobs = activeJobs === 0; + // 200 if no active jobs, 503 if there are active jobs + return res.status(noActiveJobs ? 200 : 500).json({ + activeJobs, + }); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } +}); + +app.get('/serverHealthCheck/notify', async (req, res) => { + if (process.env.SLACK_WEBHOOK_URL) { + const treshold = 5; // The treshold value for the active jobs + const timeout = 60000; // 1 minute // The timeout value for the check in milliseconds + + const getActiveJobs = async () => { + const webScraperQueue = getWebScraperQueue(); + const [activeJobs] = await Promise.all([ + webScraperQueue.getActiveCount(), + ]); + + return activeJobs; + }; + + res.status(200).json({ message: "Check initiated" }); + + const checkActiveJobs = async () => { + try { + let activeJobs = await getActiveJobs(); + if (activeJobs >= treshold) { + setTimeout(async () => { + activeJobs = await getActiveJobs(); // Re-check the active jobs count + if (activeJobs >= treshold) { + const slackWebhookUrl = process.env.SLACK_WEBHOOK_URL; + const message = { + text: `⚠️ Warning: The number of active jobs (${activeJobs}) has exceeded the threshold (${treshold}) for more than ${timeout/60000} minute(s).`, + }; + + const response = await fetch(slackWebhookUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(message), + }) + + if (!response.ok) { + console.error('Failed to send Slack notification') + } + } + }, timeout); + } + } catch (error) { + console.error(error); + } + }; + + checkActiveJobs(); + } +}); + + app.get("/is-production", (req, res) => { res.send({ isProduction: global.isProduction }); }); From 9b01dc62817dca9488d890f0f58a5c4e654e7fa1 Mon Sep 17 00:00:00 
2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:07:22 -0300 Subject: [PATCH 2/2] Changed from active to waiting jobs --- apps/api/src/index.ts | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 6417f36..27e8713 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -90,14 +90,14 @@ app.get(`/admin/${process.env.BULL_AUTH_KEY}/queues`, async (req, res) => { app.get(`/serverHealthCheck`, async (req, res) => { try { const webScraperQueue = getWebScraperQueue(); - const [activeJobs] = await Promise.all([ - webScraperQueue.getActiveCount(), + const [waitingJobs] = await Promise.all([ + webScraperQueue.getWaitingCount(), ]); - const noActiveJobs = activeJobs === 0; + const noWaitingJobs = waitingJobs === 0; // 200 if no active jobs, 503 if there are active jobs - return res.status(noActiveJobs ? 200 : 500).json({ - activeJobs, + return res.status(noWaitingJobs ? 
200 : 500).json({ + waitingJobs, }); } catch (error) { console.error(error); @@ -107,30 +107,31 @@ app.get(`/serverHealthCheck`, async (req, res) => { app.get('/serverHealthCheck/notify', async (req, res) => { if (process.env.SLACK_WEBHOOK_URL) { - const treshold = 5; // The treshold value for the active jobs + const treshold = 1; // The threshold value for the waiting jobs const timeout = 60000; // 1 minute // The timeout value for the check in milliseconds - const getActiveJobs = async () => { + const getWaitingJobsCount = async () => { const webScraperQueue = getWebScraperQueue(); - const [activeJobs] = await Promise.all([ - webScraperQueue.getActiveCount(), + const [waitingJobsCount] = await Promise.all([ + webScraperQueue.getWaitingCount(), ]); - return activeJobs; + return waitingJobsCount; }; res.status(200).json({ message: "Check initiated" }); - const checkActiveJobs = async () => { + const checkWaitingJobs = async () => { try { - let activeJobs = await getActiveJobs(); - if (activeJobs >= treshold) { + let waitingJobsCount = await getWaitingJobsCount(); + if (waitingJobsCount >= treshold) { setTimeout(async () => { - activeJobs = await getActiveJobs(); // Re-check the active jobs count - if (activeJobs >= treshold) { + // Re-check the waiting jobs count after the timeout + waitingJobsCount = await getWaitingJobsCount(); + if (waitingJobsCount >= treshold) { const slackWebhookUrl = process.env.SLACK_WEBHOOK_URL; const message = { - text: `⚠️ Warning: The number of active jobs (${activeJobs}) has exceeded the threshold (${treshold}) for more than ${timeout/60000} minute(s).`, + text: `⚠️ Warning: The number of active jobs (${waitingJobsCount}) has exceeded the threshold (${treshold}) for more than ${timeout/60000} minute(s).`, }; const response = await fetch(slackWebhookUrl, { @@ -152,7 +153,7 @@ app.get('/serverHealthCheck/notify', async (req, res) => { } }; - checkActiveJobs(); + checkWaitingJobs(); } });