From b361a76282e88b678d31306d1469f609f4d135a1 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sat, 20 Apr 2024 19:53:04 -0700 Subject: [PATCH 01/11] Caleb: added logging improvement --- .gitignore | 2 ++ apps/api/.env.local | 14 -------------- apps/api/src/__tests__/e2e/index.test.ts | 14 ++++++++++++-- apps/api/src/services/logtail.ts | 23 +++++++++++++++++++---- 4 files changed, 33 insertions(+), 20 deletions(-) delete mode 100644 apps/api/.env.local diff --git a/.gitignore b/.gitignore index cbfb076..9029012 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ dump.rdb /mongo-data apps/js-sdk/node_modules/ + +apps/api/.env.local diff --git a/apps/api/.env.local b/apps/api/.env.local deleted file mode 100644 index f5c625f..0000000 --- a/apps/api/.env.local +++ /dev/null @@ -1,14 +0,0 @@ -NUM_WORKERS_PER_QUEUE=8 -PORT= -HOST= -SUPABASE_ANON_TOKEN= -SUPABASE_URL= -SUPABASE_SERVICE_TOKEN= -REDIS_URL= -SCRAPING_BEE_API_KEY= -OPENAI_API_KEY= -BULL_AUTH_KEY= -LOGTAIL_KEY= -PLAYWRIGHT_MICROSERVICE_URL= -LLAMAPARSE_API_KEY= -TEST_API_KEY= \ No newline at end of file diff --git a/apps/api/src/__tests__/e2e/index.test.ts b/apps/api/src/__tests__/e2e/index.test.ts index 554453b..ebf87c6 100644 --- a/apps/api/src/__tests__/e2e/index.test.ts +++ b/apps/api/src/__tests__/e2e/index.test.ts @@ -3,12 +3,20 @@ import { app } from '../../index'; import dotenv from 'dotenv'; dotenv.config(); -const TEST_URL = 'http://localhost:3002' + +// const TEST_URL = 'http://localhost:3002' +const TEST_URL = 'http://127.0.0.1:3002' + + + + describe('E2E Tests for API Routes', () => { describe('GET /', () => { it('should return Hello, world! message', async () => { - const response = await request(TEST_URL).get('/'); + + const response = await request(TEST_URL).get('/'); + expect(response.statusCode).toBe(200); expect(response.text).toContain('SCRAPERS-JS: Hello, world! Fly.io'); }); @@ -16,6 +24,8 @@ describe('E2E Tests for API Routes', () => { describe('GET /test', () => { it('should return Hello, world! message', async () => { + + const response = await request(TEST_URL).get('/test'); expect(response.statusCode).toBe(200); expect(response.text).toContain('Hello, world!'); diff --git a/apps/api/src/services/logtail.ts b/apps/api/src/services/logtail.ts index 19ab773..8b86a6b 100644 --- a/apps/api/src/services/logtail.ts +++ b/apps/api/src/services/logtail.ts @@ -1,4 +1,19 @@ -const { Logtail } = require("@logtail/node"); -//dot env -require("dotenv").config(); -export const logtail = new Logtail(process.env.LOGTAIL_KEY); +import { Logtail } from "@logtail/node"; +import "dotenv/config"; + +// A mock Logtail class to handle cases where LOGTAIL_KEY is not provided +class MockLogtail { + info(message: string, context?: Record): void { + console.log(message, context); + } + error(message: string, context: Record = {}): void { + console.error(message, context); + } +} + +// Using the actual Logtail class if LOGTAIL_KEY exists, otherwise using the mock class +// Additionally, print a warning to the terminal if LOGTAIL_KEY is not provided +export const logtail = process.env.LOGTAIL_KEY ? new Logtail(process.env.LOGTAIL_KEY) : (() => { + console.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more."); + return new MockLogtail(); +})(); From e6b46178ddbe9678036e2c11e51030007a2998ee Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sat, 20 Apr 2024 19:53:27 -0700 Subject: [PATCH 02/11] Caleb: added .env.example --- apps/api/.env.example | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 apps/api/.env.example diff --git a/apps/api/.env.example b/apps/api/.env.example new file mode 100644 index 0000000..392db9a --- /dev/null +++ b/apps/api/.env.example @@ -0,0 +1,18 @@ +# Required +NUM_WORKERS_PER_QUEUE=8 +PORT= +HOST= +SUPABASE_ANON_TOKEN= +SUPABASE_URL= +SUPABASE_SERVICE_TOKEN= +REDIS_URL= + +# Optional + +SCRAPING_BEE_API_KEY= +OPENAI_API_KEY= +BULL_AUTH_KEY= +LOGTAIL_KEY= +PLAYWRIGHT_MICROSERVICE_URL= +LLAMAPARSE_API_KEY= +TEST_API_KEY= \ No newline at end of file From be75aaa195ade4e41e8225cad7ba06e5df661385 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sun, 21 Apr 2024 09:31:22 -0700 Subject: [PATCH 03/11] Caleb: first version of supabase proxy to make db authentication optional --- apps/api/src/controllers/auth.ts | 11 +++++++ apps/api/src/controllers/crawl.ts | 15 +++++---- apps/api/src/controllers/scrape.ts | 26 ++++++++------- apps/api/src/services/supabase.ts | 53 +++++++++++++++++++++++++++--- 4 files changed, 83 insertions(+), 22 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 76bacbe..6ae234d 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -3,6 +3,7 @@ import { getRateLimiter } from "../../src/services/rate-limiter"; import { RateLimiterMode } from "../../src/types"; import { supabase_service } from "../../src/services/supabase"; + export async function authenticateUser( req, res, @@ -13,6 +14,16 @@ export async function authenticateUser( error?: string; status?: number; }> { + + console.log(process.env) + + if(process.env.USE_DB_AUTHENTICATION === "false"){ + console.log("WARNING - YOU'RE bypassing Authentication"); + return { success: true}; + } + + console.log("USING SUPABASE AUTH"); + const authHeader = req.headers.authorization; if (!authHeader) { return { success: false, error: "Unauthorized", status: 401 }; diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 17cfa62..36c013e 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -8,6 +8,8 @@ import { addWebScraperJob } from "../../src/services/queue-jobs"; export async function crawlController(req: Request, res: Response) { try { + + console.log("hello") const { success, team_id, error, status } = await authenticateUser( req, res, @@ -16,14 +18,15 @@ export async function crawlController(req: Request, res: Response) { if (!success) { return res.status(status).json({ error }); } - - const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - return res.status(402).json({ error: "Insufficient credits" }); + + if (process.env.USE_DB_AUTHENTICATION === "true") { + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); + } } - // authenticate on supabase const url = req.body.url; if (!url) { return res.status(400).json({ error: "Url is required" }); diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 632fff5..47b00f0 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -40,18 +40,22 @@ export async function scrapeHelper( if (filteredDocs.length === 0) { return { success: true, error: "No page found", returnCode: 200 }; } - const { success, credit_usage } = await billTeam( - team_id, - filteredDocs.length - ); - if (!success) { - return { - success: false, - error: - "Failed to bill team. Insufficient credits or subscription not found.", - returnCode: 402, - }; + + if (process.env.USE_DB_AUTHENTICATION === "true") { + const { success, credit_usage } = await billTeam( + team_id, + filteredDocs.length + ); + if (!success) { + return { + success: false, + error: + "Failed to bill team. Insufficient credits or subscription not found.", + returnCode: 402, + }; + } } + return { success: true, data: filteredDocs[0], diff --git a/apps/api/src/services/supabase.ts b/apps/api/src/services/supabase.ts index 49121fa..9a2366d 100644 --- a/apps/api/src/services/supabase.ts +++ b/apps/api/src/services/supabase.ts @@ -1,6 +1,49 @@ -import { createClient } from "@supabase/supabase-js"; +import { createClient, SupabaseClient } from '@supabase/supabase-js'; -export const supabase_service = createClient( - process.env.SUPABASE_URL, - process.env.SUPABASE_SERVICE_TOKEN, -); +// SupabaseService class initializes the Supabase client conditionally based on environment variables. +class SupabaseService { + private client: SupabaseClient | null = null; + + constructor() { + const supabaseUrl = process.env.SUPABASE_URL; + const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN; + + // Only initialize the Supabase client if both URL and Service Token are provided. + if (process.env.USE_DB_AUTHENTICATION === "false") { + + // Warn the user that Authentication is disabled by setting the client to null + console.warn("\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"); + this.client = null; + } else if (!supabaseUrl || !supabaseServiceToken) { + console.error("\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"); + } else { + this.client = createClient(supabaseUrl, supabaseServiceToken); + } + } + + // Provides access to the initialized Supabase client, if available. + getClient(): SupabaseClient | null { + return this.client; + } +} + +// Using a Proxy to handle dynamic access to the Supabase client or service methods. +// This approach ensures that if Supabase is not configured, any attempt to use it will result in a clear error. +export const supabase_service: SupabaseClient = new Proxy(new SupabaseService(), { + get: function (target, prop, receiver) { + const client = target.getClient(); + // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback. + if (client === null) { + console.error("Attempted to access Supabase client when it's not configured."); + return () => { + throw new Error("Supabase client is not configured."); + }; + } + // Direct access to SupabaseService properties takes precedence. + if (prop in target) { + return Reflect.get(target, prop, receiver); + } + // Otherwise, delegate access to the Supabase client. + return Reflect.get(client, prop, receiver); + } +}) as unknown as SupabaseClient; \ No newline at end of file From 5cdbf3a0ac1838219813e064b1bf8d35fc2d538f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 10:36:48 -0700 Subject: [PATCH 04/11] Nick: cleaner functions to handle authenticated requests that dont require ifs everywhere --- apps/api/src/controllers/auth.ts | 18 +++---- apps/api/src/controllers/crawl.ts | 16 +++--- apps/api/src/controllers/scrape.ts | 2 - apps/api/src/lib/withAuth.ts | 19 +++++++ .../src/services/billing/credit_billing.ts | 10 +++- apps/api/src/services/supabase.ts | 51 +++++++++++-------- apps/api/src/types.ts | 10 ++-- 7 files changed, 76 insertions(+), 50 deletions(-) create mode 100644 apps/api/src/lib/withAuth.ts diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 6ae234d..49b2146 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -1,10 +1,15 @@ import { parseApi } from "../../src/lib/parseApi"; import { getRateLimiter } from "../../src/services/rate-limiter"; -import { RateLimiterMode } from "../../src/types"; +import { AuthResponse, RateLimiterMode } from "../../src/types"; import { supabase_service } from "../../src/services/supabase"; +import { withAuth } from "../../src/lib/withAuth"; -export async function authenticateUser( +export async function authenticateUser(req, res, mode?: RateLimiterMode) : Promise { + return withAuth(supaAuthenticateUser)(req, res, mode); +} + +export async function supaAuthenticateUser( req, res, mode?: RateLimiterMode @@ -15,15 +20,6 @@ export async function authenticateUser( status?: number; }> { - console.log(process.env) - - if(process.env.USE_DB_AUTHENTICATION === "false"){ - console.log("WARNING - YOU'RE bypassing Authentication"); - return { success: true}; - } - - console.log("USING SUPABASE AUTH"); - const authHeader = req.headers.authorization; if (!authHeader) { return { success: false, error: "Unauthorized", status: 401 }; diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 36c013e..1fb2698 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -8,8 +8,7 @@ import { addWebScraperJob } from "../../src/services/queue-jobs"; export async function crawlController(req: Request, res: Response) { try { - - console.log("hello") + console.log("hello"); const { success, team_id, error, status } = await authenticateUser( req, res, @@ -18,13 +17,11 @@ export async function crawlController(req: Request, res: Response) { if (!success) { return res.status(status).json({ error }); } - - if (process.env.USE_DB_AUTHENTICATION === "true") { - const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - return res.status(402).json({ error: "Insufficient credits" }); - } + + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); } const url = req.body.url; @@ -45,7 +42,6 @@ export async function crawlController(req: Request, res: Response) { returnOnlyUrls: true, }, pageOptions: pageOptions, - }); const docs = await a.getDocuments(false, (progress) => { diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 47b00f0..be70800 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -41,7 +41,6 @@ export async function scrapeHelper( return { success: true, error: "No page found", returnCode: 200 }; } - if (process.env.USE_DB_AUTHENTICATION === "true") { const { success, credit_usage } = await billTeam( team_id, filteredDocs.length @@ -54,7 +53,6 @@ export async function scrapeHelper( returnCode: 402, }; } - } return { success: true, diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts new file mode 100644 index 0000000..3ed8906 --- /dev/null +++ b/apps/api/src/lib/withAuth.ts @@ -0,0 +1,19 @@ +import { AuthResponse } from "../../src/types"; + +export function withAuth( + originalFunction: (...args: U) => Promise +) { + return async function (...args: U): Promise { + if (process.env.USE_DB_AUTHENTICATION === "false") { + console.warn("WARNING - You're bypassing authentication"); + return { success: true } as T; + } else { + try { + return await originalFunction(...args); + } catch (error) { + console.error("Error in withAuth function: ", error); + return { success: false, error: error.message } as T; + } + } + }; +} diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 6ac0843..bf5be60 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -1,7 +1,12 @@ +import { withAuth } from "../../lib/withAuth"; import { supabase_service } from "../supabase"; const FREE_CREDITS = 100; + export async function billTeam(team_id: string, credits: number) { + return withAuth(supaBillTeam)(team_id, credits); +} +export async function supaBillTeam(team_id: string, credits: number) { if (team_id === "preview") { return { success: true, message: "Preview team, no credits used" }; } @@ -52,8 +57,11 @@ export async function billTeam(team_id: string, credits: number) { return { success: true, credit_usage }; } -// if team has enough credits for the operation, return true, else return false export async function checkTeamCredits(team_id: string, credits: number) { + return withAuth(supaCheckTeamCredits)(team_id, credits); +} +// if team has enough credits for the operation, return true, else return false +export async function supaCheckTeamCredits(team_id: string, credits: number) { if (team_id === "preview") { return { success: true, message: "Preview team, no credits used" }; } diff --git a/apps/api/src/services/supabase.ts b/apps/api/src/services/supabase.ts index 9a2366d..fa6404d 100644 --- a/apps/api/src/services/supabase.ts +++ b/apps/api/src/services/supabase.ts @@ -1,4 +1,4 @@ -import { createClient, SupabaseClient } from '@supabase/supabase-js'; +import { createClient, SupabaseClient } from "@supabase/supabase-js"; // SupabaseService class initializes the Supabase client conditionally based on environment variables. class SupabaseService { @@ -7,15 +7,17 @@ class SupabaseService { constructor() { const supabaseUrl = process.env.SUPABASE_URL; const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN; - // Only initialize the Supabase client if both URL and Service Token are provided. if (process.env.USE_DB_AUTHENTICATION === "false") { - // Warn the user that Authentication is disabled by setting the client to null - console.warn("\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"); + console.warn( + "\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m" + ); this.client = null; } else if (!supabaseUrl || !supabaseServiceToken) { - console.error("\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"); + console.error( + "\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m" + ); } else { this.client = createClient(supabaseUrl, supabaseServiceToken); } @@ -29,21 +31,26 @@ class SupabaseService { // Using a Proxy to handle dynamic access to the Supabase client or service methods. // This approach ensures that if Supabase is not configured, any attempt to use it will result in a clear error. -export const supabase_service: SupabaseClient = new Proxy(new SupabaseService(), { - get: function (target, prop, receiver) { - const client = target.getClient(); - // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback. - if (client === null) { - console.error("Attempted to access Supabase client when it's not configured."); - return () => { - throw new Error("Supabase client is not configured."); - }; - } - // Direct access to SupabaseService properties takes precedence. - if (prop in target) { - return Reflect.get(target, prop, receiver); - } - // Otherwise, delegate access to the Supabase client. - return Reflect.get(client, prop, receiver); +export const supabase_service: SupabaseClient = new Proxy( + new SupabaseService(), + { + get: function (target, prop, receiver) { + const client = target.getClient(); + // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback. + if (client === null) { + console.error( + "Attempted to access Supabase client when it's not configured." + ); + return () => { + throw new Error("Supabase client is not configured."); + }; + } + // Direct access to SupabaseService properties takes precedence. + if (prop in target) { + return Reflect.get(target, prop, receiver); + } + // Otherwise, delegate access to the Supabase client. + return Reflect.get(client, prop, receiver); + }, } -}) as unknown as SupabaseClient; \ No newline at end of file +) as unknown as SupabaseClient; diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index f9e5c73..7f527fb 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -25,7 +25,6 @@ export interface WebScraperOptions { origin?: string; } - export interface FirecrawlJob { success: boolean; message: string; @@ -40,8 +39,6 @@ export interface FirecrawlJob { origin: string; } - - export enum RateLimiterMode { Crawl = "crawl", CrawlStatus = "crawl-status", @@ -49,4 +46,9 @@ export enum RateLimiterMode { Preview = "preview", } - +export interface AuthResponse { + success: boolean; + team_id?: string; + error?: string; + status?: number; +} From ef4ffd3a18e3b1c31a51d7fb3a53544f574a6c27 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sun, 21 Apr 2024 10:56:30 -0700 Subject: [PATCH 05/11] Adding contributors guide --- apps/api/.env.example | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/apps/api/.env.example b/apps/api/.env.example index 9a4541c..34e24b1 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -1,18 +1,24 @@ -ENV= -NUM_WORKERS_PER_QUEUE=8 -PORT= -HOST= -SUPABASE_ANON_TOKEN= -SUPABASE_URL= +# ===== Required ENVS ====== +NUM_WORKERS_PER_QUEUE=8 +PORT=3002 +HOST=0.0.0.0 +REDIS_URL=redis://localhost:6379 + +## To turn on DB authentication, you need to set up supabase. +USE_DB_AUTHENTICATION=true + +# ===== Optional ENVS ====== + +# Supabase Setup (used to support DB authentication, advanced logging, etc.) +SUPABASE_ANON_TOKEN= +SUPABASE_URL= SUPABASE_SERVICE_TOKEN= -REDIS_URL= -# Optional - -SCRAPING_BEE_API_KEY= -OPENAI_API_KEY= -BULL_AUTH_KEY= -LOGTAIL_KEY= -PLAYWRIGHT_MICROSERVICE_URL= -LLAMAPARSE_API_KEY= -TEST_API_KEY= \ No newline at end of file +# Other Optionals +TEST_API_KEY= # use if you've set up authentication and want to test with a real API key +SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking +OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) +BULL_AUTH_KEY= # +LOGTAIL_KEY= # Use if you're configuring basic logging with logtail +PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback +LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs \ No newline at end of file From 401f992c562fd94cb6034bab882c7a70839d4468 Mon Sep 17 00:00:00 2001 From: Caleb Peffer <44934913+calebpeffer@users.noreply.github.com> Date: Sun, 21 Apr 2024 11:19:40 -0700 Subject: [PATCH 06/11] Caleb: added contributors guide --- CONTRIBUTING.md | 94 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 224eb57..5d4b69e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,8 +1,96 @@ -# Contributing -We love contributions! Our contribution guide will be coming soon! +# Contributors guide: - +Welcome to firecrawl 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) + +If you're contributing, note that the process is similar to other open source repos i.e. (fork firecrawl, make changes, run tests, PR). If you have any questions, and would like help gettin on board, reach out to hello@mendable.ai for more or submit an issue! + + +## Hosting locally + +First, start by installing dependencies +1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) +2. pnpm [instructions](https://pnpm.io/installation) +3. redis - [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/) + + +Set environment variables in a .env in the /apps/api/ directoryyou can copy over the template in .env.example. + +To start, we wont set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features ) + +```.env +# ===== Required ENVS ====== +NUM_WORKERS_PER_QUEUE=8 +PORT=3002 +HOST=0.0.0.0 +REDIS_URL=redis://localhost:6379 + +## To turn on DB authentication, you need to set up supabase. +USE_DB_AUTHENTICATION=false + +# ===== Optional ENVS ====== + +# Supabase Setup (used to support DB authentication, advanced logging, etc.) +SUPABASE_ANON_TOKEN= +SUPABASE_URL= +SUPABASE_SERVICE_TOKEN= + +# Other Optionals +TEST_API_KEY= # use if you've set up authentication and want to test with a real API key +SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking +OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.) +BULL_AUTH_KEY= # +LOGTAIL_KEY= # Use if you're configuring basic logging with logtail +PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback +LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs + +``` + +You're going to need to open 3 terminals. + +### Terminal 1 - setting up redis + +Run the command anywhere within your project + +`redis-server` + + +### Terminal 2 - setting up workers + +Now, navigate to the apps/api/ directory and run: +`pnpm run workers` + +### Terminal 3 - setting up the main server + + +To do this, navigate to the apps/api/ directory and run if you don’t have this already, install pnpm here: https://pnpm.io/installation +Next, run your server with`pnpm run start` + + + +### Terminal 3 - sending our first request. + +Alright: now let’s send our first request. + +```curl +curl -X GET http://localhost:3002/test +``` +This should return the response Hello, world! + + +If you’d like to test the crawl endpoint, you can run this + +```curl +curl -X POST http://localhost:3002/v0/crawl \ + -H 'Content-Type: application/json' \ + -d '{ + "url": "https://mendable.ai" + }' +``` + +## Tests: + +The best way to do this is run the test with npx:Once again, navigate to the `apps/api` directory`npx jest` From 898d729a8455785082e2015e695604d1c3c3ff0c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:27:31 -0700 Subject: [PATCH 07/11] Nick: tests --- apps/api/src/__tests__/e2e/index.test.ts | 346 +++++++++--------- .../src/__tests__/e2e_noAuth/index.test.ts | 156 ++++++++ apps/api/src/controllers/crawl.ts | 1 - apps/api/src/index.ts | 2 +- 4 files changed, 334 insertions(+), 171 deletions(-) create mode 100644 apps/api/src/__tests__/e2e_noAuth/index.test.ts diff --git a/apps/api/src/__tests__/e2e/index.test.ts b/apps/api/src/__tests__/e2e/index.test.ts index ebf87c6..ba01a7c 100644 --- a/apps/api/src/__tests__/e2e/index.test.ts +++ b/apps/api/src/__tests__/e2e/index.test.ts @@ -1,189 +1,197 @@ -import request from 'supertest'; -import { app } from '../../index'; -import dotenv from 'dotenv'; +import request from "supertest"; +import { app } from "../../index"; +import dotenv from "dotenv"; dotenv.config(); // const TEST_URL = 'http://localhost:3002' -const TEST_URL = 'http://127.0.0.1:3002' +const TEST_URL = "http://127.0.0.1:3002"; - - - -describe('E2E Tests for API Routes', () => { - describe('GET /', () => { - it('should return Hello, world! message', async () => { - - const response = await request(TEST_URL).get('/'); - - expect(response.statusCode).toBe(200); - expect(response.text).toContain('SCRAPERS-JS: Hello, world! Fly.io'); - }); - }); - - describe('GET /test', () => { - it('should return Hello, world! message', async () => { - - - const response = await request(TEST_URL).get('/test'); - expect(response.statusCode).toBe(200); - expect(response.text).toContain('Hello, world!'); - }); - }); - - describe('POST /v0/scrape', () => { - it('should require authorization', async () => { - const response = await request(app).post('/v0/scrape'); - expect(response.statusCode).toBe(401); + describe("E2E Tests for API Routes", () => { + beforeAll(() => { + process.env.USE_DB_AUTHENTICATION = "true"; }); - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/scrape') - .set('Authorization', `Bearer invalid-api-key`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(401); + afterAll(() => { + delete process.env.USE_DB_AUTHENTICATION; }); - it('should return a successful response with a valid preview token', async () => { - const response = await request(TEST_URL) - .post('/v0/scrape') - .set('Authorization', `Bearer this_is_just_a_preview_token`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - }, 10000); // 10 seconds timeout + describe("GET /", () => { + it("should return Hello, world! message", async () => { + const response = await request(TEST_URL).get("/"); - it('should return a successful response with a valid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/scrape') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('data'); - expect(response.body.data).toHaveProperty('content'); - expect(response.body.data).toHaveProperty('markdown'); - expect(response.body.data).toHaveProperty('metadata'); - expect(response.body.data.content).toContain('🔥 FireCrawl'); - }, 30000); // 30 seconds timeout - }); - - describe('POST /v0/crawl', () => { - it('should require authorization', async () => { - const response = await request(TEST_URL).post('/v0/crawl'); - expect(response.statusCode).toBe(401); - }); - - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawl') - .set('Authorization', `Bearer invalid-api-key`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(401); - }); - - it('should return a successful response with a valid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawl') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('jobId'); - expect(response.body.jobId).toMatch(/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/); - }); - - // Additional tests for insufficient credits? - }); - - describe('POST /v0/crawlWebsitePreview', () => { - it('should require authorization', async () => { - const response = await request(TEST_URL).post('/v0/crawlWebsitePreview'); - expect(response.statusCode).toBe(401); - }); - - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawlWebsitePreview') - .set('Authorization', `Bearer invalid-api-key`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(401); - }); - - it('should return a successful response with a valid API key', async () => { - const response = await request(TEST_URL) - .post('/v0/crawlWebsitePreview') - .set('Authorization', `Bearer this_is_just_a_preview_token`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('jobId'); - expect(response.body.jobId).toMatch(/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/); - }); - }); - - describe('GET /v0/crawl/status/:jobId', () => { - it('should require authorization', async () => { - const response = await request(TEST_URL).get('/v0/crawl/status/123'); - expect(response.statusCode).toBe(401); - }); - - it('should return an error response with an invalid API key', async () => { - const response = await request(TEST_URL) - .get('/v0/crawl/status/123') - .set('Authorization', `Bearer invalid-api-key`); - expect(response.statusCode).toBe(401); - }); - - it('should return Job not found for invalid job ID', async () => { - const response = await request(TEST_URL) - .get('/v0/crawl/status/invalidJobId') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`); - expect(response.statusCode).toBe(404); - }); - - it('should return a successful response for a valid crawl job', async () => { - const crawlResponse = await request(TEST_URL) - .post('/v0/crawl') - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`) - .set('Content-Type', 'application/json') - .send({ url: 'https://firecrawl.dev' }); - expect(crawlResponse.statusCode).toBe(200); - - - const response = await request(TEST_URL) - .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`); expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('status'); - expect(response.body.status).toBe('active'); + expect(response.text).toContain("SCRAPERS-JS: Hello, world! Fly.io"); + }); + }); + + describe("GET /test", () => { + it("should return Hello, world! message", async () => { + const response = await request(TEST_URL).get("/test"); + expect(response.statusCode).toBe(200); + expect(response.text).toContain("Hello, world!"); + }); + }); + + describe("POST /v0/scrape", () => { + it("should require authorization", async () => { + const response = await request(app).post("/v0/scrape"); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(401); + }); + it("should return a successful response with a valid preview token", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Authorization", `Bearer this_is_just_a_preview_token`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + }, 10000); // 10 seconds timeout + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("content"); + expect(response.body.data).toHaveProperty("markdown"); + expect(response.body.data).toHaveProperty("metadata"); + expect(response.body.data.content).toContain("🔥 FireCrawl"); + }, 30000); // 30 seconds timeout + }); + + describe("POST /v0/crawl", () => { + it("should require authorization", async () => { + const response = await request(TEST_URL).post("/v0/crawl"); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(401); + }); + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + + // Additional tests for insufficient credits? + }); + + describe("POST /v0/crawlWebsitePreview", () => { + it("should require authorization", async () => { + const response = await request(TEST_URL).post( + "/v0/crawlWebsitePreview" + ); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Authorization", `Bearer invalid-api-key`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(401); + }); + + it("should return a successful response with a valid API key", async () => { + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Authorization", `Bearer this_is_just_a_preview_token`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + }); + + describe("GET /v0/crawl/status/:jobId", () => { + it("should require authorization", async () => { + const response = await request(TEST_URL).get("/v0/crawl/status/123"); + expect(response.statusCode).toBe(401); + }); + + it("should return an error response with an invalid API key", async () => { + const response = await request(TEST_URL) + .get("/v0/crawl/status/123") + .set("Authorization", `Bearer invalid-api-key`); + expect(response.statusCode).toBe(401); + }); + + it("should return Job not found for invalid job ID", async () => { + const response = await request(TEST_URL) + .get("/v0/crawl/status/invalidJobId") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + expect(response.statusCode).toBe(404); + }); + + it("should return a successful response for a valid crawl job", async () => { + const crawlResponse = await request(TEST_URL) + .post("/v0/crawl") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(crawlResponse.statusCode).toBe(200); + + const response = await request(TEST_URL) + .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("status"); + expect(response.body.status).toBe("active"); // wait for 30 seconds await new Promise((r) => setTimeout(r, 30000)); const completedResponse = await request(TEST_URL) - .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) - .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`); + .get(`/v0/crawl/status/${crawlResponse.body.jobId}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); expect(completedResponse.statusCode).toBe(200); - expect(completedResponse.body).toHaveProperty('status'); - expect(completedResponse.body.status).toBe('completed'); - expect(completedResponse.body).toHaveProperty('data'); - expect(completedResponse.body.data[0]).toHaveProperty('content'); - expect(completedResponse.body.data[0]).toHaveProperty('markdown'); - expect(completedResponse.body.data[0]).toHaveProperty('metadata'); - expect(completedResponse.body.data[0].content).toContain('🔥 FireCrawl'); - }, 60000); // 60 seconds - }); + expect(completedResponse.body).toHaveProperty("status"); + expect(completedResponse.body.status).toBe("completed"); + expect(completedResponse.body).toHaveProperty("data"); + expect(completedResponse.body.data[0]).toHaveProperty("content"); + expect(completedResponse.body.data[0]).toHaveProperty("markdown"); + expect(completedResponse.body.data[0]).toHaveProperty("metadata"); + expect(completedResponse.body.data[0].content).toContain( + "🔥 FireCrawl" + ); + }, 60000); // 60 seconds + }); - describe('GET /is-production', () => { - it('should return the production status', async () => { - const response = await request(TEST_URL).get('/is-production'); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty('isProduction'); + describe("GET /is-production", () => { + it("should return the production status", async () => { + const response = await request(TEST_URL).get("/is-production"); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("isProduction"); + }); }); }); -}); \ No newline at end of file diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts new file mode 100644 index 0000000..e0aca36 --- /dev/null +++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts @@ -0,0 +1,156 @@ +import request from "supertest"; +import { app } from "../../index"; +import dotenv from "dotenv"; +const fs = require("fs"); +const path = require("path"); + +dotenv.config(); + +const TEST_URL = "http://127.0.0.1:3002"; + +describe("E2E Tests for API Routes with No Authentication", () => { + let originalEnv: NodeJS.ProcessEnv; + + // save original process.env + beforeAll(() => { + originalEnv = { ...process.env }; + process.env.USE_DB_AUTHENTICATION = "false"; + process.env.SUPABASE_ANON_TOKEN = ""; + process.env.SUPABASE_URL = ""; + process.env.SUPABASE_SERVICE_TOKEN = ""; + process.env.SCRAPING_BEE_API_KEY = ""; + process.env.OPENAI_API_KEY = ""; + process.env.BULL_AUTH_KEY = ""; + process.env.LOGTAIL_KEY = ""; + process.env.PLAYWRIGHT_MICROSERVICE_URL = ""; + process.env.LLAMAPARSE_API_KEY = ""; + process.env.TEST_API_KEY = ""; + }); + + // restore original process.env + afterAll(() => { + process.env = originalEnv; + }); + + + describe("GET /", () => { + it("should return Hello, world! message", async () => { + const response = await request(TEST_URL).get("/"); + expect(response.statusCode).toBe(200); + expect(response.text).toContain("SCRAPERS-JS: Hello, world! Fly.io"); + }); + }); + + describe("GET /test", () => { + it("should return Hello, world! message", async () => { + const response = await request(TEST_URL).get("/test"); + expect(response.statusCode).toBe(200); + expect(response.text).toContain("Hello, world!"); + }); + }); + + describe("POST /v0/scrape", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).post("/v0/scrape"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return a successful response", async () => { + const response = await request(TEST_URL) + .post("/v0/scrape") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + }, 10000); // 10 seconds timeout + }); + + describe("POST /v0/crawl", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).post("/v0/crawl"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return a successful response", async () => { + const response = await request(TEST_URL) + .post("/v0/crawl") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + }); + + describe("POST /v0/crawlWebsitePreview", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).post("/v0/crawlWebsitePreview"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return a successful response", async () => { + const response = await request(TEST_URL) + .post("/v0/crawlWebsitePreview") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("jobId"); + expect(response.body.jobId).toMatch( + /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ + ); + }); + }); + + describe("GET /v0/crawl/status/:jobId", () => { + it("should not require authorization", async () => { + const response = await request(TEST_URL).get("/v0/crawl/status/123"); + expect(response.statusCode).not.toBe(401); + }); + + it("should return Job not found for invalid job ID", async () => { + const response = await request(TEST_URL).get( + "/v0/crawl/status/invalidJobId" + ); + expect(response.statusCode).toBe(404); + }); + + it("should return a successful response for a valid crawl job", async () => { + const crawlResponse = await request(TEST_URL) + .post("/v0/crawl") + .set("Content-Type", "application/json") + .send({ url: "https://firecrawl.dev" }); + expect(crawlResponse.statusCode).toBe(200); + + const response = await request(TEST_URL).get( + `/v0/crawl/status/${crawlResponse.body.jobId}` + ); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("status"); + expect(response.body.status).toBe("active"); + + // wait for 30 seconds + await new Promise((r) => setTimeout(r, 30000)); + + const completedResponse = await request(TEST_URL).get( + `/v0/crawl/status/${crawlResponse.body.jobId}` + ); + expect(completedResponse.statusCode).toBe(200); + expect(completedResponse.body).toHaveProperty("status"); + expect(completedResponse.body.status).toBe("completed"); + expect(completedResponse.body).toHaveProperty("data"); + expect(completedResponse.body.data[0]).toHaveProperty("content"); + expect(completedResponse.body.data[0]).toHaveProperty("markdown"); + expect(completedResponse.body.data[0]).toHaveProperty("metadata"); + expect(completedResponse.body.data[0].content).toContain("🔥 FireCrawl"); + }, 60000); // 60 seconds + }); + + describe("GET /is-production", () => { + it("should return the production status", async () => { + const response = await request(TEST_URL).get("/is-production"); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("isProduction"); + }); + }); +}); diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 1fb2698..bd3feca 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -8,7 +8,6 @@ import { addWebScraperJob } from "../../src/services/queue-jobs"; export async function crawlController(req: Request, res: Response) { try { - console.log("hello"); const { success, team_id, error, status } = await authenticateUser( req, res, diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 1a42eb4..a2e5c51 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -5,7 +5,6 @@ import "dotenv/config"; import { getWebScraperQueue } from "./services/queue-service"; import { redisClient } from "./services/rate-limiter"; import { v0Router } from "./routes/v0"; - const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { ExpressAdapter } = require("@bull-board/express"); @@ -48,6 +47,7 @@ const DEFAULT_PORT = process.env.PORT ?? 3002; const HOST = process.env.HOST ?? "localhost"; redisClient.connect(); + export function startServer(port = DEFAULT_PORT) { const server = app.listen(Number(port), HOST, () => { console.log(`Server listening on port ${port}`); From 52620bab16e087bfa4c9d1f11ca91af8f1f79632 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:39:36 -0700 Subject: [PATCH 08/11] Nick: prod and local-no-auth tests --- .github/workflows/ci.yml | 2 +- apps/api/package.json | 2 ++ apps/api/src/__tests__/{e2e => e2e_withAuth}/index.test.ts | 0 apps/api/src/lib/withAuth.ts | 7 ++++++- 4 files changed, 9 insertions(+), 2 deletions(-) rename apps/api/src/__tests__/{e2e => e2e_withAuth}/index.test.ts (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9a5b79..69a8a24 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,5 +54,5 @@ jobs: id: start_workers - name: Run E2E tests run: | - npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false + npm run test:prod working-directory: ./apps/api \ No newline at end of file diff --git a/apps/api/package.json b/apps/api/package.json index cbce4be..0b533f9 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -11,6 +11,8 @@ "start:dev": "nodemon --exec ts-node src/index.ts", "build": "tsc", "test": "jest --verbose", + "test:local-no-auth":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", + "test:prod":"npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "workers": "nodemon --exec ts-node src/services/queue-worker.ts", "worker:production": "node dist/src/services/queue-worker.js", "mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest", diff --git a/apps/api/src/__tests__/e2e/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts similarity index 100% rename from apps/api/src/__tests__/e2e/index.test.ts rename to apps/api/src/__tests__/e2e_withAuth/index.test.ts diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts index 3ed8906..ea5aa4d 100644 --- a/apps/api/src/lib/withAuth.ts +++ b/apps/api/src/lib/withAuth.ts @@ -1,11 +1,16 @@ import { AuthResponse } from "../../src/types"; +let warningCount = 0; + export function withAuth( originalFunction: (...args: U) => Promise ) { return async function (...args: U): Promise { if (process.env.USE_DB_AUTHENTICATION === "false") { - console.warn("WARNING - You're bypassing authentication"); + if (warningCount < 5) { + console.warn("WARNING - You're bypassing authentication"); + warningCount++; + } return { success: true } as T; } else { try { From 30a8482a68e42084a36a892921854fa49144b524 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:41:34 -0700 Subject: [PATCH 09/11] Nick: --- README.md | 2 +- SELF_HOST.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 56f8c5c..f6b67b7 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ We provide an easy to use API with our hosted version. You can find the playgrou - [ ] LangchainJS - Coming Soon -Self-host. To self-host refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/SELF_HOST.md). +To run locally, refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md). ### API Key diff --git a/SELF_HOST.md b/SELF_HOST.md index ba0ae23..8d1d490 100644 --- a/SELF_HOST.md +++ b/SELF_HOST.md @@ -1,6 +1,6 @@ # Self-hosting Firecrawl -Guide coming soon. +Refer to [CONTRIBUTING.md](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md) for instructions on how to run it locally. *This repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository. The primary objective is to enhance the accuracy of LLM responses by utilizing clean data. It is not ready for full self-host yet - we're working on it* From 2f29a4da8eb3b10b9c9782e17e46d662ec3010c9 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:45:15 -0700 Subject: [PATCH 10/11] Update CONTRIBUTING.md --- CONTRIBUTING.md | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d4b69e..abd3027 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,12 @@ # Contributors guide: -Welcome to firecrawl 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) +Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) If you're contributing, note that the process is similar to other open source repos i.e. (fork firecrawl, make changes, run tests, PR). If you have any questions, and would like help gettin on board, reach out to hello@mendable.ai for more or submit an issue! -## Hosting locally +## Running the project locally First, start by installing dependencies 1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) @@ -46,6 +46,16 @@ LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse ``` +### Installing dependencies + +First, install the dependencies using pnpm. + +```bash +pnpm install +``` + +### Running the project + You're going to need to open 3 terminals. ### Terminal 1 - setting up redis @@ -64,7 +74,7 @@ Now, navigate to the apps/api/ directory and run: To do this, navigate to the apps/api/ directory and run if you don’t have this already, install pnpm here: https://pnpm.io/installation -Next, run your server with`pnpm run start` +Next, run your server with `pnpm run start` @@ -90,7 +100,8 @@ curl -X POST http://localhost:3002/v0/crawl \ ## Tests: -The best way to do this is run the test with npx:Once again, navigate to the `apps/api` directory`npx jest` - +The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication. + +If you'd like to run the tests with authentication, run `npm run test:prod` From 84be3d2bcaa6af7263b8aaff5b71bdb805eb28e0 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 21 Apr 2024 11:51:39 -0700 Subject: [PATCH 11/11] Update CONTRIBUTING.md --- CONTRIBUTING.md | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index abd3027..733c787 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,3 @@ - # Contributors guide: Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions on how to get the project locally, so you can run it on your own (and contribute) @@ -11,14 +10,15 @@ If you're contributing, note that the process is similar to other open source re First, start by installing dependencies 1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) 2. pnpm [instructions](https://pnpm.io/installation) -3. redis - [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/) +3. redis [instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-redis/) Set environment variables in a .env in the /apps/api/ directoryyou can copy over the template in .env.example. To start, we wont set up authentication, or any optional sub services (pdf parsing, JS blocking support, AI features ) -```.env +.env: +``` # ===== Required ENVS ====== NUM_WORKERS_PER_QUEUE=8 PORT=3002 @@ -62,21 +62,28 @@ You're going to need to open 3 terminals. Run the command anywhere within your project -`redis-server` +```bash +redis-server +``` - ### Terminal 2 - setting up workers Now, navigate to the apps/api/ directory and run: -`pnpm run workers` - +```bash +pnpm run workers +``` + +This will start the workers who are responsible for processing crawl jobs. + ### Terminal 3 - setting up the main server To do this, navigate to the apps/api/ directory and run if you don’t have this already, install pnpm here: https://pnpm.io/installation -Next, run your server with `pnpm run start` - +Next, run your server with: +```bash +pnpm run start +``` ### Terminal 3 - sending our first request.