From b7e3104c7b72ccd857b65e3017028506809a88b8 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 8 May 2024 12:18:53 -0700 Subject: [PATCH] Ni --- apps/api/src/controllers/auth.ts | 2 +- apps/api/src/services/rate-limiter.ts | 6 +- apps/test-suite/.env.example | 1 + apps/test-suite/data/websites.json | 168 +++++++++++++++++--------- apps/test-suite/index.test.ts | 20 +-- apps/test-suite/utils/log.ts | 4 +- 6 files changed, 133 insertions(+), 68 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 2aa2297..77aa52f 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -38,7 +38,7 @@ export async function supaAuthenticateUser( req.socket.remoteAddress) as string; const iptoken = incomingIP + token; await getRateLimiter( - token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode + token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode, token ).consume(iptoken); } catch (rateLimiterRes) { console.error(rateLimiterRes); diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index b1ee562..e539075 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -69,7 +69,11 @@ export function crawlRateLimit(plan: string){ -export function getRateLimiter(mode: RateLimiterMode){ +export function getRateLimiter(mode: RateLimiterMode, token: string){ + // Special test suite case. TODO: Change this later. 
+ if(token.includes("5089cefa58")){ + return crawlStatusRateLimiter; + } switch(mode) { case RateLimiterMode.Preview: return previewRateLimiter; diff --git a/apps/test-suite/.env.example b/apps/test-suite/.env.example index 24e60b3..67f0368 100644 --- a/apps/test-suite/.env.example +++ b/apps/test-suite/.env.example @@ -2,3 +2,4 @@ OPENAI_API_KEY= TEST_API_KEY= TEST_URL=http://localhost:3002 ANTHROPIC_API_KEY= +ENV= \ No newline at end of file diff --git a/apps/test-suite/data/websites.json b/apps/test-suite/data/websites.json index 270872e..0499514 100644 --- a/apps/test-suite/data/websites.json +++ b/apps/test-suite/data/websites.json @@ -1,57 +1,113 @@ [ - { - "website":"https://www.anthropic.com/claude", - "prompt":"Does this website contain pricing information?", - "expected_output":"yes" - }, - { - "website":"https://mendable.ai/pricing", - "prompt":"Does this website contain pricing information?", - "expected_output":"yes" - }, - { - "website":"https://openai.com/news", - "prompt":"Does this website contain a list of research news?", - "expected_output":"yes" - }, - { - "website":"https://agentops.ai", - "prompt":"Does this website contain a code snippets?", - "expected_output":"yes" - }, - { - "website":"https://ycombinator.com/companies", - "prompt":"Does this website contain a list bigger than 5 of ycombinator companies?", - "expected_output":"yes" - }, - { - "website":"https://firecrawl.dev", - "prompt":"Does this website contain a list bigger than 5 of ycombinator companies?", - "expected_output":"yes" - }, - { - "website":"https://en.wikipedia.org/wiki/T._N._Seshan", - "prompt":"Does this website talk about Seshan's career?", - "expected_output":"yes" - }, - { - "website":"https://mendable.ai/blog", - "prompt":"Does this website contain multiple blog articles?", - "expected_output":"yes" - }, - { - "website":"https://mendable.ai/blog", - "prompt":"Does this website contain multiple blog articles?", - "expected_output":"yes" - }, - { - 
"website":"https://news.ycombinator.com/", - "prompt":"Does this website contain a list of articles in a table markdown format?", - "expected_output":"yes" - }, - { - "website":"https://www.vellum.ai/llm-leaderboard", - "prompt":"Does this website contain a model comparison table?", - "expected_output":"yes" - } -] \ No newline at end of file + { + "website": "https://www.anthropic.com/claude", + "prompt": "Does this website contain pricing information?", + "expected_output": "yes" + }, + { + "website": "https://mendable.ai/pricing", + "prompt": "Does this website contain pricing information?", + "expected_output": "yes" + }, + { + "website": "https://openai.com/news", + "prompt": "Does this website contain a list of research news?", + "expected_output": "yes" + }, + { + "website": "https://agentops.ai", + "prompt": "Does this website contain a code snippets?", + "expected_output": "yes" + }, + { + "website": "https://ycombinator.com/companies", + "prompt": "Does this website contain a list bigger than 5 of ycombinator companies?", + "expected_output": "yes" + }, + { + "website": "https://firecrawl.dev", + "prompt": "Does this website contain a list bigger than 5 of ycombinator companies?", + "expected_output": "no" + }, + { + "website": "https://en.wikipedia.org/wiki/T._N._Seshan", + "prompt": "Does this website talk about Seshan's career?", + "expected_output": "yes" + }, + { + "website": "https://mendable.ai/blog", + "prompt": "Does this website contain multiple blog articles?", + "expected_output": "yes" + }, + { + "website": "https://mendable.ai/blog", + "prompt": "Does this website contain multiple blog articles?", + "expected_output": "yes" + }, + { + "website": "https://news.ycombinator.com/", + "prompt": "Does this website contain a list of articles in a table markdown format?", + "expected_output": "yes" + }, + { + "website": "https://www.vellum.ai/llm-leaderboard", + "prompt": "Does this website contain a model comparison table?", + "expected_output": 
"yes" + }, + { + "website": "https://www.bigbadtoystore.com", + "prompt": "are there more than 3 toys in the new arrivals section?", + "expected_output": "yes" + }, + { + "website": "https://www.instructables.com", + "prompt": "Does the site offer more than 5 links about circuits?", + "expected_output": "yes" + }, + { + "website": "https://www.powells.com", + "prompt": "is there at least 10 books webpage links?", + "expected_output": "yes" + }, + { + "website": "https://www.royalacademy.org.uk", + "prompt": "is there information on upcoming art exhibitions?", + "expected_output": "yes" + }, + { + "website": "https://www.eastbaytimes.com", + "prompt": "Is there a Trending Nationally section that lists articles?", + "expected_output": "yes" + }, + { + "website": "https://www.manchestereveningnews.co.uk", + "prompt": "is the content focused on Manchester sports news?", + "expected_output": "no" + }, + { + "website": "https://physicsworld.com", + "prompt": "does the site provide at least 15 updates on the latest physics research?", + "expected_output": "yes" + }, + { + "website": "https://richmondconfidential.org", + "prompt": "does the page contains articles about community college updates?", + "expected_output": "yes" + }, + { + "website": "https://www.techinasia.com", + "prompt": "is there at least 10 articles of the startup scene in Asia?", + "expected_output": "yes", + "notes": "The website has a paywall and bot detectors." 
+ }, + { + "website": "https://www.boardgamegeek.com", + "prompt": "are there more than 5 board game news?", + "expected_output": "yes" + }, + { + "website": "https://www.mountainproject.com", + "prompt": "Are there more than 3 climbing guides for Arizona?", + "expected_output": "yes" + } +] diff --git a/apps/test-suite/index.test.ts b/apps/test-suite/index.test.ts index c00e00a..ef4cce4 100644 --- a/apps/test-suite/index.test.ts +++ b/apps/test-suite/index.test.ts @@ -31,10 +31,10 @@ describe("Scraping/Crawling Checkup (E2E)", () => { } }); - describe("Scraping website dataset", () => { - it("Should scrape the website and prompt it against Claude", async () => { + describe("Scraping website tests with a dataset", () => { + it("Should scrape the website and prompt it against OpenAI", async () => { let passedTests = 0; - const batchSize = 5; + const batchSize = 15; // Adjusted to comply with the rate limit of 15 per minute const batchPromises = []; let totalTokens = 0; @@ -45,8 +45,10 @@ describe("Scraping/Crawling Checkup (E2E)", () => { let errorLogFileName = `${logsDir}/run.log_${new Date().toTimeString().split(' ')[0]}`; const errorLog: WebsiteScrapeError[] = []; - for (let i = 0; i < websitesData.length; i += batchSize) { + // Introducing delay to respect the rate limit of 15 requests per minute + await new Promise(resolve => setTimeout(resolve, 10000)); + const batch = websitesData.slice(i, i + batchSize); const batchPromise = Promise.all( batch.map(async (websiteData: WebsiteData) => { @@ -144,15 +146,17 @@ describe("Scraping/Crawling Checkup (E2E)", () => { console.log(`Score: ${score}%`); console.log(`Total tokens: ${totalTokens}`); - if (errorLog.length > 0) { + await logErrors(errorLog, timeTaken, totalTokens, score, validResponses.length); + + if (process.env.ENV === "local" && errorLog.length > 0) { if (!fs.existsSync(logsDir)){ fs.mkdirSync(logsDir, { recursive: true }); } fs.writeFileSync(errorLogFileName, JSON.stringify(errorLog, null, 2)); - 
logErrors(errorLog, timeTaken, totalTokens, score); } + - expect(score).toBeGreaterThanOrEqual(90); - }, 150000); // 150 seconds timeout + expect(score).toBeGreaterThanOrEqual(80); + }, 350000); // 350 seconds timeout }); }); diff --git a/apps/test-suite/utils/log.ts b/apps/test-suite/utils/log.ts index 809579a..b029bf7 100644 --- a/apps/test-suite/utils/log.ts +++ b/apps/test-suite/utils/log.ts @@ -1,9 +1,9 @@ import { supabase_service } from "./supabase"; import { WebsiteScrapeError } from "./types"; -export async function logErrors(dataError: WebsiteScrapeError[], time_taken: number, num_tokens:number, score: number) { +export async function logErrors(dataError: WebsiteScrapeError[], time_taken: number, num_tokens:number, score: number, num_pages_tested: number,) { try { - await supabase_service.from("test_suite_logs").insert([{log:dataError, time_taken, num_tokens, score}]); + await supabase_service.from("test_suite_logs").insert([{log:dataError, time_taken, num_tokens, score, num_pages_tested, is_error: dataError.length > 0}]); } catch (error) { console.error(`Error logging to supabase: ${error}`); }