0

Merge pull request #118 from mendableai/feat/test-suite

[Test] Added integration tests suite
This commit is contained in:
Nicolas 2024-05-08 12:47:17 -07:00 committed by GitHub
commit 4a5f87623c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 3420 additions and 3 deletions

View File

@ -61,7 +61,7 @@ jobs:
deploy: deploy:
name: Deploy app name: Deploy app
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: pre-deploy needs: pre-deploy-test-suite
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Change directory - name: Change directory

62
.github/workflows/test_suite.yml vendored Normal file
View File

@ -0,0 +1,62 @@
name: Test Suite
on:
push:
branches:
- main
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
PORT: ${{ secrets.PORT }}
REDIS_URL: ${{ secrets.REDIS_URL }}
SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
jobs:
pre-deploy:
name: Pre-deploy checks
runs-on: ubuntu-latest
services:
redis:
image: redis
ports:
- 6379:6379
steps:
- uses: actions/checkout@v3
- name: Set up Node.js
uses: actions/setup-node@v3
with:
node-version: "20"
- name: Install pnpm
run: npm install -g pnpm
- name: Install dependencies
run: pnpm install
working-directory: ./apps/api
- name: Start the application
run: npm start &
working-directory: ./apps/api
id: start_app
- name: Start workers
run: npm run workers &
working-directory: ./apps/api
id: start_workers
- name: Install dependencies
run: pnpm install
working-directory: ./apps/test-suite
- name: Run E2E tests
run: |
npm run test
working-directory: ./apps/test-suite

6
.gitignore vendored
View File

@ -8,3 +8,9 @@ dump.rdb
apps/js-sdk/node_modules/ apps/js-sdk/node_modules/
apps/api/.env.local apps/api/.env.local
apps/test-suite/node_modules/
apps/test-suite/.env
apps/test-suite/logs

View File

@ -38,7 +38,7 @@ export async function supaAuthenticateUser(
req.socket.remoteAddress) as string; req.socket.remoteAddress) as string;
const iptoken = incomingIP + token; const iptoken = incomingIP + token;
await getRateLimiter( await getRateLimiter(
token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode token === "this_is_just_a_preview_token" ? RateLimiterMode.Preview : mode, token
).consume(iptoken); ).consume(iptoken);
} catch (rateLimiterRes) { } catch (rateLimiterRes) {
console.error(rateLimiterRes); console.error(rateLimiterRes);

View File

@ -69,7 +69,11 @@ export function crawlRateLimit(plan: string){
export function getRateLimiter(mode: RateLimiterMode){ export function getRateLimiter(mode: RateLimiterMode, token: string){
// Special test suite case. TODO: Change this later.
if(token.includes("5089cefa58")){
return crawlStatusRateLimiter;
}
switch(mode) { switch(mode) {
case RateLimiterMode.Preview: case RateLimiterMode.Preview:
return previewRateLimiter; return previewRateLimiter;

View File

@ -0,0 +1,5 @@
OPENAI_API_KEY=
TEST_API_KEY=
TEST_URL=http://localhost:3002
ANTHROPIC_API_KEY=
ENV=

43
apps/test-suite/README.md Normal file
View File

@ -0,0 +1,43 @@
# Test Suite for Firecrawl
This document provides an overview of the test suite for the Firecrawl project. It includes instructions on how to run the tests and interpret the results.
## Overview
The test suite is designed to ensure the reliability and performance of the Firecrawl system. It includes a series of automated tests that check various functionalities and performance metrics.
## Running the Tests
To run the tests, navigate to the `test-suite` directory and execute the following command:
```bash
npm install
npx playwright install
npm run test
```
## Test Results
The tests are designed to cover various aspects of the system, including:
- Crawling accuracy
- Response time
- Error handling
### Example Test Case
- **Test Name**: Accuracy Test
- **Description**: This test checks the accuracy of the scraping mechanism with 100 pages and a fuzzy threshold of 0.8.
- **Expected Result**: Accuracy >= 0.9
- **Received Result**: Accuracy between 0.2 and 0.3
## Troubleshooting
If you encounter any failures or unexpected results, please check the following:
- Ensure your network connection is stable.
- Verify that all dependencies are correctly installed.
- Review the error logs for any specific error messages.
## Contributing
Contributions to the test suite are welcome. Please refer to the project's main [CONTRIBUTING.md](../CONTRIBUTING.md) file for guidelines on how to contribute.

View File

@ -0,0 +1,113 @@
[
{
"website": "https://www.anthropic.com/claude",
"prompt": "Does this website contain pricing information?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/pricing",
"prompt": "Does this website contain pricing information?",
"expected_output": "yes"
},
{
"website": "https://openai.com/news",
"prompt": "Does this website contain a list of research news?",
"expected_output": "yes"
},
{
"website": "https://agentops.ai",
"prompt": "Does this website contain a code snippets?",
"expected_output": "yes"
},
{
"website": "https://ycombinator.com/companies",
"prompt": "Does this website contain a list bigger than 5 of ycombinator companies?",
"expected_output": "yes"
},
{
"website": "https://firecrawl.dev",
"prompt": "Does this website contain a list bigger than 5 of ycombinator companies?",
"expected_output": "no"
},
{
"website": "https://en.wikipedia.org/wiki/T._N._Seshan",
"prompt": "Does this website talk about Seshan's career?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/blog",
"prompt": "Does this website contain multiple blog articles?",
"expected_output": "yes"
},
{
"website": "https://mendable.ai/blog",
"prompt": "Does this website contain multiple blog articles?",
"expected_output": "yes"
},
{
"website": "https://news.ycombinator.com/",
"prompt": "Does this website contain a list of articles in a table markdown format?",
"expected_output": "yes"
},
{
"website": "https://www.vellum.ai/llm-leaderboard",
"prompt": "Does this website contain a model comparison table?",
"expected_output": "yes"
},
{
"website": "https://www.bigbadtoystore.com",
"prompt": "are there more than 3 toys in the new arrivals section?",
"expected_output": "yes"
},
{
"website": "https://www.instructables.com",
"prompt": "Does the site offer more than 5 links about circuits?",
"expected_output": "yes"
},
{
"website": "https://www.powells.com",
"prompt": "is there at least 10 books webpage links?",
"expected_output": "yes"
},
{
"website": "https://www.royalacademy.org.uk",
"prompt": "is there information on upcoming art exhibitions?",
"expected_output": "yes"
},
{
"website": "https://www.eastbaytimes.com",
"prompt": "Is there a Trending Nationally section that lists articles?",
"expected_output": "yes"
},
{
"website": "https://www.manchestereveningnews.co.uk",
"prompt": "is the content focused on Manchester sports news?",
"expected_output": "no"
},
{
"website": "https://physicsworld.com",
"prompt": "does the site provide at least 15 updates on the latest physics research?",
"expected_output": "yes"
},
{
"website": "https://richmondconfidential.org",
"prompt": "does the page contains more than 4 articles?",
"expected_output": "yes"
},
{
"website": "https://www.techinasia.com",
"prompt": "is there at least 10 articles of the startup scene in Asia?",
"expected_output": "yes",
"notes": "The website has a paywall and bot detectors."
},
{
"website": "https://www.boardgamegeek.com",
"prompt": "are there more than 5 board game news?",
"expected_output": "yes"
},
{
"website": "https://www.mountainproject.com",
"prompt": "Are there more than 3 climbing guides for Arizona?",
"expected_output": "yes"
}
]

View File

@ -0,0 +1,162 @@
import request from "supertest";
import dotenv from "dotenv";
import Anthropic from "@anthropic-ai/sdk";
import { numTokensFromString } from "./utils/tokens";
import OpenAI from "openai";
import { WebsiteScrapeError } from "./utils/types";
import { logErrors } from "./utils/log";
const websitesData = require("./data/websites.json");
import "dotenv/config";
const fs = require('fs');
dotenv.config();
interface WebsiteData {
website: string;
prompt: string;
expected_output: string;
}
const TEST_URL = "http://127.0.0.1:3002";
describe("Scraping/Crawling Checkup (E2E)", () => {
beforeAll(() => {
if (!process.env.TEST_API_KEY) {
throw new Error("TEST_API_KEY is not set");
}
if (!process.env.OPENAI_API_KEY) {
throw new Error("OPENAI_API_KEY is not set");
}
});
describe("Scraping website tests with a dataset", () => {
it("Should scrape the website and prompt it against OpenAI", async () => {
let passedTests = 0;
const batchSize = 15; // Adjusted to comply with the rate limit of 15 per minute
const batchPromises = [];
let totalTokens = 0;
const startTime = new Date().getTime();
const date = new Date();
const logsDir = `logs/${date.getMonth() + 1}-${date.getDate()}-${date.getFullYear()}`;
let errorLogFileName = `${logsDir}/run.log_${new Date().toTimeString().split(' ')[0]}`;
const errorLog: WebsiteScrapeError[] = [];
for (let i = 0; i < websitesData.length; i += batchSize) {
// Introducing delay to respect the rate limit of 15 requests per minute
await new Promise(resolve => setTimeout(resolve, 10000));
const batch = websitesData.slice(i, i + batchSize);
const batchPromise = Promise.all(
batch.map(async (websiteData: WebsiteData) => {
try {
const scrapedContent = await request(TEST_URL || "")
.post("/v0/scrape")
.set("Content-Type", "application/json")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.send({ url: websiteData.website, pageOptions: { onlyMainContent: true } });
if (scrapedContent.statusCode !== 200) {
console.error(`Failed to scrape ${websiteData.website}`);
return null;
}
const anthropic = new Anthropic({
apiKey: process.env.ANTHROPIC_API_KEY,
});
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const prompt = `Based on this markdown extracted from a website html page, ${websiteData.prompt} Just say 'yes' or 'no' to the question.\nWebsite markdown: ${scrapedContent.body.data.markdown}\n`;
const msg = await openai.chat.completions.create({
model: "gpt-4-turbo",
max_tokens: 100,
temperature: 0,
messages: [
{
role: "user",
content: prompt
},
],
});
if (!msg) {
console.error(`Failed to prompt for ${websiteData.website}`);
errorLog.push({
website: websiteData.website,
prompt: websiteData.prompt,
expected_output: websiteData.expected_output,
actual_output: "",
error: "Failed to prompt... model error."
});
return null;
}
const actualOutput = (msg.choices[0].message.content ?? "").toLowerCase()
const expectedOutput = websiteData.expected_output.toLowerCase();
const numTokens = numTokensFromString(prompt,"gpt-4") + numTokensFromString(actualOutput,"gpt-4");
totalTokens += numTokens;
if (actualOutput.includes(expectedOutput)) {
passedTests++;
} else {
console.error(
`This website failed the test: ${websiteData.website}`
);
console.error(`Actual output: ${actualOutput}`);
errorLog.push({
website: websiteData.website,
prompt: websiteData.prompt,
expected_output: websiteData.expected_output,
actual_output: actualOutput,
error: "Output mismatch"
});
}
return {
website: websiteData.website,
prompt: websiteData.prompt,
expectedOutput,
actualOutput,
};
} catch (error) {
console.error(
`Error processing ${websiteData.website}: ${error}`
);
return null;
}
})
);
batchPromises.push(batchPromise);
}
(await Promise.all(batchPromises)).flat();
const score = (passedTests / websitesData.length) * 100;
const endTime = new Date().getTime();
const timeTaken = (endTime - startTime) / 1000;
console.log(`Score: ${score}%`);
console.log(`Total tokens: ${totalTokens}`);
await logErrors(errorLog, timeTaken, totalTokens, score, websitesData.length);
if (process.env.ENV === "local" && errorLog.length > 0) {
if (!fs.existsSync(logsDir)){
fs.mkdirSync(logsDir, { recursive: true });
}
fs.writeFileSync(errorLogFileName, JSON.stringify(errorLog, null, 2));
}
expect(score).toBeGreaterThanOrEqual(80);
}, 350000); // 150 seconds timeout
});
});

View File

@ -0,0 +1,5 @@
module.exports = {
preset: "ts-jest",
testEnvironment: "node",
setupFiles: ["./jest.setup.js"],
};

View File

View File

@ -0,0 +1,26 @@
{
"name": "test-suite",
"version": "1.0.0",
"description": "",
"scripts": {
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false"
},
"author": "",
"license": "ISC",
"dependencies": {
"@anthropic-ai/sdk": "^0.20.8",
"@dqbd/tiktoken": "^1.0.14",
"@supabase/supabase-js": "^2.43.1",
"dotenv": "^16.4.5",
"jest": "^29.7.0",
"openai": "^4.40.2",
"playwright": "^1.43.1",
"supertest": "^7.0.0",
"ts-jest": "^29.1.2"
},
"devDependencies": {
"@types/jest": "^29.5.12",
"@types/supertest": "^6.0.2",
"typescript": "^5.4.5"
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,109 @@
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
/* Language and Environment */
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
"module": "commonjs", /* Specify what module code is generated. */
// "rootDir": "./", /* Specify the root folder within your source files. */
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
// "outDir": "./", /* Specify an output folder for all emitted files. */
// "removeComments": true, /* Disable emitting comments. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
/* Type Checking */
"strict": true, /* Enable all strict type-checking options. */
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
}
}

View File

@ -0,0 +1,10 @@
import { supabase_service } from "./supabase";
import { WebsiteScrapeError } from "./types";
export async function logErrors(dataError: WebsiteScrapeError[], time_taken: number, num_tokens:number, score: number, num_pages_tested: number,) {
try {
await supabase_service.from("test_suite_logs").insert([{log:dataError, time_taken, num_tokens, score, num_pages_tested, is_error: dataError.length > 0}]);
} catch (error) {
console.error(`Error logging to supabase: ${error}`);
}
}

View File

@ -0,0 +1,47 @@
const getRandomLinksFromContent = async (options: {
content: string;
excludes: string[];
limit: number;
}): Promise<string[]> => {
const regex = /(?<=\()https:\/\/(.*?)(?=\))/g;
const links = options.content.match(regex);
const filteredLinks = links
? links.filter(
(link) => !options.excludes.some((exclude) => link.includes(exclude))
)
: [];
const uniqueLinks = [...new Set(filteredLinks)]; // Ensure all links are unique
const randomLinks = [];
while (randomLinks.length < options.limit && uniqueLinks.length > 0) {
const randomIndex = Math.floor(Math.random() * uniqueLinks.length);
randomLinks.push(uniqueLinks.splice(randomIndex, 1)[0]);
}
return randomLinks;
};
function fuzzyContains(options: {
largeText: string;
queryText: string;
threshold?: number;
}): boolean {
// Normalize texts: lowercasing and removing non-alphanumeric characters
const normalize = (text: string) =>
text.toLowerCase().replace(/[^a-z0-9]+/g, " ");
const normalizedLargeText = normalize(options.largeText);
const normalizedQueryText = normalize(options.queryText);
// Split the query into words
const queryWords = normalizedQueryText.split(/\s+/);
// Count how many query words are in the large text
const matchCount = queryWords.reduce((count, word) => {
return count + (normalizedLargeText.includes(word) ? 1 : 0);
}, 0);
// Calculate the percentage of words matched
const matchPercentage = matchCount / queryWords.length;
// Check if the match percentage meets or exceeds the threshold
return matchPercentage >= (options.threshold || 0.8);
}

View File

@ -0,0 +1,56 @@
import { createClient, SupabaseClient } from "@supabase/supabase-js";
import "dotenv/config";
// SupabaseService class initializes the Supabase client conditionally based on environment variables.
class SupabaseService {
private client: SupabaseClient | null = null;
constructor() {
const supabaseUrl = process.env.SUPABASE_URL;
const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN;
// Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") {
// Warn the user that Authentication is disabled by setting the client to null
console.warn(
"\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"
);
this.client = null;
} else if (!supabaseUrl || !supabaseServiceToken) {
console.error(
"\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"
);
} else {
this.client = createClient(supabaseUrl, supabaseServiceToken);
}
}
// Provides access to the initialized Supabase client, if available.
getClient(): SupabaseClient | null {
return this.client;
}
}
// Using a Proxy to handle dynamic access to the Supabase client or service methods.
// This approach ensures that if Supabase is not configured, any attempt to use it will result in a clear error.
export const supabase_service: SupabaseClient = new Proxy(
new SupabaseService(),
{
get: function (target, prop, receiver) {
const client = target.getClient();
// If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
if (client === null) {
console.error(
"Attempted to access Supabase client when it's not configured."
);
return () => {
throw new Error("Supabase client is not configured.");
};
}
// Direct access to SupabaseService properties takes precedence.
if (prop in target) {
return Reflect.get(target, prop, receiver);
}
// Otherwise, delegate access to the Supabase client.
return Reflect.get(client, prop, receiver);
},
}
) as unknown as SupabaseClient;

View File

@ -0,0 +1,16 @@
import { encoding_for_model } from "@dqbd/tiktoken";
import { TiktokenModel } from "@dqbd/tiktoken";
// This function calculates the number of tokens in a text string using GPT-3.5-turbo model
export function numTokensFromString(message: string, model: string): number {
const encoder = encoding_for_model(model as TiktokenModel);
// Encode the message into tokens
const tokens = encoder.encode(message);
// Free the encoder resources after use
encoder.free();
// Return the number of tokens
return tokens.length;
}

View File

@ -0,0 +1,7 @@
export interface WebsiteScrapeError {
website: string;
prompt: string;
expected_output: string;
actual_output: string;
error: string;
}