Caleb: first test passing
This commit is contained in:
parent
06497729e2
commit
2ad7a58eb7
@ -8,7 +8,7 @@ dotenv.config();
|
||||
const TEST_URL = "http://127.0.0.1:3002";
|
||||
|
||||
|
||||
describe.only("E2E Tests for API Routes", () => {
|
||||
describe("E2E Tests for API Routes", () => {
|
||||
beforeAll(() => {
|
||||
process.env.USE_DB_AUTHENTICATION = "true";
|
||||
});
|
||||
@ -252,7 +252,7 @@ const TEST_URL = "http://127.0.0.1:3002";
|
||||
}, 60000); // 60 seconds
|
||||
});
|
||||
|
||||
describe("POST /v0/scrape with LLM Extraction", () => {
|
||||
describe.only("POST /v0/scrape with LLM Extraction", () => {
|
||||
it("should extract data using LLM extraction mode", async () => {
|
||||
const response = await request(TEST_URL)
|
||||
.post("/v0/scrape")
|
||||
@ -264,9 +264,9 @@ const TEST_URL = "http://127.0.0.1:3002";
|
||||
onlyMainContent: true
|
||||
},
|
||||
extractorOptions: {
|
||||
extractorMode: "llm-extract",
|
||||
extractor_prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
||||
extractorSchema: {
|
||||
mode: "llm-extraction",
|
||||
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
||||
extractionSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
company_mission: {
|
||||
@ -284,14 +284,33 @@ const TEST_URL = "http://127.0.0.1:3002";
|
||||
}
|
||||
});
|
||||
|
||||
console.log("Response:", response.body);
|
||||
|
||||
expect(response.statusCode).toBe(200);
|
||||
expect(response.body).toHaveProperty("data");
|
||||
expect(response.body.data).toHaveProperty("company_mission");
|
||||
expect(response.body.data).toHaveProperty("supports_sso");
|
||||
expect(response.body.data).toHaveProperty("is_open_source");
|
||||
});
|
||||
// Assuming the LLM extraction object is available in the response body under `data.llm_extraction`
|
||||
let llmExtraction = response.body.data.llm_extraction;
|
||||
|
||||
|
||||
// Check if llm_extraction is a string and parse it if necessary
|
||||
if (typeof llmExtraction === 'string') {
|
||||
llmExtraction = JSON.parse(llmExtraction);
|
||||
}
|
||||
|
||||
|
||||
console.log('llm extraction', llmExtraction);
|
||||
|
||||
// Print the keys of the response.body for debugging purposes
|
||||
|
||||
|
||||
|
||||
// Check if the llm_extraction object has the required properties with correct types and values
|
||||
expect(llmExtraction).toHaveProperty("company_mission");
|
||||
expect(typeof llmExtraction.company_mission).toBe("string");
|
||||
expect(llmExtraction).toHaveProperty("supports_sso");
|
||||
expect(llmExtraction.supports_sso).toBe(true);
|
||||
expect(typeof llmExtraction.supports_sso).toBe("boolean");
|
||||
expect(llmExtraction).toHaveProperty("is_open_source");
|
||||
expect(llmExtraction.is_open_source).toBe(false);
|
||||
expect(typeof llmExtraction.is_open_source).toBe("boolean");
|
||||
}, 60000); // 60 secs
|
||||
});
|
||||
|
||||
describe("GET /is-production", () => {
|
||||
|
@ -3,46 +3,39 @@ import OpenAI from 'openai'
|
||||
// import { LlamaModel } from 'node-llama-cpp'
|
||||
import { z } from 'zod'
|
||||
import { zodToJsonSchema } from 'zod-to-json-schema'
|
||||
|
||||
import {
|
||||
ScraperCompletionResult,
|
||||
generateOpenAICompletions,
|
||||
} from './models.js'
|
||||
import { ExtractorOptions } from '../entities.js'
|
||||
} from './models'
|
||||
import { Document, ExtractorOptions } from '../entities'
|
||||
|
||||
// Generate completion using OpenAI
|
||||
export function generateCompletions(
|
||||
export async function generateCompletions(
|
||||
documents: Document[],
|
||||
extractionOptions: ExtractorOptions
|
||||
): Promise < ScraperCompletionResult < T >> [] {
|
||||
): Promise<Document[]> {
|
||||
// const schema = zodToJsonSchema(options.schema)
|
||||
|
||||
const schema = extractionOptions.extractionSchema;
|
||||
const prompt = extractionOptions.extractionPrompt;
|
||||
|
||||
const loader = documents.map(async (document, i) => {
|
||||
switch (this.client.constructor) {
|
||||
case true:
|
||||
return generateOpenAICompletions<T>(
|
||||
this.client as OpenAI,
|
||||
const switchVariable = "openAI" // Placeholder, want to think more about how we abstract the model provider
|
||||
|
||||
schema,
|
||||
options?.prompt,
|
||||
options?.temperature
|
||||
)
|
||||
|
||||
//TODO add other models
|
||||
// case LlamaModel:
|
||||
// return generateLlamaCompletions<T>(
|
||||
// this.client,
|
||||
// await page,
|
||||
// schema,
|
||||
// options?.prompt,
|
||||
// options?.temperature
|
||||
// )
|
||||
const completions = await Promise.all(documents.map(async (document: Document) => {
|
||||
switch (switchVariable) {
|
||||
case "openAI":
|
||||
const llm = new OpenAI();
|
||||
return await generateOpenAICompletions({
|
||||
client: llm,
|
||||
document: document,
|
||||
schema: schema,
|
||||
prompt: prompt
|
||||
});
|
||||
default:
|
||||
throw new Error('Invalid client')
|
||||
throw new Error('Invalid client');
|
||||
}
|
||||
})
|
||||
}));
|
||||
|
||||
return loader
|
||||
return completions;
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ import { getImageDescription } from "./utils/imageDescription";
|
||||
import { fetchAndProcessPdf } from "./utils/pdfProcessor";
|
||||
import { replaceImgPathsWithAbsolutePaths, replacePathsWithAbsolutePaths } from "./utils/replacePaths";
|
||||
import OpenAI from 'openai'
|
||||
import { generateCompletions } from "../../lib/LLM-extraction";
|
||||
|
||||
|
||||
export class WebScraperDataProvider {
|
||||
@ -194,20 +195,14 @@ export class WebScraperDataProvider {
|
||||
documents = await this.getSitemapData(baseUrl, documents);
|
||||
documents = documents.concat(pdfDocuments);
|
||||
|
||||
|
||||
|
||||
|
||||
console.log("extraction mode ", this.extractorOptions.mode)
|
||||
if(this.extractorOptions.mode === "llm-extraction") {
|
||||
|
||||
// const llm = new OpenAI()
|
||||
// generateCompletions(
|
||||
// client=llm,
|
||||
// page =,
|
||||
// schema=
|
||||
|
||||
// )
|
||||
|
||||
|
||||
const llm = new OpenAI()
|
||||
documents = await generateCompletions(
|
||||
documents,
|
||||
this.extractorOptions
|
||||
)
|
||||
}
|
||||
|
||||
await this.setCachedDocuments(documents);
|
||||
|
Loading…
Reference in New Issue
Block a user