Resolved merge conflicts between feat/added-anthropic-vision-api and main
This commit is contained in:
commit
942ac3b41c
14
apps/api/.env.local
Normal file
14
apps/api/.env.local
Normal file
@ -0,0 +1,14 @@
|
||||
NUM_WORKERS_PER_QUEUE=8
|
||||
PORT=
|
||||
HOST=
|
||||
SUPABASE_ANON_TOKEN=
|
||||
SUPABASE_URL=
|
||||
SUPABASE_SERVICE_TOKEN=
|
||||
REDIS_URL=
|
||||
SCRAPING_BEE_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
ANTHROPIC_API_KEY=
|
||||
BULL_AUTH_KEY=
|
||||
LOGTAIL_KEY=
|
||||
PLAYWRIGHT_MICROSERVICE_URL=
|
||||
|
@ -41,6 +41,7 @@
|
||||
"typescript": "^5.4.2"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.20.5",
|
||||
"@brillout/import": "^0.2.2",
|
||||
"@bull-board/api": "^5.14.2",
|
||||
"@bull-board/express": "^5.8.0",
|
||||
|
@ -5,6 +5,9 @@ settings:
|
||||
excludeLinksFromLockfile: false
|
||||
|
||||
dependencies:
|
||||
'@anthropic-ai/sdk':
|
||||
specifier: ^0.20.5
|
||||
version: 0.20.5
|
||||
'@brillout/import':
|
||||
specifier: ^0.2.2
|
||||
version: 0.2.3
|
||||
@ -225,6 +228,21 @@ packages:
|
||||
'@jridgewell/trace-mapping': 0.3.25
|
||||
dev: true
|
||||
|
||||
/@anthropic-ai/sdk@0.20.5:
|
||||
resolution: {integrity: sha512-d0ch+zp6/gHR4+2wqWV7JU1EJ7PpHc3r3F6hebovJTouY+pkaId1FuYYaVsG3l/gyqhOZUwKCMSMqcFNf+ZmWg==}
|
||||
dependencies:
|
||||
'@types/node': 18.19.22
|
||||
'@types/node-fetch': 2.6.11
|
||||
abort-controller: 3.0.0
|
||||
agentkeepalive: 4.5.0
|
||||
form-data-encoder: 1.7.2
|
||||
formdata-node: 4.4.1
|
||||
node-fetch: 2.7.0
|
||||
web-streams-polyfill: 3.3.3
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
dev: false
|
||||
|
||||
/@anthropic-ai/sdk@0.9.1:
|
||||
resolution: {integrity: sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==}
|
||||
dependencies:
|
||||
|
@ -193,7 +193,7 @@ const TEST_URL = "http://127.0.0.1:3002";
|
||||
expect(response.body).toHaveProperty("success");
|
||||
expect(response.body.success).toBe(true);
|
||||
expect(response.body).toHaveProperty("data");
|
||||
}, 20000);
|
||||
}, 30000); // 30 seconds timeout
|
||||
});
|
||||
|
||||
describe("GET /v0/crawl/status/:jobId", () => {
|
||||
|
@ -4,11 +4,10 @@ import { scrapSingleUrl } from "./single_url";
|
||||
import { SitemapEntry, fetchSitemapData, getLinksFromSitemap } from "./sitemap";
|
||||
import { WebCrawler } from "./crawler";
|
||||
import { getValue, setValue } from "../../services/redis";
|
||||
import { getImageDescription } from "./utils/gptVision";
|
||||
import { getImageDescription } from "./utils/imageDescription";
|
||||
import { fetchAndProcessPdf } from "./utils/pdfProcessor";
|
||||
import { replaceImgPathsWithAbsolutePaths, replacePathsWithAbsolutePaths } from "./utils/replacePaths";
|
||||
|
||||
|
||||
export class WebScraperDataProvider {
|
||||
private urls: string[] = [""];
|
||||
private mode: "single_urls" | "sitemap" | "crawl" = "single_urls";
|
||||
@ -21,6 +20,7 @@ export class WebScraperDataProvider {
|
||||
private generateImgAltText: boolean = false;
|
||||
private pageOptions?: PageOptions;
|
||||
private replaceAllPathsWithAbsolutePaths?: boolean = false;
|
||||
private generateImgAltTextModel: "gpt-4-turbo" | "claude-3-opus" = "gpt-4-turbo";
|
||||
|
||||
authorize(): void {
|
||||
throw new Error("Method not implemented.");
|
||||
@ -443,7 +443,7 @@ export class WebScraperDataProvider {
|
||||
imageUrl,
|
||||
backText,
|
||||
frontText
|
||||
);
|
||||
, this.generateImgAltTextModel);
|
||||
}
|
||||
|
||||
document.content = document.content.replace(
|
||||
|
@ -1,41 +0,0 @@
|
||||
export async function getImageDescription(
|
||||
imageUrl: string,
|
||||
backText: string,
|
||||
frontText: string
|
||||
): Promise<string> {
|
||||
const { OpenAI } = require("openai");
|
||||
const openai = new OpenAI();
|
||||
|
||||
try {
|
||||
const response = await openai.chat.completions.create({
|
||||
model: "gpt-4-turbo",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text:
|
||||
"What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " +
|
||||
backText +
|
||||
" and the following text: " +
|
||||
frontText +
|
||||
". Be super concise.",
|
||||
},
|
||||
{
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: imageUrl,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
return response.choices[0].message.content;
|
||||
} catch (error) {
|
||||
console.error("Error generating image alt text:", error?.message);
|
||||
return "";
|
||||
}
|
||||
}
|
88
apps/api/src/scraper/WebScraper/utils/imageDescription.ts
Normal file
88
apps/api/src/scraper/WebScraper/utils/imageDescription.ts
Normal file
@ -0,0 +1,88 @@
|
||||
import Anthropic from '@anthropic-ai/sdk';
|
||||
import axios from 'axios';
|
||||
|
||||
export async function getImageDescription(
|
||||
imageUrl: string,
|
||||
backText: string,
|
||||
frontText: string,
|
||||
model: string = "gpt-4-turbo"
|
||||
): Promise<string> {
|
||||
try {
|
||||
const prompt = "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " +
|
||||
backText +
|
||||
" and the following text: " +
|
||||
frontText +
|
||||
". Be super concise."
|
||||
|
||||
switch (model) {
|
||||
case 'claude-3-opus': {
|
||||
if (!process.env.ANTHROPIC_API_KEY) {
|
||||
throw new Error("No Anthropic API key provided");
|
||||
}
|
||||
const imageRequest = await axios.get(imageUrl, { responseType: 'arraybuffer' });
|
||||
const imageMediaType = 'image/png';
|
||||
const imageData = Buffer.from(imageRequest.data, 'binary').toString('base64');
|
||||
|
||||
const anthropic = new Anthropic();
|
||||
const response = await anthropic.messages.create({
|
||||
model: "claude-3-opus-20240229",
|
||||
max_tokens: 1024,
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: imageMediaType,
|
||||
data: imageData,
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: prompt
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
return response.content[0].text;
|
||||
}
|
||||
default: {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error("No OpenAI API key provided");
|
||||
}
|
||||
|
||||
const { OpenAI } = require("openai");
|
||||
const openai = new OpenAI();
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model: "gpt-4-turbo",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: prompt,
|
||||
},
|
||||
{
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: imageUrl,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
return response.choices[0].message.content;
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error generating image alt text:", error?.message);
|
||||
return "";
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user