From 00941d94a40ade640c6dfacbc567af8d4f04d426 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 16 Apr 2024 18:03:48 -0300 Subject: [PATCH] Added anthropic vision to getImageDescription function --- apps/api/.env.local | 1 + apps/api/package.json | 1 + apps/api/pnpm-lock.yaml | 18 ++++ apps/api/src/scraper/WebScraper/index.ts | 6 +- .../src/scraper/WebScraper/utils/gptVision.ts | 41 -------- .../WebScraper/utils/imageDescription.ts | 98 +++++++++++++++++++ 6 files changed, 122 insertions(+), 43 deletions(-) delete mode 100644 apps/api/src/scraper/WebScraper/utils/gptVision.ts create mode 100644 apps/api/src/scraper/WebScraper/utils/imageDescription.ts diff --git a/apps/api/.env.local b/apps/api/.env.local index 301c64b..88133b7 100644 --- a/apps/api/.env.local +++ b/apps/api/.env.local @@ -7,6 +7,7 @@ SUPABASE_SERVICE_TOKEN= REDIS_URL= SCRAPING_BEE_API_KEY= OPENAI_API_KEY= +ANTHROPIC_API_KEY= BULL_AUTH_KEY= LOGTAIL_KEY= PLAYWRIGHT_MICROSERVICE_URL= diff --git a/apps/api/package.json b/apps/api/package.json index 9e3a3d8..a951aaf 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -39,6 +39,7 @@ "typescript": "^5.4.2" }, "dependencies": { + "@anthropic-ai/sdk": "^0.20.5", "@brillout/import": "^0.2.2", "@bull-board/api": "^5.14.2", "@bull-board/express": "^5.8.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 3539868..08b1de2 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -5,6 +5,9 @@ settings: excludeLinksFromLockfile: false dependencies: + '@anthropic-ai/sdk': + specifier: ^0.20.5 + version: 0.20.5 '@brillout/import': specifier: ^0.2.2 version: 0.2.3 @@ -213,6 +216,21 @@ packages: '@jridgewell/trace-mapping': 0.3.25 dev: true + /@anthropic-ai/sdk@0.20.5: + resolution: {integrity: sha512-d0ch+zp6/gHR4+2wqWV7JU1EJ7PpHc3r3F6hebovJTouY+pkaId1FuYYaVsG3l/gyqhOZUwKCMSMqcFNf+ZmWg==} + dependencies: + '@types/node': 18.19.22 + '@types/node-fetch': 2.6.11 
+ abort-controller: 3.0.0 + agentkeepalive: 4.5.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0 + web-streams-polyfill: 3.3.3 + transitivePeerDependencies: + - encoding + dev: false + /@anthropic-ai/sdk@0.9.1: resolution: {integrity: sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==} dependencies: diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index b54d9e6..62ea16c 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -4,7 +4,7 @@ import { scrapSingleUrl } from "./single_url"; import { SitemapEntry, fetchSitemapData, getLinksFromSitemap } from "./sitemap"; import { WebCrawler } from "./crawler"; import { getValue, setValue } from "../../services/redis"; -import { getImageDescription } from "./utils/gptVision"; +import { getImageDescription } from "./utils/imageDescription"; export type WebScraperOptions = { urls: string[]; @@ -16,6 +16,7 @@ export type WebScraperOptions = { maxCrawledLinks?: number; limit?: number; generateImgAltText?: boolean; + generateImgAltTextModel?: "gpt-4-turbo" | "anthropic"; }; concurrentRequests?: number; }; @@ -29,6 +30,7 @@ export class WebScraperDataProvider { private limit: number = 10000; private concurrentRequests: number = 20; private generateImgAltText: boolean = false; + private generateImgAltTextModel: "gpt-4-turbo" | "anthropic" = "gpt-4-turbo"; authorize(): void { throw new Error("Method not implemented."); @@ -312,7 +314,7 @@ export class WebScraperDataProvider { let backText = document.content.substring(imageIndex + image.length, Math.min(imageIndex + image.length + 1000, contentLength)); let frontTextStartIndex = Math.max(imageIndex - 1000, 0); let frontText = document.content.substring(frontTextStartIndex, imageIndex); - altText = await getImageDescription(newImageUrl, backText, frontText); + altText = await getImageDescription(newImageUrl, backText, 
frontText, this.generateImgAltTextModel); } document.content = document.content.replace(image, `![${altText}](${newImageUrl})`); diff --git a/apps/api/src/scraper/WebScraper/utils/gptVision.ts b/apps/api/src/scraper/WebScraper/utils/gptVision.ts deleted file mode 100644 index 7458a56..0000000 --- a/apps/api/src/scraper/WebScraper/utils/gptVision.ts +++ /dev/null @@ -1,41 +0,0 @@ -export async function getImageDescription( - imageUrl: string, - backText: string, - frontText: string -): Promise<string> { - const { OpenAI } = require("openai"); - const openai = new OpenAI(); - - try { - const response = await openai.chat.completions.create({ - model: "gpt-4-turbo", - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: - "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " + - backText + - " and the following text: " + - frontText + - ". Be super concise.", - }, - { - type: "image_url", - image_url: { - url: imageUrl, - }, - }, - ], - }, - ], - }); - - return response.choices[0].message.content; - } catch (error) { - console.error("Error generating image alt text:", error?.message); - return ""; - } -} diff --git a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts new file mode 100644 index 0000000..d2db37b --- /dev/null +++ b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts @@ -0,0 +1,98 @@ +import Anthropic from '@anthropic-ai/sdk'; +import axios from 'axios'; + +export async function getImageDescription( + imageUrl: string, + backText: string, + frontText: string, + model: string = "gpt-4-turbo" +): Promise<string> { + try { + const prompt = "What's in the image? You need to answer with the content for the alt tag of the image. To help you with the context, the image is in the following text: " + + backText + + " and the following text: " + + frontText + + ". Be super concise." 
+ + switch (model) { + case 'anthropic': { + if (!process.env.ANTHROPIC_API_KEY) { + throw new Error("No Anthropic API key provided"); + } + const imageRequest = await axios.get(imageUrl, { responseType: 'arraybuffer' }); + const imageMediaType = (imageRequest.headers['content-type'] ?? 'image/png') as "image/jpeg" | "image/png" | "image/gif" | "image/webp"; // use the actual Content-Type; Anthropic rejects mismatched media types + const imageData = Buffer.from(imageRequest.data, 'binary').toString('base64'); + + const anthropic = new Anthropic(); + const response = await anthropic.messages.create({ + model: "claude-3-opus-20240229", + max_tokens: 1024, + messages: [ + { + role: "user", + content: [ + { + type: "image", + source: { + type: "base64", + media_type: imageMediaType, + data: imageData, + }, + }, + { + type: "text", + text: prompt + } + ], + } + ] + }); + + return response.content[0].text; + + // const response = await anthropic.messages.create({ + // messages: [ + // { + // role: "user", + // content: prompt, + // }, + // ], + // }); + + } + default: { + if (!process.env.OPENAI_API_KEY) { + throw new Error("No OpenAI API key provided"); + } + + const { OpenAI } = require("openai"); + const openai = new OpenAI(); + + const response = await openai.chat.completions.create({ + model: "gpt-4-turbo", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: prompt, + }, + { + type: "image_url", + image_url: { + url: imageUrl, + }, + }, + ], + }, + ], + }); + return response.choices[0].message.content; + } + } + } catch (error) { + console.error("Error generating image alt text:", error?.message); + return ""; + } +}