diff --git a/apps/api/src/lib/LLM-extraction/index.ts b/apps/api/src/lib/LLM-extraction/index.ts index ea6ddfd..6614dbd 100644 --- a/apps/api/src/lib/LLM-extraction/index.ts +++ b/apps/api/src/lib/LLM-extraction/index.ts @@ -1,4 +1,3 @@ -import Turndown from "turndown"; import OpenAI from "openai"; import Ajv from "ajv"; const ajv = new Ajv(); // Initialize AJV for JSON schema validation diff --git a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts new file mode 100644 index 0000000..5f6c34f --- /dev/null +++ b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts @@ -0,0 +1,45 @@ +export async function handleCustomScraping( + text: string, + url: string +): Promise<{ scraper: string; url: string; wait_after_load: number } | null> { + // Check for Readme Docs special case + if (text.includes(' { - // Check for Readme Docs special case - if (text.includes('