0

Merge branch 'main' into main

This commit is contained in:
Rafael Miller 2024-06-05 14:44:29 -03:00 committed by GitHub
commit 64423441b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
94 changed files with 9781 additions and 1117 deletions

View File

@ -1,14 +1,24 @@
"""
checks local verions against published verions.
Usage:
# Usage:
Unix:
python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js
Windows:
python .github\scripts\check_version_has_incremented.py js .\apps\js-sdk\firecrawl @mendable/firecrawl-js
Local version: 0.0.22
Published version: 0.0.21
true
Unix:
python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py
Windows:
python .github\scripts\check_version_has_incremented.py python .\apps\python-sdk\firecrawl firecrawl-py
Local version: 0.0.11
Published version: 0.0.11
false

2
.github/scripts/requirements.txt vendored Normal file
View File

@ -0,0 +1,2 @@
requests
packaging

37
.github/workflows/fly-direct.yml vendored Normal file
View File

@ -0,0 +1,37 @@
name: Fly Deploy Direct
on:
schedule:
- cron: '0 * * * *'
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
HOST: ${{ secrets.HOST }}
LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
PORT: ${{ secrets.PORT }}
REDIS_URL: ${{ secrets.REDIS_URL }}
SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
jobs:
deploy:
name: Deploy app
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Change directory
run: cd apps/api
- uses: superfly/flyctl-actions/setup-flyctl@master
- run: flyctl deploy ./apps/api --remote-only -a firecrawl-scraper-js
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}

View File

@ -3,8 +3,8 @@ on:
push:
branches:
- main
# schedule:
# - cron: '0 */4 * * *'
schedule:
- cron: '0 */2 * * *'
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

View File

@ -39,7 +39,7 @@ SUPABASE_SERVICE_TOKEN=
TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
BULL_AUTH_KEY= #
BULL_AUTH_KEY= @
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs

View File

@ -1,6 +1,6 @@
# 🔥 Firecrawl
Crawl and convert any website into LLM-ready markdown. Built by [Mendable.ai](https://mendable.ai?ref=gfirecrawl) and the firecrawl community.
Crawl and convert any website into LLM-ready markdown or structured data. Built by [Mendable.ai](https://mendable.ai?ref=gfirecrawl) and the Firecrawl community. Includes powerful scraping, crawling and data extraction capabilities.
_This repository is in its early development stages. We are still merging custom modules in the mono repo. It's not completely yet ready for full self-host deployment, but you can already run it locally._

View File

@ -29,3 +29,6 @@ docker compose up
This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`.
# Install Firecrawl on a Kubernetes Cluster (Simple Version)
Read the [examples/k8n/README.md](examples/k8n/README.md) for instructions on how to install Firecrawl on a Kubernetes Cluster.

View File

@ -3,7 +3,7 @@ NUM_WORKERS_PER_QUEUE=8
PORT=3002
HOST=0.0.0.0
REDIS_URL=redis://localhost:6379
PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000
PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html
## To turn on DB authentication, you need to set up supabase.
USE_DB_AUTHENTICATION=true
@ -21,7 +21,7 @@ RATE_LIMIT_TEST_API_KEY_SCRAPE= # set if you'd like to test the scraping rate li
RATE_LIMIT_TEST_API_KEY_CRAWL= # set if you'd like to test the crawling rate limit
SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
BULL_AUTH_KEY= #
BULL_AUTH_KEY= @
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
@ -31,6 +31,13 @@ POSTHOG_HOST= # set if you'd like to send posthog events like job logs
STRIPE_PRICE_ID_STANDARD=
STRIPE_PRICE_ID_SCALE=
STRIPE_PRICE_ID_STARTER=
STRIPE_PRICE_ID_HOBBY=
STRIPE_PRICE_ID_HOBBY_YEARLY=
STRIPE_PRICE_ID_STANDARD_NEW=
STRIPE_PRICE_ID_STANDARD_NEW_YEARLY=
STRIPE_PRICE_ID_GROWTH=
STRIPE_PRICE_ID_GROWTH_YEARLY=
HYPERDX_API_KEY=
HDX_NODE_BETA_MODE=1
@ -43,3 +50,6 @@ PROXY_USERNAME=
PROXY_PASSWORD=
# set if you'd like to block media requests to save proxy bandwidth
BLOCK_MEDIA=
# Set this to the URL of your webhook when using the self-hosted version of FireCrawl
SELF_HOSTED_WEBHOOK_URL=

View File

@ -24,14 +24,14 @@ kill_timeout = '5s'
[http_service.concurrency]
type = "requests"
hard_limit = 200
soft_limit = 100
hard_limit = 100
soft_limit = 50
[[http_service.checks]]
grace_period = "10s"
grace_period = "20s"
interval = "30s"
method = "GET"
timeout = "5s"
timeout = "15s"
path = "/"
[[services]]
@ -50,11 +50,11 @@ kill_timeout = '5s'
[services.concurrency]
type = 'connections'
hard_limit = 75
soft_limit = 30
hard_limit = 30
soft_limit = 12
[[vm]]
size = 'performance-4x'
size = 'performance-8x'
processes = ['app']

View File

@ -29,6 +29,7 @@
"@types/cors": "^2.8.13",
"@types/express": "^4.17.17",
"@types/jest": "^29.5.12",
"@types/node": "^20.14.1",
"body-parser": "^1.20.1",
"express": "^4.18.2",
"jest": "^29.6.3",

File diff suppressed because it is too large Load Diff

View File

@ -66,9 +66,9 @@ describe("E2E Tests for API Routes", () => {
.post("/v0/scrape")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
.send({ url: "https://roastmywebsite.ai" });
expect(response.statusCode).toBe(200);
}, 10000); // 10 seconds timeout
}, 30000); // 30 seconds timeout
it("should return a successful response with a valid API key", async () => {
const response = await request(TEST_URL)
@ -185,7 +185,7 @@ describe("E2E Tests for API Routes", () => {
);
});
it("should return a successful response with a valid API key", async () => {
it("should return a successful response with a valid API key for crawl", async () => {
const response = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -529,7 +529,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(408);
}, 3000);
it("should return a successful response with a valid API key", async () => {
it("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
const response = await request(TEST_URL)
.post("/v0/crawlWebsitePreview")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
@ -558,7 +558,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
it("should return a successful response with a valid API key", async () => {
it("should return a successful response with a valid API key for search", async () => {
const response = await request(TEST_URL)
.post("/v0/search")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -1004,7 +1004,7 @@ describe("E2E Tests for API Routes", () => {
describe("Rate Limiter", () => {
it("should return 429 when rate limit is exceeded for preview token", async () => {
for (let i = 0; i < 5; i++) {
for (let i = 0; i < 4; i++) {
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer this_is_just_a_preview_token`)

View File

@ -29,6 +29,7 @@ export async function supaAuthenticateUser(
team_id?: string;
error?: string;
status?: number;
plan?: string;
}> {
const authHeader = req.headers.authorization;
if (!authHeader) {
@ -104,12 +105,13 @@ export async function supaAuthenticateUser(
case RateLimiterMode.Scrape:
rateLimiter = getRateLimiter(RateLimiterMode.Scrape, token, subscriptionData.plan);
break;
case RateLimiterMode.Search:
rateLimiter = getRateLimiter(RateLimiterMode.Search, token, subscriptionData.plan);
break;
case RateLimiterMode.CrawlStatus:
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
break;
case RateLimiterMode.Search:
rateLimiter = getRateLimiter(RateLimiterMode.Search, token);
break;
case RateLimiterMode.Preview:
rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
break;
@ -130,7 +132,7 @@ export async function supaAuthenticateUser(
const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext);
return {
success: false,
error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Please retry after ${secs}s, resets at ${retryDate}`,
error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
status: 429,
};
}
@ -172,16 +174,24 @@ export async function supaAuthenticateUser(
subscriptionData = data[0];
}
return { success: true, team_id: subscriptionData.team_id };
return { success: true, team_id: subscriptionData.team_id, plan: subscriptionData.plan ?? ""};
}
function getPlanByPriceId(price_id: string) {
switch (price_id) {
case process.env.STRIPE_PRICE_ID_STARTER:
return 'starter';
case process.env.STRIPE_PRICE_ID_STANDARD:
return 'standard';
case process.env.STRIPE_PRICE_ID_SCALE:
return 'scale';
case process.env.STRIPE_PRICE_ID_HOBBY || process.env.STRIPE_PRICE_ID_HOBBY_YEARLY:
return 'hobby';
case process.env.STRIPE_PRICE_ID_STANDARD_NEW || process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY:
return 'standard-new';
case process.env.STRIPE_PRICE_ID_GROWTH || process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
return 'growth';
default:
return 'starter';
return 'free';
}
}

View File

@ -15,7 +15,8 @@ export async function scrapeHelper(
crawlerOptions: any,
pageOptions: PageOptions,
extractorOptions: ExtractorOptions,
timeout: number
timeout: number,
plan?: string
): Promise<{
success: boolean;
error?: string;
@ -64,7 +65,9 @@ export async function scrapeHelper(
}
let creditsToBeBilled = filteredDocs.length;
const creditsPerLLMExtract = 5;
const creditsPerLLMExtract = plan === "starter" ? 5 : 50;
if (extractorOptions.mode === "llm-extraction") {
creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length);
@ -93,7 +96,7 @@ export async function scrapeHelper(
export async function scrapeController(req: Request, res: Response) {
try {
// make sure to authenticate user first, Bearer <token>
const { success, team_id, error, status } = await authenticateUser(
const { success, team_id, error, status, plan } = await authenticateUser(
req,
res,
RateLimiterMode.Scrape
@ -102,7 +105,7 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(status).json({ error });
}
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, waitFor: 0 };
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, waitFor: 0, screenshot: false };
const extractorOptions = req.body.extractorOptions ?? {
mode: "markdown"
}
@ -129,7 +132,8 @@ export async function scrapeController(req: Request, res: Response) {
crawlerOptions,
pageOptions,
extractorOptions,
timeout
timeout,
plan
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;

View File

@ -168,3 +168,6 @@ app.get('/serverHealthCheck/notify', async (req, res) => {
app.get("/is-production", (req, res) => {
res.send({ isProduction: global.isProduction });
});
// /workers health check, cant act as load balancer, just has to be a pre deploy thing

View File

@ -1,4 +1,3 @@
import Turndown from "turndown";
import OpenAI from "openai";
import Ajv from "ajv";
const ajv = new Ajv(); // Initialize AJV for JSON schema validation

View File

@ -16,6 +16,8 @@ export type PageOptions = {
fallback?: boolean;
fetchPageContent?: boolean;
waitFor?: number;
screenshot?: boolean;
headers?: Record<string, string>;
};
export type ExtractorOptions = {
@ -106,3 +108,8 @@ export class SearchResult {
return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`;
}
}
export interface FireEngineResponse {
html: string;
screenshot: string;
}

View File

@ -1,42 +1,42 @@
import { scrapWithFireEngine } from "../../src/scraper/WebScraper/single_url";
// import { scrapWithFireEngine } from "../../src/scraper/WebScraper/single_url";
const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
// const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
const scrapInBatches = async (
urls: string[],
batchSize: number,
delayMs: number
) => {
let successCount = 0;
let errorCount = 0;
// const scrapInBatches = async (
// urls: string[],
// batchSize: number,
// delayMs: number
// ) => {
// let successCount = 0;
// let errorCount = 0;
for (let i = 0; i < urls.length; i += batchSize) {
const batch = urls
.slice(i, i + batchSize)
.map((url) => scrapWithFireEngine(url));
try {
const results = await Promise.all(batch);
results.forEach((data, index) => {
if (data.trim() === "") {
errorCount++;
} else {
successCount++;
console.log(
`Scraping result ${i + index + 1}:`,
data.trim().substring(0, 20) + "..."
);
}
});
} catch (error) {
console.error("Error during scraping:", error);
}
await delay(delayMs);
}
// for (let i = 0; i < urls.length; i += batchSize) {
// const batch = urls
// .slice(i, i + batchSize)
// .map((url) => scrapWithFireEngine(url));
// try {
// const results = await Promise.all(batch);
// results.forEach((data, index) => {
// if (data.trim() === "") {
// errorCount++;
// } else {
// successCount++;
// console.log(
// `Scraping result ${i + index + 1}:`,
// data.trim().substring(0, 20) + "..."
// );
// }
// });
// } catch (error) {
// console.error("Error during scraping:", error);
// }
// await delay(delayMs);
// }
console.log(`Total successful scrapes: ${successCount}`);
console.log(`Total errored scrapes: ${errorCount}`);
};
function run() {
const urls = Array.from({ length: 200 }, () => "https://scrapethissite.com");
scrapInBatches(urls, 10, 1000);
}
// console.log(`Total successful scrapes: ${successCount}`);
// console.log(`Total errored scrapes: ${errorCount}`);
// };
// function run() {
// const urls = Array.from({ length: 200 }, () => "https://scrapethissite.com");
// scrapInBatches(urls, 10, 1000);
// }

View File

@ -0,0 +1,53 @@
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
export async function handleCustomScraping(
text: string,
url: string
): Promise<{ scraper: string; url: string; waitAfterLoad?: number, pageOptions?: { scrollXPaths?: string[] } } | null> {
// Check for Readme Docs special case
if (text.includes('<meta name="readme-deploy"')) {
console.log(
`Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
);
return {
scraper: "fire-engine",
url: url,
waitAfterLoad: 1000,
pageOptions: {
scrollXPaths: ['//*[@id="ReferencePlayground"]/section[3]/div/pre/div/div/div[5]']
}
};
}
// Check for Vanta security portals
if (text.includes('<link href="https://static.vanta.com')) {
console.log(
`Vanta link detected for ${url}, using Fire Engine with wait time 3000ms`
);
return {
scraper: "fire-engine",
url: url,
waitAfterLoad: 3000,
};
}
// Check for Google Drive PDF links in the raw HTML
const googleDrivePdfPattern =
/https:\/\/drive\.google\.com\/file\/d\/([^\/]+)\/view/;
const googleDrivePdfLink = text.match(googleDrivePdfPattern);
if (googleDrivePdfLink) {
console.log(
`Google Drive PDF link detected for ${url}: ${googleDrivePdfLink[0]}`
);
const fileId = googleDrivePdfLink[1];
const pdfUrl = `https://drive.google.com/uc?export=download&id=${fileId}`;
return {
scraper: "pdf",
url: pdfUrl
};
}
return null;
}

View File

@ -84,13 +84,15 @@ export class WebScraperDataProvider {
const job = await getWebScraperQueue().getJob(this.bullJobId);
const jobStatus = await job.getState();
if (jobStatus === "failed") {
throw new Error(
console.error(
"Job has failed or has been cancelled by the user. Stopping the job..."
);
return [] as Document[];
}
}
} catch (error) {
console.error(error);
return [] as Document[];
}
}
return results.filter((result) => result !== null) as Document[];
@ -213,6 +215,7 @@ export class WebScraperDataProvider {
return this.returnOnlyUrlsResponse(links, inProgress);
}
let documents = await this.processLinks(links, inProgress);
return this.cacheAndFinalizeDocuments(documents, links);
}

View File

@ -2,11 +2,12 @@ import * as cheerio from "cheerio";
import { ScrapingBeeClient } from "scrapingbee";
import { extractMetadata } from "./utils/metadata";
import dotenv from "dotenv";
import { Document, PageOptions } from "../../lib/entities";
import { Document, PageOptions, FireEngineResponse } from "../../lib/entities";
import { parseMarkdown } from "../../lib/html-to-markdown";
import { excludeNonMainTags } from "./utils/excludeTags";
import { urlSpecificParams } from "./utils/custom/website_params";
import { fetchAndProcessPdf } from "./utils/pdfProcessor";
import { handleCustomScraping } from "./custom/handleCustomScraping";
dotenv.config();
@ -18,7 +19,6 @@ const baseScrapers = [
"fetch",
] as const;
export async function generateRequestParams(
url: string,
wait_browser: string = "domcontentloaded",
@ -45,40 +45,53 @@ export async function generateRequestParams(
export async function scrapWithFireEngine(
url: string,
waitFor: number = 0,
screenshot: boolean = false,
pageOptions: { scrollXPaths?: string[] } = {},
headers?: Record<string, string>,
options?: any
): Promise<string> {
): Promise<FireEngineResponse> {
try {
const reqParams = await generateRequestParams(url);
// If the user has passed a wait parameter in the request, use that
const waitParam = reqParams["params"]?.wait ?? waitFor;
console.log(`[Fire-Engine] Scraping ${url} with wait: ${waitParam}`);
const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
console.log(
`[Fire-Engine] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam}`
);
const response = await fetch(process.env.FIRE_ENGINE_BETA_URL + "/scrape", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ url: url, wait: waitParam }),
body: JSON.stringify({
url: url,
wait: waitParam,
screenshot: screenshotParam,
headers: headers,
pageOptions: pageOptions
}),
});
if (!response.ok) {
console.error(
`[Fire-Engine] Error fetching url: ${url} with status: ${response.status}`
);
return "";
return { html: "", screenshot: "" };
}
const contentType = response.headers['content-type'];
if (contentType && contentType.includes('application/pdf')) {
return fetchAndProcessPdf(url);
const contentType = response.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) {
return { html: await fetchAndProcessPdf(url), screenshot: "" };
} else {
const data = await response.json();
const html = data.content;
return html ?? "";
const screenshot = data.screenshot;
return { html: html ?? "", screenshot: screenshot ?? "" };
}
} catch (error) {
console.error(`[Fire-Engine][c] Error fetching url: ${url} -> ${error}`);
return "";
return { html: "", screenshot: "" };
}
}
@ -104,8 +117,8 @@ export async function scrapWithScrapingBee(
return "";
}
const contentType = response.headers['content-type'];
if (contentType && contentType.includes('application/pdf')) {
const contentType = response.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) {
return fetchAndProcessPdf(url);
} else {
const decoder = new TextDecoder();
@ -118,7 +131,11 @@ export async function scrapWithScrapingBee(
}
}
export async function scrapWithPlaywright(url: string, waitFor: number = 0): Promise<string> {
export async function scrapWithPlaywright(
url: string,
waitFor: number = 0,
headers?: Record<string, string>
): Promise<string> {
try {
const reqParams = await generateRequestParams(url);
// If the user has passed a wait parameter in the request, use that
@ -129,7 +146,7 @@ export async function scrapWithPlaywright(url: string, waitFor: number = 0): Pro
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ url: url, wait: waitParam }),
body: JSON.stringify({ url: url, wait_after_load: waitParam, headers: headers }),
});
if (!response.ok) {
@ -139,16 +156,22 @@ export async function scrapWithPlaywright(url: string, waitFor: number = 0): Pro
return "";
}
const contentType = response.headers['content-type'];
if (contentType && contentType.includes('application/pdf')) {
const contentType = response.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) {
return fetchAndProcessPdf(url);
} else {
const data = await response.json();
const textData = await response.text();
try {
const data = JSON.parse(textData);
const html = data.content;
return html ?? "";
} catch (jsonError) {
console.error(`[Playwright] Error parsing JSON response for url: ${url} -> ${jsonError}`);
return "";
}
}
} catch (error) {
console.error(`[Playwright][c] Error fetching url: ${url} -> ${error}`);
console.error(`[Playwright] Error fetching url: ${url} -> ${error}`);
return "";
}
}
@ -163,8 +186,8 @@ export async function scrapWithFetch(url: string): Promise<string> {
return "";
}
const contentType = response.headers['content-type'];
if (contentType && contentType.includes('application/pdf')) {
const contentType = response.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) {
return fetchAndProcessPdf(url);
} else {
const text = await response.text();
@ -182,8 +205,13 @@ export async function scrapWithFetch(url: string): Promise<string> {
* @param defaultScraper The default scraper to use if the URL does not have a specific scraper order defined
* @returns The order of scrapers to be used for scraping a URL
*/
function getScrapingFallbackOrder(defaultScraper?: string, isWaitPresent: boolean = false) {
const availableScrapers = baseScrapers.filter(scraper => {
function getScrapingFallbackOrder(
defaultScraper?: string,
isWaitPresent: boolean = false,
isScreenshotPresent: boolean = false,
isHeadersPresent: boolean = false
) {
const availableScrapers = baseScrapers.filter((scraper) => {
switch (scraper) {
case "scrapingBee":
case "scrapingBeeLoad":
@ -197,30 +225,49 @@ function getScrapingFallbackOrder(defaultScraper?: string, isWaitPresent: boolea
}
});
let defaultOrder = ["scrapingBee", "fire-engine", "playwright", "scrapingBeeLoad", "fetch"];
let defaultOrder = [
"scrapingBee",
"fire-engine",
"playwright",
"scrapingBeeLoad",
"fetch",
];
if (isWaitPresent) {
defaultOrder = ["fire-engine", "playwright", ...defaultOrder.filter(scraper => scraper !== "fire-engine" && scraper !== "playwright")];
if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
defaultOrder = [
"fire-engine",
"playwright",
...defaultOrder.filter(
(scraper) => scraper !== "fire-engine" && scraper !== "playwright"
),
];
}
const filteredDefaultOrder = defaultOrder.filter((scraper: typeof baseScrapers[number]) => availableScrapers.includes(scraper));
const uniqueScrapers = new Set(defaultScraper ? [defaultScraper, ...filteredDefaultOrder, ...availableScrapers] : [...filteredDefaultOrder, ...availableScrapers]);
const filteredDefaultOrder = defaultOrder.filter(
(scraper: (typeof baseScrapers)[number]) =>
availableScrapers.includes(scraper)
);
const uniqueScrapers = new Set(
defaultScraper
? [defaultScraper, ...filteredDefaultOrder, ...availableScrapers]
: [...filteredDefaultOrder, ...availableScrapers]
);
const scrapersInOrder = Array.from(uniqueScrapers);
console.log(`Scrapers in order: ${scrapersInOrder}`);
return scrapersInOrder as typeof baseScrapers[number][];
return scrapersInOrder as (typeof baseScrapers)[number][];
}
async function handleCustomScraping(text: string, url: string): Promise<string | null> {
if (text.includes('<meta name="readme-deploy"')) {
console.log(`Special use case detected for ${url}, using Fire Engine with wait time 1000ms`);
return await scrapWithFireEngine(url, 1000);
}
return null;
}
export async function scrapSingleUrl(
urlToScrap: string,
pageOptions: PageOptions = { onlyMainContent: true, includeHtml: false, waitFor: 0},
pageOptions: PageOptions = {
onlyMainContent: true,
includeHtml: false,
waitFor: 0,
screenshot: false,
headers: undefined
},
existingHtml: string = ""
): Promise<Document> {
urlToScrap = urlToScrap.trim();
@ -239,15 +286,22 @@ export async function scrapSingleUrl(
const attemptScraping = async (
url: string,
method: typeof baseScrapers[number]
method: (typeof baseScrapers)[number]
) => {
let text = "";
let screenshot = "";
switch (method) {
case "fire-engine":
if (process.env.FIRE_ENGINE_BETA_URL) {
console.log(`Scraping ${url} with Fire Engine`);
text = await scrapWithFireEngine(url, pageOptions.waitFor);
const response = await scrapWithFireEngine(
url,
pageOptions.waitFor,
pageOptions.screenshot,
pageOptions.headers
);
text = response.html;
screenshot = response.screenshot;
}
break;
case "scrapingBee":
@ -261,7 +315,7 @@ export async function scrapSingleUrl(
break;
case "playwright":
if (process.env.PLAYWRIGHT_MICROSERVICE_URL) {
text = await scrapWithPlaywright(url, pageOptions.waitFor);
text = await scrapWithPlaywright(url, pageOptions.waitFor, pageOptions.headers);
}
break;
case "scrapingBeeLoad":
@ -274,19 +328,32 @@ export async function scrapSingleUrl(
break;
}
let customScrapedContent : FireEngineResponse | null = null;
// Check for custom scraping conditions
const customScrapedContent = await handleCustomScraping(text, url);
const customScraperResult = await handleCustomScraping(text, url);
if (customScraperResult){
switch (customScraperResult.scraper) {
case "fire-engine":
customScrapedContent = await scrapWithFireEngine(customScraperResult.url, customScraperResult.waitAfterLoad, false, customScraperResult.pageOptions)
case "pdf":
customScrapedContent = { html: await fetchAndProcessPdf(customScraperResult.url), screenshot }
}
}
if (customScrapedContent) {
text = customScrapedContent;
text = customScrapedContent.html;
screenshot = customScrapedContent.screenshot;
}
//* TODO: add an optional to return markdown or structured/extracted content
let cleanedHtml = removeUnwantedElements(text, pageOptions);
return [await parseMarkdown(cleanedHtml), text];
return [await parseMarkdown(cleanedHtml), text, screenshot];
};
try {
let [text, html] = ["", ""];
let [text, html, screenshot] = ["", "", ""];
let urlKey = urlToScrap;
try {
urlKey = new URL(urlToScrap).hostname.replace(/^www\./, "");
@ -294,7 +361,12 @@ export async function scrapSingleUrl(
console.error(`Invalid URL key, trying: ${urlToScrap}`);
}
const defaultScraper = urlSpecificParams[urlKey]?.defaultScraper ?? "";
const scrapersInOrder = getScrapingFallbackOrder(defaultScraper, pageOptions && pageOptions.waitFor && pageOptions.waitFor > 0)
const scrapersInOrder = getScrapingFallbackOrder(
defaultScraper,
pageOptions && pageOptions.waitFor && pageOptions.waitFor > 0,
pageOptions && pageOptions.screenshot && pageOptions.screenshot === true,
pageOptions && pageOptions.headers && pageOptions.headers !== undefined
);
for (const scraper of scrapersInOrder) {
// If exists text coming from crawler, use it
@ -304,7 +376,7 @@ export async function scrapSingleUrl(
html = existingHtml;
break;
}
[text, html] = await attemptScraping(urlToScrap, scraper);
[text, html, screenshot] = await attemptScraping(urlToScrap, scraper);
if (text && text.trim().length >= 100) break;
const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1;
if (nextScraperIndex < scrapersInOrder.length) {
@ -318,12 +390,27 @@ export async function scrapSingleUrl(
const soup = cheerio.load(html);
const metadata = extractMetadata(soup, urlToScrap);
const document: Document = {
let document: Document;
if (screenshot && screenshot.length > 0) {
document = {
content: text,
markdown: text,
html: pageOptions.includeHtml ? html : undefined,
metadata: {
...metadata,
screenshot: screenshot,
sourceURL: urlToScrap,
},
};
} else {
document = {
content: text,
markdown: text,
html: pageOptions.includeHtml ? html : undefined,
metadata: { ...metadata, sourceURL: urlToScrap },
};
}
return document;
} catch (error) {

View File

@ -0,0 +1,66 @@
import { isUrlBlocked } from '../blocklist';
describe('isUrlBlocked', () => {
it('should return true for blocked social media URLs', () => {
const blockedUrls = [
'https://www.facebook.com',
'https://twitter.com/someuser',
'https://instagram.com/someuser',
'https://www.linkedin.com/in/someuser',
'https://pinterest.com/someuser',
'https://snapchat.com/someuser',
'https://tiktok.com/@someuser',
'https://reddit.com/r/somesubreddit',
'https://flickr.com/photos/someuser',
'https://whatsapp.com/someuser',
'https://wechat.com/someuser',
'https://telegram.org/someuser',
];
blockedUrls.forEach(url => {
if (!isUrlBlocked(url)) {
console.log(`URL not blocked: ${url}`);
}
expect(isUrlBlocked(url)).toBe(true);
});
});
it('should return false for URLs containing allowed keywords', () => {
const allowedUrls = [
'https://www.facebook.com/privacy',
'https://twitter.com/terms',
'https://instagram.com/legal',
'https://www.linkedin.com/help',
'https://pinterest.com/about',
'https://snapchat.com/support',
'https://tiktok.com/contact',
'https://reddit.com/user-agreement',
'https://tumblr.com/policy',
'https://flickr.com/blog',
'https://whatsapp.com/press',
'https://wechat.com/careers',
'https://telegram.org/conditions',
'https://wix.com/careers',
];
allowedUrls.forEach(url => {
expect(isUrlBlocked(url)).toBe(false);
});
});
it('should return false for non-blocked URLs', () => {
const nonBlockedUrls = [
'https://www.example.com',
'https://www.somewebsite.org',
'https://subdomain.example.com',
'firecrawl.dev',
'amazon.com',
'wix.com',
'https://wix.com'
];
nonBlockedUrls.forEach(url => {
expect(isUrlBlocked(url)).toBe(false);
});
});
});

View File

@ -1,5 +1,6 @@
const socialMediaBlocklist = [
'facebook.com',
'x.com',
'twitter.com',
'instagram.com',
'linkedin.com',
@ -32,9 +33,22 @@ const allowedKeywords = [
];
export function isUrlBlocked(url: string): boolean {
// Check if the URL contains any allowed keywords
if (allowedKeywords.some(keyword => url.includes(keyword))) {
return false;
}
return socialMediaBlocklist.some(domain => url.includes(domain));
try {
// Check if the URL matches any domain in the blocklist
return socialMediaBlocklist.some(domain => {
// Create a regular expression to match the exact domain
const domainPattern = new RegExp(`(^|\\.)${domain.replace('.', '\\.')}$`);
// Test the hostname of the URL against the pattern
return domainPattern.test(new URL(url).hostname);
});
} catch (e) {
// If an error occurs (e.g., invalid URL), return false
return false;
}
}

View File

@ -141,5 +141,22 @@ export const urlSpecificParams = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
},
}
},
"firecrawl.dev":{
defaultScraper: "fire-engine",
params: {
headers: {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
"sec-fetch-site": "same-origin",
"sec-fetch-mode": "cors",
"sec-fetch-dest": "empty",
referer: "https://www.google.com/",
"accept-language": "en-US,en;q=0.9",
"accept-encoding": "gzip, deflate, br",
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
},
},
},
};

View File

@ -80,7 +80,7 @@ export async function processPdfToText(filePath: string): Promise<string> {
await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds
}
} catch (error) {
console.error("Error fetching result:", error || '');
console.error("Error fetching result w/ LlamaIndex");
attempt++;
await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds before retrying
// You may want to handle specific errors differently
@ -92,7 +92,7 @@ export async function processPdfToText(filePath: string): Promise<string> {
}
content = resultResponse.data[resultType];
} catch (error) {
console.error("Error processing document:", filePath, error);
console.error("Error processing pdf document w/ LlamaIndex(2)");
content = await processPdf(filePath);
}
} else {

View File

@ -1,7 +1,7 @@
import { withAuth } from "../../lib/withAuth";
import { supabase_service } from "../supabase";
const FREE_CREDITS = 300;
const FREE_CREDITS = 500;
export async function billTeam(team_id: string, credits: number) {
return withAuth(supaBillTeam)(team_id, credits);

View File

@ -1,6 +1,7 @@
import Queue from "bull";
import { Queue as BullQueue } from "bull";
let webScraperQueue;
let webScraperQueue: BullQueue;
export function getWebScraperQueue() {
if (!webScraperQueue) {

View File

@ -2,133 +2,68 @@ import { RateLimiterRedis } from "rate-limiter-flexible";
import * as redis from "redis";
import { RateLimiterMode } from "../../src/types";
const MAX_CRAWLS_PER_MINUTE_STARTER = 3;
const MAX_CRAWLS_PER_MINUTE_STANDARD = 5;
const MAX_CRAWLS_PER_MINUTE_SCALE = 20;
const MAX_SCRAPES_PER_MINUTE_STARTER = 20;
const MAX_SCRAPES_PER_MINUTE_STANDARD = 40;
const MAX_SCRAPES_PER_MINUTE_SCALE = 50;
const MAX_SEARCHES_PER_MINUTE_STARTER = 20;
const MAX_SEARCHES_PER_MINUTE_STANDARD = 40;
const MAX_SEARCHES_PER_MINUTE_SCALE = 50;
const MAX_REQUESTS_PER_MINUTE_PREVIEW = 5;
const MAX_REQUESTS_PER_MINUTE_ACCOUNT = 20;
const MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS = 150;
const RATE_LIMITS = {
crawl: {
free: 1,
starter: 3,
standard: 5,
scale: 20,
hobby: 3,
standardNew: 10,
growth: 50,
},
scrape: {
free: 5,
starter: 20,
standardOld: 40,
scale: 50,
hobby: 10,
standardNew: 50,
growth: 500,
},
search: {
free: 5,
starter: 20,
standard: 40,
scale: 50,
hobby: 10,
standardNew: 50,
growth: 500,
},
preview: 5,
account: 20,
crawlStatus: 150,
testSuite: 10000,
};
export const redisClient = redis.createClient({
url: process.env.REDIS_URL,
legacyMode: true,
});
export const previewRateLimiter = new RateLimiterRedis({
const createRateLimiter = (keyPrefix, points) => new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "preview",
points: MAX_REQUESTS_PER_MINUTE_PREVIEW,
duration: 60, // Duration in seconds
});
export const serverRateLimiter = new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "server",
points: MAX_REQUESTS_PER_MINUTE_ACCOUNT,
duration: 60, // Duration in seconds
});
export const crawlStatusRateLimiter = new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "crawl-status",
points: MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS,
duration: 60, // Duration in seconds
});
export const testSuiteRateLimiter = new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "test-suite",
points: 10000,
keyPrefix,
points,
duration: 60, // Duration in seconds
});
export const previewRateLimiter = createRateLimiter("preview", RATE_LIMITS.preview);
export const serverRateLimiter = createRateLimiter("server", RATE_LIMITS.account);
export const crawlStatusRateLimiter = createRateLimiter("crawl-status", RATE_LIMITS.crawlStatus);
export const testSuiteRateLimiter = createRateLimiter("test-suite", RATE_LIMITS.testSuite);
export function getRateLimiter(mode: RateLimiterMode, token: string, plan?: string) {
// Special test suite case. TODO: Change this later.
if (token.includes("5089cefa58") || token.includes("6254cf9")){
if (token.includes("a01ccae") || token.includes("6254cf9")) {
return testSuiteRateLimiter;
}
switch (mode) {
case RateLimiterMode.Preview:
return previewRateLimiter;
case RateLimiterMode.CrawlStatus:
return crawlStatusRateLimiter;
case RateLimiterMode.Crawl:
if (plan === "standard"){
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "crawl-standard",
points: MAX_CRAWLS_PER_MINUTE_STANDARD,
duration: 60, // Duration in seconds
});
} else if (plan === "scale"){
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "crawl-scale",
points: MAX_CRAWLS_PER_MINUTE_SCALE,
duration: 60, // Duration in seconds
});
}
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "crawl-starter",
points: MAX_CRAWLS_PER_MINUTE_STARTER,
duration: 60, // Duration in seconds
});
case RateLimiterMode.Scrape:
if (plan === "standard"){
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "scrape-standard",
points: MAX_SCRAPES_PER_MINUTE_STANDARD,
duration: 60, // Duration in seconds
});
} else if (plan === "scale"){
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "scrape-scale",
points: MAX_SCRAPES_PER_MINUTE_SCALE,
duration: 60, // Duration in seconds
});
}
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "scrape-starter",
points: MAX_SCRAPES_PER_MINUTE_STARTER,
duration: 60, // Duration in seconds
});
case RateLimiterMode.Search:
if (plan === "standard"){
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "search-standard",
points: MAX_SEARCHES_PER_MINUTE_STANDARD,
duration: 60, // Duration in seconds
});
} else if (plan === "scale"){
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "search-scale",
points: MAX_SEARCHES_PER_MINUTE_SCALE,
duration: 60, // Duration in seconds
});
}
return new RateLimiterRedis({
storeClient: redisClient,
keyPrefix: "search-starter",
points: MAX_SEARCHES_PER_MINUTE_STARTER,
duration: 60, // Duration in seconds
});
default:
return serverRateLimiter;
}
const rateLimitConfig = RATE_LIMITS[mode];
if (!rateLimitConfig) return serverRateLimiter;
const planKey = plan ? plan.replace("-", "") : "starter";
const points = rateLimitConfig[planKey] || rateLimitConfig.preview;
return createRateLimiter(`${mode}-${planKey}`, points);
}

View File

@ -2,14 +2,21 @@ import { supabase_service } from "./supabase";
export const callWebhook = async (teamId: string, data: any) => {
try {
const selfHostedUrl = process.env.SELF_HOSTED_WEBHOOK_URL;
let webhookUrl = selfHostedUrl;
if (!selfHostedUrl) {
const { data: webhooksData, error } = await supabase_service
.from('webhooks')
.select('url')
.eq('team_id', teamId)
.from("webhooks")
.select("url")
.eq("team_id", teamId)
.limit(1);
if (error) {
console.error(`Error fetching webhook URL for team ID: ${teamId}`, error.message);
console.error(
`Error fetching webhook URL for team ID: ${teamId}`,
error.message
);
return null;
}
@ -17,6 +24,9 @@ export const callWebhook = async (teamId: string, data: any) => {
return null;
}
webhookUrl = webhooksData[0].url;
}
let dataToSend = [];
if (data.result.links && data.result.links.length !== 0) {
for (let i = 0; i < data.result.links.length; i++) {
@ -28,10 +38,10 @@ export const callWebhook = async (teamId: string, data: any) => {
}
}
await fetch(webhooksData[0].url, {
method: 'POST',
await fetch(webhookUrl, {
method: "POST",
headers: {
'Content-Type': 'application/json',
"Content-Type": "application/json",
},
body: JSON.stringify({
success: data.success,
@ -40,7 +50,9 @@ export const callWebhook = async (teamId: string, data: any) => {
}),
});
} catch (error) {
console.error(`Error sending webhook for team ID: ${teamId}`, error.message);
console.error(
`Error sending webhook for team ID: ${teamId}`,
error.message
);
}
};

View File

@ -57,6 +57,7 @@ export interface AuthResponse {
team_id?: string;
error?: string;
status?: number;
plan?: string;
}

View File

@ -19,6 +19,7 @@ export default class FirecrawlApp {
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null }) {
this.apiUrl = "https://api.firecrawl.dev";
this.apiKey = apiKey || "";
if (!this.apiKey) {
throw new Error("No API key provided");
@ -47,7 +48,7 @@ export default class FirecrawlApp {
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
}
try {
const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers });
const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
@ -84,7 +85,7 @@ export default class FirecrawlApp {
jsonData = Object.assign(Object.assign({}, jsonData), params);
}
try {
const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers });
const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
@ -109,23 +110,23 @@ export default class FirecrawlApp {
* @param {string} url - The URL to crawl.
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {number} pollInterval - Time in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
}
try {
const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers);
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
if (response.status === 200) {
const jobId = response.data.jobId;
if (waitUntilDone) {
return this.monitorJobStatus(jobId, headers, timeout);
return this.monitorJobStatus(jobId, headers, pollInterval);
}
else {
return { success: true, jobId };
@ -151,9 +152,14 @@ export default class FirecrawlApp {
return __awaiter(this, void 0, void 0, function* () {
const headers = this.prepareHeaders();
try {
const response = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
if (response.status === 200) {
return response.data;
return {
success: true,
status: response.data.status,
data: response.data.data,
partial_data: !response.data.data ? response.data.partial_data : undefined,
};
}
else {
this.handleError(response, "check crawl status");
@ -202,10 +208,10 @@ export default class FirecrawlApp {
* @param {number} timeout - Timeout in seconds for job status checks.
* @returns {Promise<any>} The final job status or data.
*/
monitorJobStatus(jobId, headers, timeout) {
monitorJobStatus(jobId, headers, checkInterval) {
return __awaiter(this, void 0, void 0, function* () {
while (true) {
const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
if (statusResponse.status === 200) {
const statusData = statusResponse.data;
if (statusData.status === "completed") {
@ -217,10 +223,10 @@ export default class FirecrawlApp {
}
}
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
if (timeout < 2) {
timeout = 2;
if (checkInterval < 2) {
checkInterval = 2;
}
yield new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
}
else {
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.22",
"version": "0.0.23",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js",
"types": "types/index.d.ts",

View File

@ -30,7 +30,7 @@ describe('FirecrawlApp E2E Tests', () => {
const response = await app.scrapeUrl('https://firecrawl.dev');
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
}, 10000); // 10 seconds timeout
}, 30000); // 30 seconds timeout
test('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
@ -40,7 +40,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.data).toHaveProperty('markdown');
expect(response.data).toHaveProperty('metadata');
expect(response.data).not.toHaveProperty('html');
}, 10000); // 10 seconds timeout
}, 30000); // 30 seconds timeout
test('should return successful response with valid API key and include HTML', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
@ -49,7 +49,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data.markdown).toContain("🔥 Firecrawl");
expect(response.data.html).toContain("<h1");
}, 10000); // 10 seconds timeout
}, 30000); // 30 seconds timeout
test('should return successful response for valid scrape with PDF file', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
@ -99,12 +99,21 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).not.toBeNull();
expect(response.jobId).toBeDefined();
await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds
const statusResponse = await app.checkCrawlStatus(response.jobId);
let statusResponse = await app.checkCrawlStatus(response.jobId);
const maxChecks = 15;
let checks = 0;
while (statusResponse.status === 'active' && checks < maxChecks) {
await new Promise(resolve => setTimeout(resolve, 1000));
expect(statusResponse.partial_data).not.toBeNull();
statusResponse = await app.checkCrawlStatus(response.jobId);
checks++;
}
expect(statusResponse).not.toBeNull();
expect(statusResponse.status).toBe('completed');
expect(statusResponse.data.length).toBeGreaterThan(0);
}, 30000); // 30 seconds timeout
}, 35000); // 35 seconds timeout
test('should return successful response for search', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });

View File

@ -56,6 +56,7 @@ export interface JobStatusResponse {
status: string;
jobId?: string;
data?: any;
partial_data?: any,
error?: string;
}
@ -174,7 +175,7 @@ export default class FirecrawlApp {
* @param {string} url - The URL to crawl.
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {number} pollInterval - Time in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
@ -182,7 +183,7 @@ export default class FirecrawlApp {
url: string,
params: Params | null = null,
waitUntilDone: boolean = true,
timeout: number = 2,
pollInterval: number = 2,
idempotencyKey?: string
): Promise<CrawlResponse | any> {
const headers = this.prepareHeaders(idempotencyKey);
@ -199,7 +200,7 @@ export default class FirecrawlApp {
if (response.status === 200) {
const jobId: string = response.data.jobId;
if (waitUntilDone) {
return this.monitorJobStatus(jobId, headers, timeout);
return this.monitorJobStatus(jobId, headers, pollInterval);
} else {
return { success: true, jobId };
}
@ -226,7 +227,12 @@ export default class FirecrawlApp {
headers
);
if (response.status === 200) {
return response.data;
return {
success: true,
status: response.data.status,
data: response.data.data,
partial_data: !response.data.data ? response.data.partial_data : undefined,
};
} else {
this.handleError(response, "check crawl status");
}
@ -290,7 +296,7 @@ export default class FirecrawlApp {
async monitorJobStatus(
jobId: string,
headers: AxiosRequestHeaders,
timeout: number
checkInterval: number
): Promise<any> {
while (true) {
const statusResponse: AxiosResponse = await this.getRequest(
@ -308,10 +314,10 @@ export default class FirecrawlApp {
} else if (
["active", "paused", "pending", "queued"].includes(statusData.status)
) {
if (timeout < 2) {
timeout = 2;
if (checkInterval < 2) {
checkInterval = 2;
}
await new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
} else {
throw new Error(
`Crawl job failed or was stopped. Status: ${statusData.status}`

View File

@ -5,6 +5,7 @@ import { z } from "zod";
*/
export interface FirecrawlAppConfig {
apiKey?: string | null;
apiUrl?: string | null;
}
/**
* Generic parameter interface.
@ -50,6 +51,7 @@ export interface JobStatusResponse {
status: string;
jobId?: string;
data?: any;
partial_data?: any;
error?: string;
}
/**
@ -57,6 +59,7 @@ export interface JobStatusResponse {
*/
export default class FirecrawlApp {
private apiKey;
private apiUrl;
/**
* Initializes a new instance of the FirecrawlApp class.
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
@ -81,11 +84,11 @@ export default class FirecrawlApp {
* @param {string} url - The URL to crawl.
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {number} pollInterval - Time in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
/**
* Checks the status of a crawl job using the Firecrawl API.
* @param {string} jobId - The job ID of the crawl operation.
@ -119,7 +122,7 @@ export default class FirecrawlApp {
* @param {number} timeout - Timeout in seconds for job status checks.
* @returns {Promise<any>} The final job status or data.
*/
monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any>;
monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<any>;
/**
* Handles errors from API responses.
* @param {AxiosResponse} response - The response from the API.

View File

@ -1,60 +1,86 @@
from fastapi import FastAPI
from playwright.async_api import async_playwright, Browser
from fastapi.responses import JSONResponse
from pydantic import BaseModel
"""
This module provides a FastAPI application that uses Playwright to fetch and return
the HTML content of a specified URL. It supports optional proxy settings and media blocking.
"""
from os import environ
PROXY_SERVER = environ.get('PROXY_SERVER', None)
PROXY_USERNAME = environ.get('PROXY_USERNAME', None)
PROXY_PASSWORD = environ.get('PROXY_PASSWORD', None)
BLOCK_MEDIA = environ.get('BLOCK_MEDIA', 'False').upper() == 'TRUE'
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from playwright.async_api import Browser, async_playwright
from pydantic import BaseModel
PROXY_SERVER = environ.get("PROXY_SERVER", None)
PROXY_USERNAME = environ.get("PROXY_USERNAME", None)
PROXY_PASSWORD = environ.get("PROXY_PASSWORD", None)
BLOCK_MEDIA = environ.get("BLOCK_MEDIA", "False").upper() == "TRUE"
app = FastAPI()
class UrlModel(BaseModel):
"""Model representing the URL and associated parameters for the request."""
url: str
wait: int = None
wait_after_load: int = 0
timeout: int = 15000
headers: dict = None
browser: Browser = None
@app.on_event("startup")
async def startup_event():
"""Event handler for application startup to initialize the browser."""
global browser
playwright = await async_playwright().start()
browser = await playwright.chromium.launch()
@app.on_event("shutdown")
async def shutdown_event():
"""Event handler for application shutdown to close the browser."""
await browser.close()
@app.post("/html")
async def root(body: UrlModel):
"""
Endpoint to fetch and return HTML content of a given URL.
Args:
body (UrlModel): The URL model containing the target URL, wait time, and timeout.
Returns:
JSONResponse: The HTML content of the page.
"""
context = None
if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD:
context = await browser.new_context(proxy={"server": PROXY_SERVER,
context = await browser.new_context(
proxy={
"server": PROXY_SERVER,
"username": PROXY_USERNAME,
"password": PROXY_PASSWORD})
"password": PROXY_PASSWORD,
}
)
else:
context = await browser.new_context()
if BLOCK_MEDIA:
await context.route("**/*.{png,jpg,jpeg,gif,svg,mp3,mp4,avi,flac,ogg,wav,webm}",
handler=lambda route, request: route.abort())
await context.route(
"**/*.{png,jpg,jpeg,gif,svg,mp3,mp4,avi,flac,ogg,wav,webm}",
handler=lambda route, request: route.abort(),
)
page = await context.new_page()
# Set headers if provided
if body.headers:
await page.set_extra_http_headers(body.headers)
await page.goto(
body.url,
wait_until="load",
timeout=body.timeout if body.timeout else 15000,
timeout=body.timeout,
)
# Wait != timeout. Wait is the time to wait after the page is loaded - useful in some cases were "load" / "networkidle" is not enough
if body.wait:
await page.wait_for_timeout(body.wait)
if body.wait_after_load > 0:
await page.wait_for_timeout(body.wait_after_load)
page_content = await page.content()
await context.close()

View File

@ -273,7 +273,7 @@ class FirecrawlApp:
return status_data['data']
else:
raise Exception('Crawl job completed but no data was returned')
elif status_data['status'] in ['active', 'paused', 'pending', 'queued']:
elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
timeout=max(timeout,2)
time.sleep(timeout) # Wait for the specified timeout before checking again
else:

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -273,7 +273,7 @@ class FirecrawlApp:
return status_data['data']
else:
raise Exception('Crawl job completed but no data was returned')
elif status_data['status'] in ['active', 'paused', 'pending', 'queued']:
elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
timeout=max(timeout,2)
time.sleep(timeout) # Wait for the specified timeout before checking again
else:

View File

@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: firecrawl-py
Version: 0.0.11
Version: 0.0.12
Summary: Python SDK for Firecrawl API
Home-page: https://github.com/mendableai/firecrawl
Author: Mendable.ai
@ -151,6 +151,25 @@ status = app.check_crawl_status(job_id)
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
## Running the Tests with Pytest
To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling.
### Running the Tests
To run the tests, execute the following commands:
Install pytest:
```bash
pip install pytest
```
Run:
```bash
pytest firecrawl/__tests__/e2e_withAuth/test.py
```
## Contributing
Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.

View File

@ -1 +1,3 @@
requests
pytest
python-dotenv

41
examples/k8n/README.md Normal file
View File

@ -0,0 +1,41 @@
# Install Firecrawl on a Kubernetes Cluster (Simple Version)
# Before installing
1. Set [secret.yaml](secret.yaml) and [configmap.yaml](configmap.yaml) and do not check in secrets
2. Build Docker images, and host it in your Docker Registry (replace the target registry with your own)
1. API (which is also used as a worker image)
1. ```bash
docker build -t ghcr.io/winkk-dev/firecrawl:latest ../../apps/api
docker push ghcr.io/winkk-dev/firecrawl:latest
```
2. Playwright
1. ```bash
docker build -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../apps/playwright-service
docker push ghcr.io/winkk-dev/firecrawl-playwright:latest
```
3. Replace the image in [worker.yaml](worker.yaml), [api.yaml](api.yaml) and [playwright-service.yaml](playwright-service.yaml)
## Install
```bash
kubectl apply -f configmap.yaml
kubectl apply -f secret.yaml
kubectl apply -f playwright-service.yaml
kubectl apply -f api.yaml
kubectl apply -f worker.yaml
kubectl apply -f redis.yaml
```
# Port Forwarding for Testing
```bash
kubectl port-forward svc/api 3002:3002 -n dev
```
# Delete Firecrawl
```bash
kubectl delete -f configmap.yaml
kubectl delete -f secret.yaml
kubectl delete -f playwright-service.yaml
kubectl delete -f api.yaml
kubectl delete -f worker.yaml
kubectl delete -f redis.yaml
```

39
examples/k8n/api.yaml Normal file
View File

@ -0,0 +1,39 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: api
spec:
replicas: 1
selector:
matchLabels:
app: api
template:
metadata:
labels:
app: api
spec:
imagePullSecrets:
- name: docker-registry-secret
containers:
- name: api
image: ghcr.io/winkk-dev/firecrawl:latest
args: [ "pnpm", "run", "start:production" ]
ports:
- containerPort: 3002
envFrom:
- configMapRef:
name: firecrawl-config
- secretRef:
name: firecrawl-secret
---
apiVersion: v1
kind: Service
metadata:
name: api
spec:
selector:
app: api
ports:
- protocol: TCP
port: 3002
targetPort: 3002

View File

@ -0,0 +1,14 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: firecrawl-config
data:
NUM_WORKERS_PER_QUEUE: "8"
PORT: "3002"
HOST: "0.0.0.0"
REDIS_URL: "redis://redis:6379"
PLAYWRIGHT_MICROSERVICE_URL: "http://playwright-service:3000"
USE_DB_AUTHENTICATION: "false"
SUPABASE_ANON_TOKEN: ""
SUPABASE_URL: ""
SUPABASE_SERVICE_TOKEN: ""

View File

@ -0,0 +1,36 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: playwright-service
spec:
replicas: 1
selector:
matchLabels:
app: playwright-service
template:
metadata:
labels:
app: playwright-service
spec:
imagePullSecrets:
- name: docker-registry-secret
containers:
- name: playwright-service
image: ghcr.io/winkk-dev/firecrawl-playwright:latest
ports:
- containerPort: 3000
envFrom:
- configMapRef:
name: firecrawl-config
---
apiVersion: v1
kind: Service
metadata:
name: playwright-service
spec:
selector:
app: playwright-service
ports:
- protocol: TCP
port: 3000
targetPort: 3000

30
examples/k8n/redis.yaml Normal file
View File

@ -0,0 +1,30 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: redis
spec:
replicas: 1
selector:
matchLabels:
app: redis
template:
metadata:
labels:
app: redis
spec:
containers:
- name: redis
image: redis:alpine
args: ["redis-server", "--bind", "0.0.0.0"]
---
apiVersion: v1
kind: Service
metadata:
name: redis
spec:
selector:
app: redis
ports:
- protocol: TCP
port: 6379
targetPort: 6379

20
examples/k8n/secret.yaml Normal file
View File

@ -0,0 +1,20 @@
apiVersion: v1
kind: Secret
metadata:
name: firecrawl-secret
type: Opaque
data:
OPENAI_API_KEY: ""
SLACK_WEBHOOK_URL: ""
SERPER_API_KEY: ""
LLAMAPARSE_API_KEY: ""
LOGTAIL_KEY: ""
BULL_AUTH_KEY: ""
TEST_API_KEY: ""
POSTHOG_API_KEY: ""
POSTHOG_HOST: ""
SCRAPING_BEE_API_KEY: ""
STRIPE_PRICE_ID_STANDARD: ""
STRIPE_PRICE_ID_SCALE: ""
HYPERDX_API_KEY: ""
FIRE_ENGINE_BETA_URL: ""

24
examples/k8n/worker.yaml Normal file
View File

@ -0,0 +1,24 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: worker
spec:
replicas: 1
selector:
matchLabels:
app: worker
template:
metadata:
labels:
app: worker
spec:
imagePullSecrets:
- name: docker-registry-secret
containers:
- name: worker
image: ghcr.io/winkk-dev/firecrawl:latest
envFrom:
- configMapRef:
name: firecrawl-config
- secretRef:
name: firecrawl-secret

View File

@ -0,0 +1,3 @@
{
"extends": "next/core-web-vitals"
}

38
examples/roastmywebsite/.gitignore vendored Normal file
View File

@ -0,0 +1,38 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
.env
node_modules

View File

@ -0,0 +1,5 @@
# Roast My Website 🔥
Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them.
Check it out at roastmywebsite.ai 😈

View File

@ -0,0 +1,17 @@
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "default",
"rsc": true,
"tsx": true,
"tailwind": {
"config": "tailwind.config.ts",
"css": "src/app/globals.css",
"baseColor": "zinc",
"cssVariables": false,
"prefix": ""
},
"aliases": {
"components": "@/components",
"utils": "@/lib/utils"
}
}

View File

@ -0,0 +1,11 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
env: {
G1: process.env.G1,
G2: process.env.G2,
G3: process.env.G3,
G4: process.env.G4,
},
};
export default nextConfig;

6617
examples/roastmywebsite/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,53 @@
{
"name": "roastmywebsite",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@dqbd/tiktoken": "^1.0.15",
"@headlessui/react": "^2.0.4",
"@headlessui/tailwindcss": "^0.2.0",
"@mendable/firecrawl-js": "^0.0.21",
"@radix-ui/react-dialog": "^1.0.5",
"@radix-ui/react-dropdown-menu": "^2.0.6",
"@radix-ui/react-select": "^2.0.0",
"@radix-ui/react-slot": "^1.0.2",
"@radix-ui/react-switch": "^1.0.3",
"@remixicon/react": "^4.2.0",
"@tremor/react": "^3.17.2",
"@vercel/analytics": "^1.3.1",
"axios": "^1.7.2",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"cubic-spline": "^3.0.3",
"html2canvas": "^1.4.1",
"image-size": "^1.1.1",
"lucide": "^0.379.0",
"lucide-react": "^0.379.0",
"next": "14.2.3",
"next-themes": "^0.3.0",
"openai": "^4.47.3",
"react": "^18",
"react-dom": "^18",
"sonner": "^1.4.41",
"tailwind-merge": "^2.3.0",
"tailwindcss-animate": "^1.0.7",
"tiktoken": "^1.0.15"
},
"devDependencies": {
"@tailwindcss/forms": "^0.5.7",
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"eslint": "^8",
"eslint-config-next": "14.2.3",
"postcss": "^8",
"tailwindcss": "^3.4.3",
"typescript": "^5"
}
}

View File

@ -0,0 +1,8 @@
/** @type {import('postcss-load-config').Config} */
const config = {
plugins: {
tailwindcss: {},
},
};
export default config;

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 444 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 492 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 997 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 262 KiB

View File

@ -0,0 +1 @@
{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 283 64"><path fill="black" d="M141 16c-11 0-19 7-19 18s9 18 20 18c7 0 13-3 16-7l-7-5c-2 3-6 4-9 4-5 0-9-3-10-7h28v-3c0-11-8-18-19-18zm-9 15c1-4 4-7 9-7s8 3 9 7h-18zm117-15c-11 0-19 7-19 18s9 18 20 18c6 0 12-3 16-7l-8-5c-2 3-5 4-8 4-5 0-9-3-11-7h28l1-3c0-11-8-18-19-18zm-10 15c2-4 5-7 10-7s8 3 9 7h-19zm-39 3c0 6 4 10 10 10 4 0 7-2 9-5l8 5c-3 5-9 8-17 8-11 0-19-7-19-18s8-18 19-18c8 0 14 3 17 8l-8 5c-2-3-5-5-9-5-6 0-10 4-10 10zm83-29v46h-9V5h9zM37 0l37 64H0L37 0zm92 5-27 48L74 5h10l18 30 17-30h10zm59 12v10l-3-1c-6 0-10 4-10 10v15h-9V17h9v9c0-5 6-9 13-9z"/></svg>

After

Width:  |  Height:  |  Size: 629 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -0,0 +1,10 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
.fill-tremor-content-emphasis {
fill: rgb(113 113 122) !important;
}

View File

@ -0,0 +1,5 @@
export async function useGithubStars() {
const res = await fetch("https://api.github.com/repos/mendableai/firecrawl");
const data = await res.json();
return data.stargazers_count;
}

View File

@ -0,0 +1,68 @@
import type { Metadata } from "next";
import { Gloria_Hallelujah } from "next/font/google";
import "./globals.css";
import { Toaster } from "sonner";
import { Analytics } from "@vercel/analytics/react";
import { useEffect, useState } from "react";
import Head from "next/head";
const inter = Gloria_Hallelujah({ weight: "400", subsets: ["latin"] });
// const inter = Inter({ subsets: ["latin"] });
const meta = {
title: "Roast My Website",
description:
"Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 😈",
cardImage: "/og.png",
robots: "follow, index",
favicon: "/favicon.ico",
url: "https://www.roastmywebsite.ai/",
};
export async function generateMetadata(): Promise<Metadata> {
return {
title: meta.title,
description: meta.description,
referrer: "origin-when-cross-origin",
keywords: ["Roast My Website", "Roast", "Website", "GitHub", "Firecrawl"],
authors: [
{ name: "Roast My Website", url: "https://www.roastmywebsite.ai/" },
],
creator: "Roast My Website",
publisher: "Roast My Website",
robots: meta.robots,
icons: { icon: meta.favicon },
metadataBase: new URL(meta.url),
openGraph: {
url: meta.url,
title: meta.title,
description: meta.description,
images: [meta.cardImage],
type: "website",
siteName: meta.title,
},
twitter: {
card: "summary_large_image",
site: "@Vercel",
creator: "@Vercel",
title: meta.title,
description: meta.description,
images: [meta.cardImage],
},
};
}
export default function RootLayout({
children,
}: Readonly<{
children: React.ReactNode;
}>) {
return (
<html lang="en">
<body className={inter.className}>{children}</body>
<Analytics />
<Toaster />
</html>
);
}

View File

@ -0,0 +1,16 @@
// pages/index.tsx
import MainComponent from "@/components/main";
import { useGithubStars } from "./hooks/useGithubStars";
import GithubButton from "@/components/github-button";
export default async function Home() {
const githubStars = await useGithubStars();
return (
<div className="relative">
<div className="hidden md:flex z-10 absolute top-4 right-4 p-4">
<GithubButton githubStars={githubStars} />
</div>
<MainComponent />
</div>
);
}

View File

@ -0,0 +1,20 @@
"use client";
import { Github } from "lucide-react";
import { Button } from "./ui/button";
export default function GithubButton({ githubStars }: { githubStars: number }) {
return (
<Button
onClick={() => {
window.open("https://github.com/mendableai/firecrawl", "_blank");
}}
variant="outline"
size="icon"
className="px-3 w-22 gap-2"
>
<Github className="h-4 w-4" />{" "}
{/* {githubStars ? `Star us on GitHub` : "GitHub"} */}
See code on Github
</Button>
);
}

View File

@ -0,0 +1,113 @@
"use client";
import { useEffect, useRef, useState } from "react";
import { Theme, allThemes } from "@/lib/theme";
import html2canvas from "html2canvas";
import { Button } from "@/components/ui/button";
import { Input } from "./ui/input";
import { Github } from "lucide-react";
export default function MainComponent() {
const [roastUrl, setRoastUrl] = useState("");
const [loading, setLoading] = useState(false);
const [roastData, setRoastData] = useState("");
const [spiceLevel, setSpiceLevel] = useState(2);
return (
<div
className="h-screen"
style={{
background: `linear-gradient(to bottom right, rgba(255, 255, 255, 0.75) 58%, #fff, red )`,
}}
>
<main className="relative flex h-[95vh] flex-col items-center justify-center bg-transparent bg-opacity-80">
<div className="w-3/4 flex flex-col items-center gap-4">
<h1 className="font-inter-tight text-4xl lg:text-5xl xl:text-6xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-zinc-500 via-zinc-900 to-zinc-900 pb-4 text-center">
<em className="relative px-1 italic animate-text transition-all text-transparent bg-clip-text bg-gradient-to-tr from-red-600 to-red-400 inline-flex justify-center items-center text-6xl lg:text-7xl xl:text-8xl">
Roast
</em>
<br />{" "}
<span className="text-4xl lg:text-5xl xl:text-6xl">My Website</span>
</h1>
<div className="flex flex-col sm:flex-row items-center gap-4 justify-center w-3/5">
<Input
type="text"
className="w-full p-2 border border-gray-300 rounded r"
placeholder="https://coconut.com/"
value={roastUrl}
onChange={(e) => setRoastUrl(e.target.value)}
/>
<Button
className="px-6 py-2 bg-red-500/25 text-red-500 0 rounded-lg hover:bg-red-300 w-1/8 whitespace-nowrap"
onClick={async () => {
if (roastUrl) {
setLoading(true);
try {
const response = await fetch(
`/api/roastWebsite?url=${encodeURIComponent(
roastUrl
)}&spiceLevel=${spiceLevel}`
);
if (!response.ok) {
throw new Error(`Error: ${response.statusText}`);
}
const data = await response.json();
setRoastData(data.roastResult);
} catch (error) {
console.error("Error:", error);
} finally {
setLoading(false);
}
}
}}
>
{loading ? "Loading..." : "Get Roasted 🌶️"}
</Button>
</div>
<div className="flex items-center justify-center mt-4">
<label
htmlFor="spice-level"
className="mr-4 font-medium text-gray-700"
>
Choose your roast level:
</label>
<select
id="spice-level"
className="cursor-pointer rounded-lg border border-gray-300 bg-white py-2 px-4 text-center shadow-sm focus:border-orange-500 focus:outline-none focus:ring-1 focus:ring-orange-500"
value={spiceLevel}
onChange={(e) => setSpiceLevel(Number(e.target.value))}
>
<option value={1}>Mild 🌶</option>
<option value={2}>Medium 🌶🌶</option>
<option value={3}>Spicy 🌶🌶🌶</option>
</select>
</div>
{loading ? (
<div className="mt-4 w-3/5 p-4 border border-gray-300 rounded shadow bg-gradient-to-r from-red-500 to-red-400 animate-pulse">
<p className="text-white text-center">Preparing your roast...</p>
</div>
) : (
roastData && (
<div className="!font-sans mt-4 w-3/5 p-4 border border-gray-300 rounded shadow">
<p>{roastData}</p>
</div>
)
)}
</div>
<div
className={`fixed bottom-0 left-0 right-0 p-4 text-white text-center font-light flex justify-center items-center gap-4`}
>
<a
href="https://firecrawl.dev"
target="_blank"
className="text-black hover:text-orange-400"
style={{ textShadow: "2px 2px 4px rgba(0, 0, 0, 0.15)" }}
>
A demo web scraping and vision extraction from Firecrawl 🔥
</a>
</div>
</main>
</div>
);
}

View File

@ -0,0 +1,56 @@
import * as React from "react"
import { Slot } from "@radix-ui/react-slot"
import { cva, type VariantProps } from "class-variance-authority"
import { cn } from "@/lib/utils"
const buttonVariants = cva(
"inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 dark:ring-offset-zinc-950 dark:focus-visible:ring-zinc-300",
{
variants: {
variant: {
default: "bg-zinc-900 text-zinc-50 hover:bg-zinc-900/90 dark:bg-zinc-50 dark:text-zinc-900 dark:hover:bg-zinc-50/90",
destructive:
"bg-red-500 text-zinc-50 hover:bg-red-500/90 dark:bg-red-900 dark:text-zinc-50 dark:hover:bg-red-900/90",
outline:
"border border-zinc-200 bg-white hover:bg-zinc-100 hover:text-zinc-900 dark:border-zinc-800 dark:bg-zinc-950 dark:hover:bg-zinc-800 dark:hover:text-zinc-50",
secondary:
"bg-zinc-100 text-zinc-900 hover:bg-zinc-100/80 dark:bg-zinc-800 dark:text-zinc-50 dark:hover:bg-zinc-800/80",
ghost: "hover:bg-zinc-100 hover:text-zinc-900 dark:hover:bg-zinc-800 dark:hover:text-zinc-50",
link: "text-zinc-900 underline-offset-4 hover:underline dark:text-zinc-50",
},
size: {
default: "h-10 px-4 py-2",
sm: "h-9 rounded-md px-3",
lg: "h-11 rounded-md px-8",
icon: "h-10 w-10",
},
},
defaultVariants: {
variant: "default",
size: "default",
},
}
)
export interface ButtonProps
extends React.ButtonHTMLAttributes<HTMLButtonElement>,
VariantProps<typeof buttonVariants> {
asChild?: boolean
}
const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
({ className, variant, size, asChild = false, ...props }, ref) => {
const Comp = asChild ? Slot : "button"
return (
<Comp
className={cn(buttonVariants({ variant, size, className }))}
ref={ref}
{...props}
/>
)
}
)
Button.displayName = "Button"
export { Button, buttonVariants }

View File

@ -0,0 +1,122 @@
"use client"
import * as React from "react"
import * as DialogPrimitive from "@radix-ui/react-dialog"
import { X } from "lucide-react"
import { cn } from "@/lib/utils"
const Dialog = DialogPrimitive.Root
const DialogTrigger = DialogPrimitive.Trigger
const DialogPortal = DialogPrimitive.Portal
const DialogClose = DialogPrimitive.Close
const DialogOverlay = React.forwardRef<
React.ElementRef<typeof DialogPrimitive.Overlay>,
React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay>
>(({ className, ...props }, ref) => (
<DialogPrimitive.Overlay
ref={ref}
className={cn(
"fixed inset-0 z-50 bg-black/80 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",
className
)}
{...props}
/>
))
DialogOverlay.displayName = DialogPrimitive.Overlay.displayName
const DialogContent = React.forwardRef<
React.ElementRef<typeof DialogPrimitive.Content>,
React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content>
>(({ className, children, ...props }, ref) => (
<DialogPortal>
<DialogOverlay />
<DialogPrimitive.Content
ref={ref}
className={cn(
"fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-zinc-200 bg-white p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg dark:border-zinc-800 dark:bg-zinc-950",
className
)}
{...props}
>
{children}
<DialogPrimitive.Close className="absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-zinc-950 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-zinc-100 data-[state=open]:text-zinc-500 dark:ring-offset-zinc-950 dark:focus:ring-zinc-300 dark:data-[state=open]:bg-zinc-800 dark:data-[state=open]:text-zinc-400">
<X className="h-4 w-4" />
<span className="sr-only">Close</span>
</DialogPrimitive.Close>
</DialogPrimitive.Content>
</DialogPortal>
))
DialogContent.displayName = DialogPrimitive.Content.displayName
const DialogHeader = ({
className,
...props
}: React.HTMLAttributes<HTMLDivElement>) => (
<div
className={cn(
"flex flex-col space-y-1.5 text-center sm:text-left",
className
)}
{...props}
/>
)
DialogHeader.displayName = "DialogHeader"
const DialogFooter = ({
className,
...props
}: React.HTMLAttributes<HTMLDivElement>) => (
<div
className={cn(
"flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",
className
)}
{...props}
/>
)
DialogFooter.displayName = "DialogFooter"
const DialogTitle = React.forwardRef<
React.ElementRef<typeof DialogPrimitive.Title>,
React.ComponentPropsWithoutRef<typeof DialogPrimitive.Title>
>(({ className, ...props }, ref) => (
<DialogPrimitive.Title
ref={ref}
className={cn(
"text-lg font-semibold leading-none tracking-tight",
className
)}
{...props}
/>
))
DialogTitle.displayName = DialogPrimitive.Title.displayName
const DialogDescription = React.forwardRef<
React.ElementRef<typeof DialogPrimitive.Description>,
React.ComponentPropsWithoutRef<typeof DialogPrimitive.Description>
>(({ className, ...props }, ref) => (
<DialogPrimitive.Description
ref={ref}
className={cn("text-sm text-zinc-500 dark:text-zinc-400", className)}
{...props}
/>
))
DialogDescription.displayName = DialogPrimitive.Description.displayName
export {
Dialog,
DialogPortal,
DialogOverlay,
DialogClose,
DialogTrigger,
DialogContent,
DialogHeader,
DialogFooter,
DialogTitle,
DialogDescription,
}

View File

@ -0,0 +1,200 @@
"use client"
import * as React from "react"
import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu"
import { Check, ChevronRight, Circle } from "lucide-react"
import { cn } from "@/lib/utils"
const DropdownMenu = DropdownMenuPrimitive.Root
const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger
const DropdownMenuGroup = DropdownMenuPrimitive.Group
const DropdownMenuPortal = DropdownMenuPrimitive.Portal
const DropdownMenuSub = DropdownMenuPrimitive.Sub
const DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup
const DropdownMenuSubTrigger = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.SubTrigger>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubTrigger> & {
inset?: boolean
}
>(({ className, inset, children, ...props }, ref) => (
<DropdownMenuPrimitive.SubTrigger
ref={ref}
className={cn(
"flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none focus:bg-zinc-100 data-[state=open]:bg-zinc-100 dark:focus:bg-zinc-800 dark:data-[state=open]:bg-zinc-800",
inset && "pl-8",
className
)}
{...props}
>
{children}
<ChevronRight className="ml-auto h-4 w-4" />
</DropdownMenuPrimitive.SubTrigger>
))
DropdownMenuSubTrigger.displayName =
DropdownMenuPrimitive.SubTrigger.displayName
const DropdownMenuSubContent = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.SubContent>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubContent>
>(({ className, ...props }, ref) => (
<DropdownMenuPrimitive.SubContent
ref={ref}
className={cn(
"z-50 min-w-[8rem] overflow-hidden rounded-md border border-zinc-200 bg-white p-1 text-zinc-950 shadow-lg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-zinc-800 dark:bg-zinc-950 dark:text-zinc-50",
className
)}
{...props}
/>
))
DropdownMenuSubContent.displayName =
DropdownMenuPrimitive.SubContent.displayName
const DropdownMenuContent = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.Content>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>
>(({ className, sideOffset = 4, ...props }, ref) => (
<DropdownMenuPrimitive.Portal>
<DropdownMenuPrimitive.Content
ref={ref}
sideOffset={sideOffset}
className={cn(
"z-50 min-w-[8rem] overflow-hidden rounded-md border border-zinc-200 bg-white p-1 text-zinc-950 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-zinc-800 dark:bg-zinc-950 dark:text-zinc-50",
className
)}
{...props}
/>
</DropdownMenuPrimitive.Portal>
))
DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName
const DropdownMenuItem = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.Item>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & {
inset?: boolean
}
>(({ className, inset, ...props }, ref) => (
<DropdownMenuPrimitive.Item
ref={ref}
className={cn(
"relative flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
inset && "pl-8",
className
)}
{...props}
/>
))
DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName
const DropdownMenuCheckboxItem = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.CheckboxItem>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.CheckboxItem>
>(({ className, children, checked, ...props }, ref) => (
<DropdownMenuPrimitive.CheckboxItem
ref={ref}
className={cn(
"relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
className
)}
checked={checked}
{...props}
>
<span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
<DropdownMenuPrimitive.ItemIndicator>
<Check className="h-4 w-4" />
</DropdownMenuPrimitive.ItemIndicator>
</span>
{children}
</DropdownMenuPrimitive.CheckboxItem>
))
DropdownMenuCheckboxItem.displayName =
DropdownMenuPrimitive.CheckboxItem.displayName
const DropdownMenuRadioItem = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.RadioItem>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.RadioItem>
>(({ className, children, ...props }, ref) => (
<DropdownMenuPrimitive.RadioItem
ref={ref}
className={cn(
"relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
className
)}
{...props}
>
<span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
<DropdownMenuPrimitive.ItemIndicator>
<Circle className="h-2 w-2 fill-current" />
</DropdownMenuPrimitive.ItemIndicator>
</span>
{children}
</DropdownMenuPrimitive.RadioItem>
))
DropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName
const DropdownMenuLabel = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.Label>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Label> & {
inset?: boolean
}
>(({ className, inset, ...props }, ref) => (
<DropdownMenuPrimitive.Label
ref={ref}
className={cn(
"px-2 py-1.5 text-sm font-semibold",
inset && "pl-8",
className
)}
{...props}
/>
))
DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName
const DropdownMenuSeparator = React.forwardRef<
React.ElementRef<typeof DropdownMenuPrimitive.Separator>,
React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Separator>
>(({ className, ...props }, ref) => (
<DropdownMenuPrimitive.Separator
ref={ref}
className={cn("-mx-1 my-1 h-px bg-zinc-100 dark:bg-zinc-800", className)}
{...props}
/>
))
DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName
const DropdownMenuShortcut = ({
className,
...props
}: React.HTMLAttributes<HTMLSpanElement>) => {
return (
<span
className={cn("ml-auto text-xs tracking-widest opacity-60", className)}
{...props}
/>
)
}
DropdownMenuShortcut.displayName = "DropdownMenuShortcut"
export {
DropdownMenu,
DropdownMenuTrigger,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuCheckboxItem,
DropdownMenuRadioItem,
DropdownMenuLabel,
DropdownMenuSeparator,
DropdownMenuShortcut,
DropdownMenuGroup,
DropdownMenuPortal,
DropdownMenuSub,
DropdownMenuSubContent,
DropdownMenuSubTrigger,
DropdownMenuRadioGroup,
}

View File

@ -0,0 +1,25 @@
import * as React from "react"
import { cn } from "@/lib/utils"
export interface InputProps
extends React.InputHTMLAttributes<HTMLInputElement> {}
const Input = React.forwardRef<HTMLInputElement, InputProps>(
({ className, type, ...props }, ref) => {
return (
<input
type={type}
className={cn(
"flex h-10 w-full rounded-md border border-zinc-200 bg-white px-3 py-2 text-sm ring-offset-white file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-zinc-500 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 dark:border-zinc-800 dark:bg-zinc-950 dark:ring-offset-zinc-950 dark:placeholder:text-zinc-400 dark:focus-visible:ring-zinc-300",
className
)}
ref={ref}
{...props}
/>
)
}
)
Input.displayName = "Input"
export { Input }

View File

@ -0,0 +1,160 @@
"use client"
import * as React from "react"
import * as SelectPrimitive from "@radix-ui/react-select"
import { Check, ChevronDown, ChevronUp } from "lucide-react"
import { cn } from "@/lib/utils"
const Select = SelectPrimitive.Root
const SelectGroup = SelectPrimitive.Group
const SelectValue = SelectPrimitive.Value
const SelectTrigger = React.forwardRef<
React.ElementRef<typeof SelectPrimitive.Trigger>,
Omit<React.ComponentPropsWithoutRef<typeof SelectPrimitive.Trigger>, 'noIcon'> & { noIcon?: boolean }
>(({ className, children, noIcon, ...props }, ref) => (
<SelectPrimitive.Trigger
ref={ref}
className={cn(
"flex h-10 w-full items-center justify-between rounded-md border border-zinc-200 bg-white px-3 py-2 text-sm ring-offset-white placeholder:text-zinc-500 focus:outline-none focus:ring-2 focus:ring-zinc-950 focus:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 [&>span]:line-clamp-1 dark:border-zinc-800 dark:bg-zinc-950 dark:ring-offset-zinc-950 dark:placeholder:text-zinc-400 dark:focus:ring-zinc-300",
className
)}
{...props}
>
{children}
<SelectPrimitive.Icon asChild>
{noIcon ? <></> : <ChevronDown className="h-4 w-4 opacity-50" />}
</SelectPrimitive.Icon>
</SelectPrimitive.Trigger>
))
SelectTrigger.displayName = SelectPrimitive.Trigger.displayName
const SelectScrollUpButton = React.forwardRef<
React.ElementRef<typeof SelectPrimitive.ScrollUpButton>,
React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollUpButton>
>(({ className, ...props }, ref) => (
<SelectPrimitive.ScrollUpButton
ref={ref}
className={cn(
"flex cursor-default items-center justify-center py-1",
className
)}
{...props}
>
<ChevronUp className="h-4 w-4" />
</SelectPrimitive.ScrollUpButton>
))
SelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName
const SelectScrollDownButton = React.forwardRef<
React.ElementRef<typeof SelectPrimitive.ScrollDownButton>,
React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollDownButton>
>(({ className, ...props }, ref) => (
<SelectPrimitive.ScrollDownButton
ref={ref}
className={cn(
"flex cursor-default items-center justify-center py-1",
className
)}
{...props}
>
<ChevronDown className="h-4 w-4" />
</SelectPrimitive.ScrollDownButton>
))
SelectScrollDownButton.displayName =
SelectPrimitive.ScrollDownButton.displayName
const SelectContent = React.forwardRef<
React.ElementRef<typeof SelectPrimitive.Content>,
React.ComponentPropsWithoutRef<typeof SelectPrimitive.Content>
>(({ className, children, position = "popper", ...props }, ref) => (
<SelectPrimitive.Portal>
<SelectPrimitive.Content
ref={ref}
className={cn(
"relative z-50 max-h-96 min-w-[8rem] overflow-hidden rounded-md border border-zinc-200 bg-white text-zinc-950 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-zinc-800 dark:bg-zinc-950 dark:text-zinc-50",
position === "popper" &&
"data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
className
)}
position={position}
{...props}
>
<SelectScrollUpButton />
<SelectPrimitive.Viewport
className={cn(
"p-1",
position === "popper" &&
"h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)]"
)}
>
{children}
</SelectPrimitive.Viewport>
<SelectScrollDownButton />
</SelectPrimitive.Content>
</SelectPrimitive.Portal>
))
SelectContent.displayName = SelectPrimitive.Content.displayName
const SelectLabel = React.forwardRef<
React.ElementRef<typeof SelectPrimitive.Label>,
React.ComponentPropsWithoutRef<typeof SelectPrimitive.Label>
>(({ className, ...props }, ref) => (
<SelectPrimitive.Label
ref={ref}
className={cn("py-1.5 pl-8 pr-2 text-sm font-semibold", className)}
{...props}
/>
))
SelectLabel.displayName = SelectPrimitive.Label.displayName
const SelectItem = React.forwardRef<
React.ElementRef<typeof SelectPrimitive.Item>,
React.ComponentPropsWithoutRef<typeof SelectPrimitive.Item>
>(({ className, children, ...props }, ref) => (
<SelectPrimitive.Item
ref={ref}
className={cn(
"relative flex w-full cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
className
)}
{...props}
>
<span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
<SelectPrimitive.ItemIndicator>
<Check className="h-4 w-4" />
</SelectPrimitive.ItemIndicator>
</span>
<SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
</SelectPrimitive.Item>
))
SelectItem.displayName = SelectPrimitive.Item.displayName
const SelectSeparator = React.forwardRef<
React.ElementRef<typeof SelectPrimitive.Separator>,
React.ComponentPropsWithoutRef<typeof SelectPrimitive.Separator>
>(({ className, ...props }, ref) => (
<SelectPrimitive.Separator
ref={ref}
className={cn("-mx-1 my-1 h-px bg-zinc-100 dark:bg-zinc-800", className)}
{...props}
/>
))
SelectSeparator.displayName = SelectPrimitive.Separator.displayName
export {
Select,
SelectGroup,
SelectValue,
SelectTrigger,
SelectContent,
SelectLabel,
SelectItem,
SelectSeparator,
SelectScrollUpButton,
SelectScrollDownButton,
}

View File

@ -0,0 +1,31 @@
"use client"
import { useTheme } from "next-themes"
import { Toaster as Sonner } from "sonner"
type ToasterProps = React.ComponentProps<typeof Sonner>
const Toaster = ({ ...props }: ToasterProps) => {
const { theme = "system" } = useTheme()
return (
<Sonner
theme={theme as ToasterProps["theme"]}
className="toaster group"
toastOptions={{
classNames: {
toast:
"group toast group-[.toaster]:bg-white group-[.toaster]:text-zinc-950 group-[.toaster]:border-zinc-200 group-[.toaster]:shadow-lg dark:group-[.toaster]:bg-zinc-950 dark:group-[.toaster]:text-zinc-50 dark:group-[.toaster]:border-zinc-800",
description: "group-[.toast]:text-zinc-500 dark:group-[.toast]:text-zinc-400",
actionButton:
"group-[.toast]:bg-zinc-900 group-[.toast]:text-zinc-50 dark:group-[.toast]:bg-zinc-50 dark:group-[.toast]:text-zinc-900",
cancelButton:
"group-[.toast]:bg-zinc-100 group-[.toast]:text-zinc-500 dark:group-[.toast]:bg-zinc-800 dark:group-[.toast]:text-zinc-400",
},
}}
{...props}
/>
)
}
export { Toaster }

View File

@ -0,0 +1,29 @@
"use client"
import * as React from "react"
import * as SwitchPrimitives from "@radix-ui/react-switch"
import { cn } from "@/lib/utils"
const Switch = React.forwardRef<
React.ElementRef<typeof SwitchPrimitives.Root>,
React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
>(({ className, ...props }, ref) => (
<SwitchPrimitives.Root
className={cn(
"peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 focus-visible:ring-offset-white disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-zinc-900 data-[state=unchecked]:bg-zinc-200 dark:focus-visible:ring-zinc-300 dark:focus-visible:ring-offset-zinc-950 dark:data-[state=checked]:bg-zinc-50 dark:data-[state=unchecked]:bg-zinc-800",
className
)}
{...props}
ref={ref}
>
<SwitchPrimitives.Thumb
className={cn(
"pointer-events-none block h-5 w-5 rounded-full bg-white shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0 dark:bg-zinc-950"
)}
/>
</SwitchPrimitives.Root>
))
Switch.displayName = SwitchPrimitives.Root.displayName
export { Switch }

View File

@ -0,0 +1,24 @@
import * as React from "react"
import { cn } from "@/lib/utils"
export interface TextareaProps
extends React.TextareaHTMLAttributes<HTMLTextAreaElement> {}
const Textarea = React.forwardRef<HTMLTextAreaElement, TextareaProps>(
({ className, ...props }, ref) => {
return (
<textarea
className={cn(
"flex min-h-[80px] w-full rounded-md border border-zinc-200 bg-white px-3 py-2 text-sm ring-offset-white placeholder:text-zinc-500 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 dark:border-zinc-800 dark:bg-zinc-950 dark:ring-offset-zinc-950 dark:placeholder:text-zinc-400 dark:focus-visible:ring-zinc-300",
className
)}
ref={ref}
{...props}
/>
)
}
)
Textarea.displayName = "Textarea"
export { Textarea }

View File

@ -0,0 +1,75 @@
import OpenAI from "openai";
import { encoding_for_model } from "@dqbd/tiktoken";
/**
* Function to generate a roast for a website based on its screenshot and markdown content.
* @param roastPrompt - Initial prompt text for the roast.
* @param screenshotUrl - URL of the screenshot of the website.
* @param content - Raw markdown content of the website.
*/
export async function roastPrompt(roastPrompt: string, screenshotUrl: string, content: string) {
try {
// Initialize OpenAI with the API key from environment variables
const openai = new OpenAI({
apiKey: process.env.OPEN_AI_KEY
});
let contentTruncated = await truncateContentToFit(content, 30000);
const response = await openai.chat.completions.create({
model: "gpt-4o",
messages: [
{
role: "user",
content: [
{ type: "text", text: roastPrompt },
{ type: "image_url", image_url: { url: screenshotUrl, detail: "low" } },
{ type: "text", text: contentTruncated }
],
},
],
});
// Return the first choice's message instead of logging it
return response.choices[0].message.content;
} catch (error) {
console.error("Error generating roast:", error);
// Assert error as an instance of Error to access message property
return `Error generating roast: ${(error as Error).message}`;
}
}
export function numTokensFromString(message: string, model: string): number {
const encoder = encoding_for_model(model as any);
// Encode the message into tokens
const tokens = encoder.encode(message);
// Free the encoder resources after use
encoder.free();
// Return the number of tokens
return tokens.length;
}
async function truncateContentToFit(content: string, maxTokens: number): Promise<string> {
const modifier = 4;
let contentTotruncate = content;
const numTokens = numTokensFromString(contentTotruncate, "gpt-4");
if (numTokens > maxTokens) {
// trim the document to the maximum number of tokens, tokens != characters
contentTotruncate = content.slice(0, (maxTokens * modifier));
}
return contentTotruncate
}

View File

@ -0,0 +1,6 @@
import { type ClassValue, clsx } from "clsx"
import { twMerge } from "tailwind-merge"
export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs))
}

View File

@ -0,0 +1,76 @@
// pages/api/getRepoStars.ts
import { NextApiRequest, NextApiResponse } from 'next';
import FirecrawlApp from '@mendable/firecrawl-js';
import { roastPrompt } from '../../lib/LLM/llm';
const mildPrompt = "Give me a light hearted roast for this website:"
const mediumPrompt = "Give me a roast for this website:"
const spicyPrompt ="Roast this website. The person who made this website wants to be roasted really hard. I mean REALLY hard, don't hold back, and don't worry about hurting their feelings. Be mean but concisely mean. No more than 2 paragraphs."
async function getScreenShotAndHtml(url: string) {
const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });
const scrapeResult = await app.scrapeUrl(url, {
pageOptions: {
includeHtml: true,
screenshot: true
}
});
return {
screenshotUrl: scrapeResult.data.metadata.screenshot,
htmlContent: scrapeResult.data.html
};
}
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
const rawUrl = typeof req.query.url === 'string' ? req.query.url : null;
const spiceLevel = typeof req.query.spiceLevel === 'string' ? parseInt(req.query.spiceLevel, 10) : null;
if (!rawUrl || spiceLevel === null || isNaN(spiceLevel)) {
res.status(400).json({ error: 'Invalid query parameters' });
return;
}
try {
const { screenshotUrl, htmlContent } = await getScreenShotAndHtml(rawUrl);
// Define a roast prompt message
let roastMessage: string;
// Determine the roast message based on the spice level
switch (spiceLevel) {
case 1:
roastMessage = mildPrompt;
break;
case 2:
roastMessage = mediumPrompt;
break;
case 3:
roastMessage = spicyPrompt;
break;
default:
// If an invalid spice level is provided, default to mild roast
roastMessage = mildPrompt;
res.status(400).json({ error: 'Invalid spice level' });
return;
}
// Convert HTML content to a markdown-like format for the roast generation
// This is a simplified conversion, assuming HTML content is already sanitized and suitable for direct usage
// Call the roastPrompt function to generate a roast
const roastResult = await roastPrompt(roastMessage, screenshotUrl, htmlContent);
// Log the roast result for debugging
// console.log("Roast Result:", roastResult);
res.status(200).json({ screenshotUrl, htmlContent, roastResult});
} catch (error: any) {
res.status(500).json({ error: error.message });
}
}

View File

@ -0,0 +1,137 @@
import type { Config } from 'tailwindcss';
import colors from 'tailwindcss/colors';
const config: Config = {
darkMode: 'class',
content: [
'./src/**/*.{js,ts,jsx,tsx}',
// Path to Tremor module
'./node_modules/@tremor/**/*.{js,ts,jsx,tsx}',
],
theme: {
transparent: 'transparent',
current: 'currentColor',
extend: {
colors: {
// light mode
tremor: {
brand: {
faint: colors.blue[50],
muted: colors.blue[200],
subtle: colors.blue[400],
DEFAULT: colors.blue[500],
emphasis: colors.blue[700],
inverted: colors.white,
},
background: {
muted: colors.gray[50],
subtle: colors.gray[100],
DEFAULT: colors.white,
emphasis: colors.gray[700],
},
border: {
DEFAULT: colors.gray[200],
},
ring: {
DEFAULT: colors.gray[200],
},
content: {
subtle: colors.gray[400],
DEFAULT: colors.gray[500],
emphasis: colors.gray[700],
strong: colors.gray[900],
inverted: colors.white,
},
},
// dark mode
'dark-tremor': {
brand: {
faint: '#0B1229',
muted: colors.blue[950],
subtle: colors.blue[800],
DEFAULT: colors.blue[500],
emphasis: colors.blue[400],
inverted: colors.blue[950],
},
background: {
muted: '#131A2B',
subtle: colors.gray[800],
DEFAULT: colors.gray[900],
emphasis: colors.gray[300],
},
border: {
DEFAULT: colors.gray[800],
},
ring: {
DEFAULT: colors.gray[800],
},
content: {
subtle: colors.gray[600],
DEFAULT: colors.gray[500],
emphasis: colors.gray[200],
strong: colors.gray[50],
inverted: colors.gray[950],
},
},
},
boxShadow: {
// light
'tremor-input': '0 1px 2px 0 rgb(0 0 0 / 0.05)',
'tremor-card':
'0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)',
'tremor-dropdown':
'0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)',
// dark
'dark-tremor-input': '0 1px 2px 0 rgb(0 0 0 / 0.05)',
'dark-tremor-card':
'0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)',
'dark-tremor-dropdown':
'0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)',
},
borderRadius: {
'tremor-small': '0.375rem',
'tremor-default': '0.5rem',
'tremor-full': '9999px',
},
fontSize: {
'tremor-label': ['0.75rem', { lineHeight: '1rem' }],
'tremor-default': ['0.875rem', { lineHeight: '1.25rem' }],
'tremor-title': ['1.125rem', { lineHeight: '1.75rem' }],
'tremor-metric': ['1.875rem', { lineHeight: '2.25rem' }],
},
},
},
safelist: [
{
pattern:
/^(bg-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
variants: ['hover', 'ui-selected'],
},
{
pattern:
/^(text-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
variants: ['hover', 'ui-selected'],
},
{
pattern:
/^(border-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
variants: ['hover', 'ui-selected'],
},
{
pattern:
/^(ring-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
},
{
pattern:
/^(stroke-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
},
{
pattern:
/^(fill-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
},
],
plugins: [require('@headlessui/tailwindcss'), require('@tailwindcss/forms')],
};
export default config;

View File

@ -0,0 +1,26 @@
{
"compilerOptions": {
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules"]
}