Merge branch 'main' into nsc/improved-blocklist
.github/workflows/fly.yml
@@ -94,6 +94,25 @@ jobs:
        run: |
          npm run test
        working-directory: ./apps/test-suite
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
        working-directory: ./apps/python-sdk
      - name: Run E2E tests for Python SDK
        run: |
          pytest firecrawl/__tests__/e2e_withAuth/test.py
        working-directory: ./apps/python-sdk
      - name: Install dependencies for JavaScript SDK
        run: pnpm install
        working-directory: ./apps/js-sdk/firecrawl
      - name: Run E2E tests for JavaScript SDK
        run: npm run test
        working-directory: ./apps/js-sdk/firecrawl

  deploy:
    name: Deploy app
.github/workflows/js-sdk.yml
@@ -0,0 +1,60 @@
name: Run JavaScript SDK E2E Tests

on:
  pull_request:
    branches:
      - main
env:
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
  FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
  HOST: ${{ secrets.HOST }}
  LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
  LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
  POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
  POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
  NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
  PORT: ${{ secrets.PORT }}
  REDIS_URL: ${{ secrets.REDIS_URL }}
  SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
  SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
  SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
  SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
  TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
  HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
  HDX_NODE_BETA_MODE: 1

jobs:
  build:
    runs-on: ubuntu-latest
    services:
      redis:
        image: redis
        ports:
          - 6379:6379

    steps:
      - uses: actions/checkout@v3
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"
      - name: Install pnpm
        run: npm install -g pnpm
      - name: Install dependencies for API
        run: pnpm install
        working-directory: ./apps/api
      - name: Start the application
        run: npm start &
        working-directory: ./apps/api
      - name: Start workers
        run: npm run workers &
        working-directory: ./apps/api
      - name: Install dependencies for JavaScript SDK
        run: pnpm install
        working-directory: ./apps/js-sdk/firecrawl
      - name: Run E2E tests for JavaScript SDK
        run: npm run test
        working-directory: ./apps/js-sdk/firecrawl
.github/workflows/python-sdk.yml
@@ -0,0 +1,72 @@
name: Run Python SDK E2E Tests

on:
  pull_request:
    branches:
      - main
env:
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
  FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
  HOST: ${{ secrets.HOST }}
  LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
  LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
  POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
  POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
  NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
  PORT: ${{ secrets.PORT }}
  REDIS_URL: ${{ secrets.REDIS_URL }}
  SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
  SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
  SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
  SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
  TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
  HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
  HDX_NODE_BETA_MODE: 1

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]
    services:
      redis:
        image: redis
        ports:
          - 6379:6379

    steps:
      - uses: actions/checkout@v3
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"
      - name: Install pnpm
        run: npm install -g pnpm
      - name: Install dependencies for API
        run: pnpm install
        working-directory: ./apps/api
      - name: Start the application
        run: npm start &
        working-directory: ./apps/api
        id: start_app
      - name: Start workers
        run: npm run workers &
        working-directory: ./apps/api
        id: start_workers
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
        working-directory: ./apps/python-sdk
      - name: Run E2E tests for Python SDK
        run: |
          pytest firecrawl/__tests__/e2e_withAuth/test.py
        working-directory: ./apps/python-sdk
@@ -39,7 +39,7 @@ SUPABASE_SERVICE_TOKEN=
TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
SCRAPING_BEE_API_KEY= # set if you'd like to use ScrapingBee to handle JS blocking
OPENAI_API_KEY= # add for LLM-dependent features (image alt generation, etc.)
BULL_AUTH_KEY= #
BULL_AUTH_KEY= @
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
LLAMAPARSE_API_KEY= # set if you have a LlamaParse key you'd like to use to parse PDFs
@@ -402,7 +402,6 @@ const searchResults = await app.search(query, {

```

## Contributing

We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
@@ -21,7 +21,7 @@ RATE_LIMIT_TEST_API_KEY_SCRAPE= # set if you'd like to test the scraping rate limit
RATE_LIMIT_TEST_API_KEY_CRAWL= # set if you'd like to test the crawling rate limit
SCRAPING_BEE_API_KEY= # set if you'd like to use ScrapingBee to handle JS blocking
OPENAI_API_KEY= # add for LLM-dependent features (image alt generation, etc.)
BULL_AUTH_KEY= #
BULL_AUTH_KEY= @
LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
LLAMAPARSE_API_KEY= # set if you have a LlamaParse key you'd like to use to parse PDFs
SERPER_API_KEY= # set if you have a Serper key you'd like to use as a search API
@@ -31,6 +31,13 @@ POSTHOG_HOST= # set if you'd like to send posthog events like job logs

STRIPE_PRICE_ID_STANDARD=
STRIPE_PRICE_ID_SCALE=
STRIPE_PRICE_ID_STARTER=
STRIPE_PRICE_ID_HOBBY=
STRIPE_PRICE_ID_HOBBY_YEARLY=
STRIPE_PRICE_ID_STANDARD_NEW=
STRIPE_PRICE_ID_STANDARD_NEW_YEARLY=
STRIPE_PRICE_ID_GROWTH=
STRIPE_PRICE_ID_GROWTH_YEARLY=

HYPERDX_API_KEY=
HDX_NODE_BETA_MODE=1
@@ -27,6 +27,13 @@ kill_timeout = '5s'
    hard_limit = 200
    soft_limit = 100

  [[http_service.checks]]
    grace_period = "10s"
    interval = "30s"
    method = "GET"
    timeout = "5s"
    path = "/"

[[services]]
  protocol = 'tcp'
  internal_port = 8080
@@ -50,6 +50,11 @@
          "type": "boolean",
          "description": "Include the raw HTML content of the page. Will output a html key in the response.",
          "default": false
        },
        "waitFor": {
          "type": "integer",
          "description": "Wait x amount of milliseconds for the page to load to fetch content",
          "default": 0
        }
      }
    },
@@ -68,7 +68,7 @@ describe("E2E Tests for API Routes", () => {
        .set("Content-Type", "application/json")
        .send({ url: "https://firecrawl.dev" });
      expect(response.statusCode).toBe(200);
    }, 10000); // 10 seconds timeout
    }, 30000); // 30 seconds timeout

    it("should return a successful response with a valid API key", async () => {
      const response = await request(TEST_URL)
@@ -134,6 +134,27 @@ describe("E2E Tests for API Routes", () => {
      expect(response.body.data).toHaveProperty('metadata');
      expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
    }, 60000); // 60 seconds

    // TODO: add this test back once we nail the waitFor option to be more deterministic
    // it("should return a successful response with a valid API key and waitFor option", async () => {
    //   const startTime = Date.now();
    //   const response = await request(TEST_URL)
    //     .post("/v0/scrape")
    //     .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
    //     .set("Content-Type", "application/json")
    //     .send({ url: "https://firecrawl.dev", pageOptions: { waitFor: 7000 } });
    //   const endTime = Date.now();
    //   const duration = endTime - startTime;

    //   expect(response.statusCode).toBe(200);
    //   expect(response.body).toHaveProperty("data");
    //   expect(response.body.data).toHaveProperty("content");
    //   expect(response.body.data).toHaveProperty("markdown");
    //   expect(response.body.data).toHaveProperty("metadata");
    //   expect(response.body.data).not.toHaveProperty("html");
    //   expect(response.body.data.content).toContain("🔥 Firecrawl");
    //   expect(duration).toBeGreaterThanOrEqual(7000);
    // }, 12000); // 12 seconds timeout
  });

  describe("POST /v0/crawl", () => {
@@ -983,7 +1004,7 @@ describe("E2E Tests for API Routes", () => {

  describe("Rate Limiter", () => {
    it("should return 429 when rate limit is exceeded for preview token", async () => {
      for (let i = 0; i < 5; i++) {
      for (let i = 0; i < 4; i++) {
        const response = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer this_is_just_a_preview_token`)
@@ -6,7 +6,7 @@ import { withAuth } from "../../src/lib/withAuth";
import { RateLimiterRedis } from "rate-limiter-flexible";
import { setTraceAttributes } from '@hyperdx/node-opentelemetry';

export async function authenticateUser(req, res, mode?: RateLimiterMode) : Promise<AuthResponse> {
export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<AuthResponse> {
  return withAuth(supaAuthenticateUser)(req, res, mode);
}
function setTrace(team_id: string, api_key: string) {
@@ -29,6 +29,7 @@ export async function supaAuthenticateUser(
  team_id?: string;
  error?: string;
  status?: number;
  plan?: string;
}> {
  const authHeader = req.headers.authorization;
  if (!authHeader) {
@@ -104,12 +105,13 @@ export async function supaAuthenticateUser(
      case RateLimiterMode.Scrape:
        rateLimiter = getRateLimiter(RateLimiterMode.Scrape, token, subscriptionData.plan);
        break;
      case RateLimiterMode.Search:
        rateLimiter = getRateLimiter(RateLimiterMode.Search, token, subscriptionData.plan);
        break;
      case RateLimiterMode.CrawlStatus:
        rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
        break;
      case RateLimiterMode.Search:
        rateLimiter = getRateLimiter(RateLimiterMode.Search, token);
        break;

      case RateLimiterMode.Preview:
        rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
        break;
@@ -126,9 +128,11 @@ export async function supaAuthenticateUser(
    await rateLimiter.consume(iptoken);
  } catch (rateLimiterRes) {
    console.error(rateLimiterRes);
    const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1;
    const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext);
    return {
      success: false,
      error: "Rate limit exceeded. Too many requests, try again in 1 minute.",
      error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
      status: 429,
    };
  }
@@ -170,16 +174,24 @@ export async function supaAuthenticateUser(
    subscriptionData = data[0];
  }

  return { success: true, team_id: subscriptionData.team_id };
  return { success: true, team_id: subscriptionData.team_id, plan: subscriptionData.plan ?? "" };
}

function getPlanByPriceId(price_id: string) {
  switch (price_id) {
    case process.env.STRIPE_PRICE_ID_STARTER:
      return 'starter';
    case process.env.STRIPE_PRICE_ID_STANDARD:
      return 'standard';
    case process.env.STRIPE_PRICE_ID_SCALE:
      return 'scale';
    case process.env.STRIPE_PRICE_ID_HOBBY || process.env.STRIPE_PRICE_ID_HOBBY_YEARLY:
      return 'hobby';
    case process.env.STRIPE_PRICE_ID_STANDARD_NEW || process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY:
      return 'standard-new';
    case process.env.STRIPE_PRICE_ID_GROWTH || process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
      return 'growth';
    default:
      return 'starter';
      return 'free';
  }
}
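A review note on `getPlanByPriceId` above: in TypeScript, `case A || B:` compares `price_id` against the single value that `A || B` evaluates to, so the `_YEARLY` price IDs can only match when the non-yearly variable happens to be unset. A minimal sketch of the conventional fall-through form, reusing the same env vars:

```typescript
// Sketch only: stacked case labels match either price ID.
switch (price_id) {
  case process.env.STRIPE_PRICE_ID_HOBBY:
  case process.env.STRIPE_PRICE_ID_HOBBY_YEARLY:
    return 'hobby';
  case process.env.STRIPE_PRICE_ID_STANDARD_NEW:
  case process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY:
    return 'standard-new';
  case process.env.STRIPE_PRICE_ID_GROWTH:
  case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
    return 'growth';
  default:
    return 'free';
}
```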
@@ -15,7 +15,8 @@ export async function scrapeHelper(
  crawlerOptions: any,
  pageOptions: PageOptions,
  extractorOptions: ExtractorOptions,
  timeout: number
  timeout: number,
  plan?: string
): Promise<{
  success: boolean;
  error?: string;
@@ -64,7 +65,9 @@ export async function scrapeHelper(
  }

  let creditsToBeBilled = filteredDocs.length;
  const creditsPerLLMExtract = 5;
  const creditsPerLLMExtract = plan === "starter" ? 5 : 50;

  if (extractorOptions.mode === "llm-extraction") {
    creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length);
@@ -93,7 +96,7 @@ export async function scrapeHelper(
export async function scrapeController(req: Request, res: Response) {
  try {
    // make sure to authenticate user first, Bearer <token>
    const { success, team_id, error, status } = await authenticateUser(
    const { success, team_id, error, status, plan } = await authenticateUser(
      req,
      res,
      RateLimiterMode.Scrape
@@ -102,7 +105,7 @@ export async function scrapeController(req: Request, res: Response) {
      return res.status(status).json({ error });
    }
    const crawlerOptions = req.body.crawlerOptions ?? {};
    const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false };
    const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, waitFor: 0, screenshot: false };
    const extractorOptions = req.body.extractorOptions ?? {
      mode: "markdown"
    }
@@ -129,7 +132,8 @@ export async function scrapeController(req: Request, res: Response) {
      crawlerOptions,
      pageOptions,
      extractorOptions,
      timeout
      timeout,
      plan
    );
    const endTime = new Date().getTime();
    const timeTakenInSeconds = (endTime - startTime) / 1000;
@@ -28,11 +28,13 @@ export async function searchHelper(

  const tbs = searchOptions.tbs ?? null;
  const filter = searchOptions.filter ?? null;
  const num_results = searchOptions.limit ?? 7;
  const num_results_buffer = Math.floor(num_results * 1.5);

  let res = await search({
    query: query,
    advanced: advanced,
    num_results: searchOptions.limit ?? 7,
    num_results: num_results_buffer,
    tbs: tbs,
    filter: filter,
    lang: searchOptions.lang ?? "en",
@@ -47,6 +49,9 @@ export async function searchHelper(
  }

  res = res.filter((r) => !isUrlBlocked(r.url));
  if (res.length > num_results) {
    res = res.slice(0, num_results);
  }

  if (res.length === 0) {
    return { success: true, error: "No search results found", returnCode: 200 };
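The two hunks above implement the improved-blocklist flow in search: over-fetch roughly 1.5x the requested results so that dropping blocked URLs still leaves enough, then trim back to the requested count. A condensed sketch of that pattern (identifiers taken from the diff; this is not a drop-in replacement for the helper):

```typescript
// Inside searchHelper: over-fetch, filter the blocklist, then trim.
const requested = searchOptions.limit ?? 7;
const buffered = Math.floor(requested * 1.5); // headroom for filtered-out results

let results = await search({ query, num_results: buffered /* ...other options... */ });
results = results
  .filter((r) => !isUrlBlocked(r.url)) // drop blocklisted domains
  .slice(0, requested);                // trim back to the requested count
```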
@@ -168,3 +168,6 @@ app.get('/serverHealthCheck/notify', async (req, res) => {
app.get("/is-production", (req, res) => {
  res.send({ isProduction: global.isProduction });
});


// /workers health check, can't act as a load balancer, just has to be a pre-deploy thing
@@ -15,6 +15,9 @@ export type PageOptions = {
  includeHtml?: boolean;
  fallback?: boolean;
  fetchPageContent?: boolean;
  waitFor?: number;
  screenshot?: boolean;
  headers?: Record<string, string>;
};

export type ExtractorOptions = {
@@ -105,3 +108,8 @@ export class SearchResult {
    return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`;
  }
}

export interface FireEngineResponse {
  html: string;
  screenshot: string;
}
@@ -1,42 +1,42 @@
import { scrapWithFireEngine } from "../../src/scraper/WebScraper/single_url";
// import { scrapWithFireEngine } from "../../src/scraper/WebScraper/single_url";

const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
// const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

const scrapInBatches = async (
  urls: string[],
  batchSize: number,
  delayMs: number
) => {
  let successCount = 0;
  let errorCount = 0;
// const scrapInBatches = async (
//   urls: string[],
//   batchSize: number,
//   delayMs: number
// ) => {
//   let successCount = 0;
//   let errorCount = 0;

  for (let i = 0; i < urls.length; i += batchSize) {
    const batch = urls
      .slice(i, i + batchSize)
      .map((url) => scrapWithFireEngine(url));
    try {
      const results = await Promise.all(batch);
      results.forEach((data, index) => {
        if (data.trim() === "") {
          errorCount++;
        } else {
          successCount++;
          console.log(
            `Scraping result ${i + index + 1}:`,
            data.trim().substring(0, 20) + "..."
          );
        }
      });
    } catch (error) {
      console.error("Error during scraping:", error);
    }
    await delay(delayMs);
  }
// for (let i = 0; i < urls.length; i += batchSize) {
//   const batch = urls
//     .slice(i, i + batchSize)
//     .map((url) => scrapWithFireEngine(url));
//   try {
//     const results = await Promise.all(batch);
//     results.forEach((data, index) => {
//       if (data.trim() === "") {
//         errorCount++;
//       } else {
//         successCount++;
//         console.log(
//           `Scraping result ${i + index + 1}:`,
//           data.trim().substring(0, 20) + "..."
//         );
//       }
//     });
//   } catch (error) {
//     console.error("Error during scraping:", error);
//   }
//   await delay(delayMs);
// }

  console.log(`Total successful scrapes: ${successCount}`);
  console.log(`Total errored scrapes: ${errorCount}`);
};
function run() {
  const urls = Array.from({ length: 200 }, () => "https://scrapethissite.com");
  scrapInBatches(urls, 10, 1000);
}
// console.log(`Total successful scrapes: ${successCount}`);
// console.log(`Total errored scrapes: ${errorCount}`);
// };
// function run() {
//   const urls = Array.from({ length: 200 }, () => "https://scrapethissite.com");
//   scrapInBatches(urls, 10, 1000);
// }
@@ -2,7 +2,7 @@ import * as cheerio from "cheerio";
import { ScrapingBeeClient } from "scrapingbee";
import { extractMetadata } from "./utils/metadata";
import dotenv from "dotenv";
import { Document, PageOptions } from "../../lib/entities";
import { Document, PageOptions, FireEngineResponse } from "../../lib/entities";
import { parseMarkdown } from "../../lib/html-to-markdown";
import { excludeNonMainTags } from "./utils/excludeTags";
import { urlSpecificParams } from "./utils/custom/website_params";
@@ -18,7 +18,6 @@ const baseScrapers = [
  "fetch",
] as const;


export async function generateRequestParams(
  url: string,
  wait_browser: string = "domcontentloaded",
@@ -44,38 +43,52 @@ export async function generateRequestParams(
}
export async function scrapWithFireEngine(
  url: string,
  waitFor: number = 0,
  screenshot: boolean = false,
  headers?: Record<string, string>,
  options?: any
): Promise<string> {
): Promise<FireEngineResponse> {
  try {
    const reqParams = await generateRequestParams(url);
    const wait_playwright = reqParams["params"]?.wait ?? 0;
    // If the user has passed a wait parameter in the request, use that
    const waitParam = reqParams["params"]?.wait ?? waitFor;
    const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
    console.log(
      `[Fire-Engine] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam}`
    );

    const response = await fetch(process.env.FIRE_ENGINE_BETA_URL+ "/scrape", {
    const response = await fetch(process.env.FIRE_ENGINE_BETA_URL + "/scrape", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ url: url, wait: wait_playwright }),
      body: JSON.stringify({
        url: url,
        wait: waitParam,
        screenshot: screenshotParam,
        headers: headers,
      }),
    });

    if (!response.ok) {
      console.error(
        `[Fire-Engine] Error fetching url: ${url} with status: ${response.status}`
      );
      return "";
      return { html: "", screenshot: "" };
    }

    const contentType = response.headers['content-type'];
    if (contentType && contentType.includes('application/pdf')) {
      return fetchAndProcessPdf(url);
    const contentType = response.headers["content-type"];
    if (contentType && contentType.includes("application/pdf")) {
      return { html: await fetchAndProcessPdf(url), screenshot: "" };
    } else {
      const data = await response.json();
      const html = data.content;
      return html ?? "";
      const screenshot = data.screenshot;
      return { html: html ?? "", screenshot: screenshot ?? "" };
    }
  } catch (error) {
    console.error(`[Fire-Engine][c] Error fetching url: ${url} -> ${error}`);
    return "";
    return { html: "", screenshot: "" };
  }
}
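For orientation, a minimal call against the new signature; the URL and header value are placeholders, and this assumes an async context with `FIRE_ENGINE_BETA_URL` configured:

```typescript
// Sketch: scrapWithFireEngine now returns { html, screenshot } instead of a string.
const { html, screenshot } = await scrapWithFireEngine(
  "https://example.com",
  2000, // waitFor: extra milliseconds before the page is captured
  true, // also request a screenshot
  { "User-Agent": "firecrawl-test" }
);
if (!html) {
  console.warn("Fire Engine returned no HTML; the fallback order tries the next scraper.");
}
```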
@@ -101,8 +114,8 @@ export async function scrapWithScrapingBee(
    return "";
  }

  const contentType = response.headers['content-type'];
  if (contentType && contentType.includes('application/pdf')) {
  const contentType = response.headers["content-type"];
  if (contentType && contentType.includes("application/pdf")) {
    return fetchAndProcessPdf(url);
  } else {
    const decoder = new TextDecoder();
@@ -115,17 +128,22 @@ export async function scrapWithScrapingBee(
  }
}

export async function scrapWithPlaywright(url: string): Promise<string> {
export async function scrapWithPlaywright(
  url: string,
  waitFor: number = 0,
  headers?: Record<string, string>
): Promise<string> {
  try {
    const reqParams = await generateRequestParams(url);
    const wait_playwright = reqParams["params"]?.wait ?? 0;
    // If the user has passed a wait parameter in the request, use that
    const waitParam = reqParams["params"]?.wait ?? waitFor;

    const response = await fetch(process.env.PLAYWRIGHT_MICROSERVICE_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ url: url, wait: wait_playwright }),
      body: JSON.stringify({ url: url, wait: waitParam, headers: headers }),
    });

    if (!response.ok) {
@@ -135,8 +153,8 @@ export async function scrapWithPlaywright(url: string): Promise<string> {
      return "";
    }

    const contentType = response.headers['content-type'];
    if (contentType && contentType.includes('application/pdf')) {
    const contentType = response.headers["content-type"];
    if (contentType && contentType.includes("application/pdf")) {
      return fetchAndProcessPdf(url);
    } else {
      const data = await response.json();
@@ -159,8 +177,8 @@ export async function scrapWithFetch(url: string): Promise<string> {
    return "";
  }

  const contentType = response.headers['content-type'];
  if (contentType && contentType.includes('application/pdf')) {
  const contentType = response.headers["content-type"];
  if (contentType && contentType.includes("application/pdf")) {
    return fetchAndProcessPdf(url);
  } else {
    const text = await response.text();
@@ -178,8 +196,13 @@ export async function scrapWithFetch(url: string): Promise<string> {
 * @param defaultScraper The default scraper to use if the URL does not have a specific scraper order defined
 * @returns The order of scrapers to be used for scraping a URL
 */
function getScrapingFallbackOrder(defaultScraper?: string) {
  const availableScrapers = baseScrapers.filter(scraper => {
function getScrapingFallbackOrder(
  defaultScraper?: string,
  isWaitPresent: boolean = false,
  isScreenshotPresent: boolean = false,
  isHeadersPresent: boolean = false
) {
  const availableScrapers = baseScrapers.filter((scraper) => {
    switch (scraper) {
      case "scrapingBee":
      case "scrapingBeeLoad":
@@ -193,16 +216,60 @@ function getScrapingFallbackOrder(defaultScraper?: string) {
    }
  });

  const defaultOrder = ["scrapingBee", "fire-engine", "playwright", "scrapingBeeLoad", "fetch"];
  const filteredDefaultOrder = defaultOrder.filter((scraper: typeof baseScrapers[number]) => availableScrapers.includes(scraper));
  const uniqueScrapers = new Set(defaultScraper ? [defaultScraper, ...filteredDefaultOrder, ...availableScrapers] : [...filteredDefaultOrder, ...availableScrapers]);
  let defaultOrder = [
    "scrapingBee",
    "fire-engine",
    "playwright",
    "scrapingBeeLoad",
    "fetch",
  ];

  if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
    defaultOrder = [
      "fire-engine",
      "playwright",
      ...defaultOrder.filter(
        (scraper) => scraper !== "fire-engine" && scraper !== "playwright"
      ),
    ];
  }

  const filteredDefaultOrder = defaultOrder.filter(
    (scraper: (typeof baseScrapers)[number]) =>
      availableScrapers.includes(scraper)
  );
  const uniqueScrapers = new Set(
    defaultScraper
      ? [defaultScraper, ...filteredDefaultOrder, ...availableScrapers]
      : [...filteredDefaultOrder, ...availableScrapers]
  );
  const scrapersInOrder = Array.from(uniqueScrapers);
  return scrapersInOrder as typeof baseScrapers[number][];
  console.log(`Scrapers in order: ${scrapersInOrder}`);
  return scrapersInOrder as (typeof baseScrapers)[number][];
}
async function handleCustomScraping(
  text: string,
  url: string
): Promise<FireEngineResponse | null> {
  if (text.includes('<meta name="readme-deploy"')) {
    console.log(
      `Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
    );
    return await scrapWithFireEngine(url, 1000);
  }
  return null;
}

export async function scrapSingleUrl(
  urlToScrap: string,
  pageOptions: PageOptions = { onlyMainContent: true, includeHtml: false },
  pageOptions: PageOptions = {
    onlyMainContent: true,
    includeHtml: false,
    waitFor: 0,
    screenshot: false,
    headers: {}
  },
  existingHtml: string = ""
): Promise<Document> {
  urlToScrap = urlToScrap.trim();
@@ -221,13 +288,22 @@ export async function scrapSingleUrl(

  const attemptScraping = async (
    url: string,
    method: typeof baseScrapers[number]
    method: (typeof baseScrapers)[number]
  ) => {
    let text = "";
    let screenshot = "";
    switch (method) {
      case "fire-engine":
        if (process.env.FIRE_ENGINE_BETA_URL) {
          text = await scrapWithFireEngine(url);
          console.log(`Scraping ${url} with Fire Engine`);
          const response = await scrapWithFireEngine(
            url,
            pageOptions.waitFor,
            pageOptions.screenshot,
            pageOptions.headers
          );
          text = response.html;
          screenshot = response.screenshot;
        }
        break;
      case "scrapingBee":
@@ -241,7 +317,7 @@ export async function scrapSingleUrl(
        break;
      case "playwright":
        if (process.env.PLAYWRIGHT_MICROSERVICE_URL) {
          text = await scrapWithPlaywright(url);
          text = await scrapWithPlaywright(url, pageOptions.waitFor, pageOptions.headers);
        }
        break;
      case "scrapingBeeLoad":
@@ -254,13 +330,20 @@ export async function scrapSingleUrl(
        break;
    }

    // Check for custom scraping conditions
    const customScrapedContent = await handleCustomScraping(text, url);
    if (customScrapedContent) {
      text = customScrapedContent.html;
      screenshot = customScrapedContent.screenshot;
    }

    //* TODO: add an optional to return markdown or structured/extracted content
    let cleanedHtml = removeUnwantedElements(text, pageOptions);

    return [await parseMarkdown(cleanedHtml), text];
    return [await parseMarkdown(cleanedHtml), text, screenshot];
  };
  try {
    let [text, html] = ["", ""];
    let [text, html, screenshot] = ["", "", ""];
    let urlKey = urlToScrap;
    try {
      urlKey = new URL(urlToScrap).hostname.replace(/^www\./, "");
@@ -268,7 +351,12 @@ export async function scrapSingleUrl(
      console.error(`Invalid URL key, trying: ${urlToScrap}`);
    }
    const defaultScraper = urlSpecificParams[urlKey]?.defaultScraper ?? "";
    const scrapersInOrder = getScrapingFallbackOrder(defaultScraper)
    const scrapersInOrder = getScrapingFallbackOrder(
      defaultScraper,
      pageOptions && pageOptions.waitFor && pageOptions.waitFor > 0,
      pageOptions && pageOptions.screenshot && pageOptions.screenshot === true,
      pageOptions && pageOptions.headers && pageOptions.headers !== undefined
    );
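One thing to watch in this call: the three flag parameters are declared `boolean`, but expressions like `pageOptions && pageOptions.waitFor && pageOptions.waitFor > 0` can evaluate to `undefined` or `0`, and the headers check passes through the object itself. A hedged sketch of an explicitly coerced call:

```typescript
// Sketch: coerce each option to a real boolean before passing it down.
const scrapersInOrder = getScrapingFallbackOrder(
  defaultScraper,
  !!(pageOptions?.waitFor && pageOptions.waitFor > 0), // wait requested
  pageOptions?.screenshot === true,                    // screenshot requested
  pageOptions?.headers !== undefined                   // custom headers present
);
```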
    for (const scraper of scrapersInOrder) {
      // If text already exists from the crawler, use it
@@ -278,7 +366,7 @@ export async function scrapSingleUrl(
        html = existingHtml;
        break;
      }
      [text, html] = await attemptScraping(urlToScrap, scraper);
      [text, html, screenshot] = await attemptScraping(urlToScrap, scraper);
      if (text && text.trim().length >= 100) break;
      const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1;
      if (nextScraperIndex < scrapersInOrder.length) {
@@ -292,12 +380,27 @@ export async function scrapSingleUrl(

    const soup = cheerio.load(html);
    const metadata = extractMetadata(soup, urlToScrap);
    const document: Document = {

    let document: Document;
    if (screenshot && screenshot.length > 0) {
      document = {
        content: text,
        markdown: text,
        html: pageOptions.includeHtml ? html : undefined,
        metadata: {
          ...metadata,
          screenshot: screenshot,
          sourceURL: urlToScrap,
        },
      };
    } else {
      document = {
        content: text,
        markdown: text,
        html: pageOptions.includeHtml ? html : undefined,
        metadata: { ...metadata, sourceURL: urlToScrap },
      };
    }

    return document;
  } catch (error) {
@@ -141,5 +141,23 @@ export const urlSpecificParams = {
      accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    },
  },
  "firecrawl.dev": {
    defaultScraper: "fire-engine",
    params: {
    },
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      "sec-fetch-site": "same-origin",
      "sec-fetch-mode": "cors",
      "sec-fetch-dest": "empty",
      referer: "https://www.google.com/",
      "accept-language": "en-US,en;q=0.9",
      "accept-encoding": "gzip, deflate, br",
      accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    },
  }
};
@@ -1,7 +1,7 @@
import { withAuth } from "../../lib/withAuth";
import { supabase_service } from "../supabase";

const FREE_CREDITS = 300;
const FREE_CREDITS = 500;

export async function billTeam(team_id: string, credits: number) {
  return withAuth(supaBillTeam)(team_id, credits);
@@ -2,133 +2,68 @@ import { RateLimiterRedis } from "rate-limiter-flexible";
import * as redis from "redis";
import { RateLimiterMode } from "../../src/types";

const MAX_CRAWLS_PER_MINUTE_STARTER = 3;
const MAX_CRAWLS_PER_MINUTE_STANDARD = 5;
const MAX_CRAWLS_PER_MINUTE_SCALE = 20;

const MAX_SCRAPES_PER_MINUTE_STARTER = 20;
const MAX_SCRAPES_PER_MINUTE_STANDARD = 40;
const MAX_SCRAPES_PER_MINUTE_SCALE = 50;

const MAX_SEARCHES_PER_MINUTE_STARTER = 20;
const MAX_SEARCHES_PER_MINUTE_STANDARD = 40;
const MAX_SEARCHES_PER_MINUTE_SCALE = 50;

const MAX_REQUESTS_PER_MINUTE_PREVIEW = 5;
const MAX_REQUESTS_PER_MINUTE_ACCOUNT = 20;
const MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS = 150;
const RATE_LIMITS = {
  crawl: {
    free: 1,
    starter: 3,
    standard: 5,
    scale: 20,
    hobby: 3,
    standardNew: 10,
    growth: 50,
  },
  scrape: {
    free: 5,
    starter: 20,
    standardOld: 40,
    scale: 50,
    hobby: 10,
    standardNew: 50,
    growth: 500,
  },
  search: {
    free: 5,
    starter: 20,
    standard: 40,
    scale: 50,
    hobby: 10,
    standardNew: 50,
    growth: 500,
  },
  preview: 5,
  account: 20,
  crawlStatus: 150,
  testSuite: 10000,
};

export const redisClient = redis.createClient({
  url: process.env.REDIS_URL,
  legacyMode: true,
});

export const previewRateLimiter = new RateLimiterRedis({
const createRateLimiter = (keyPrefix, points) => new RateLimiterRedis({
  storeClient: redisClient,
  keyPrefix: "preview",
  points: MAX_REQUESTS_PER_MINUTE_PREVIEW,
  keyPrefix,
  points,
  duration: 60, // Duration in seconds
});

export const serverRateLimiter = new RateLimiterRedis({
  storeClient: redisClient,
  keyPrefix: "server",
  points: MAX_REQUESTS_PER_MINUTE_ACCOUNT,
  duration: 60, // Duration in seconds
});
export const previewRateLimiter = createRateLimiter("preview", RATE_LIMITS.preview);
export const serverRateLimiter = createRateLimiter("server", RATE_LIMITS.account);
export const crawlStatusRateLimiter = createRateLimiter("crawl-status", RATE_LIMITS.crawlStatus);
export const testSuiteRateLimiter = createRateLimiter("test-suite", RATE_LIMITS.testSuite);

export const crawlStatusRateLimiter = new RateLimiterRedis({
  storeClient: redisClient,
  keyPrefix: "crawl-status",
  points: MAX_REQUESTS_PER_MINUTE_CRAWL_STATUS,
  duration: 60, // Duration in seconds
});

export const testSuiteRateLimiter = new RateLimiterRedis({
  storeClient: redisClient,
  keyPrefix: "test-suite",
  points: 10000,
  duration: 60, // Duration in seconds
});


export function getRateLimiter(mode: RateLimiterMode, token: string, plan?: string){
  // Special test suite case. TODO: Change this later.
  if (token.includes("5089cefa58") || token.includes("6254cf9")){
export function getRateLimiter(mode: RateLimiterMode, token: string, plan?: string) {
  if (token.includes("5089cefa58") || token.includes("6254cf9")) {
    return testSuiteRateLimiter;
  }
  switch (mode) {
    case RateLimiterMode.Preview:
      return previewRateLimiter;
    case RateLimiterMode.CrawlStatus:
      return crawlStatusRateLimiter;
    case RateLimiterMode.Crawl:
      if (plan === "standard"){
        return new RateLimiterRedis({
          storeClient: redisClient,
          keyPrefix: "crawl-standard",
          points: MAX_CRAWLS_PER_MINUTE_STANDARD,
          duration: 60, // Duration in seconds
        });
      } else if (plan === "scale"){
        return new RateLimiterRedis({
          storeClient: redisClient,
          keyPrefix: "crawl-scale",
          points: MAX_CRAWLS_PER_MINUTE_SCALE,
          duration: 60, // Duration in seconds
        });
      }
      return new RateLimiterRedis({
        storeClient: redisClient,
        keyPrefix: "crawl-starter",
        points: MAX_CRAWLS_PER_MINUTE_STARTER,
        duration: 60, // Duration in seconds
      });
    case RateLimiterMode.Scrape:
      if (plan === "standard"){
        return new RateLimiterRedis({
          storeClient: redisClient,
          keyPrefix: "scrape-standard",
          points: MAX_SCRAPES_PER_MINUTE_STANDARD,
          duration: 60, // Duration in seconds
        });
      } else if (plan === "scale"){
        return new RateLimiterRedis({
          storeClient: redisClient,
          keyPrefix: "scrape-scale",
          points: MAX_SCRAPES_PER_MINUTE_SCALE,
          duration: 60, // Duration in seconds
        });
      }
      return new RateLimiterRedis({
        storeClient: redisClient,
        keyPrefix: "scrape-starter",
        points: MAX_SCRAPES_PER_MINUTE_STARTER,
        duration: 60, // Duration in seconds
      });
    case RateLimiterMode.Search:
      if (plan === "standard"){
        return new RateLimiterRedis({
          storeClient: redisClient,
          keyPrefix: "search-standard",
          points: MAX_SEARCHES_PER_MINUTE_STANDARD,
          duration: 60, // Duration in seconds
        });
      } else if (plan === "scale"){
        return new RateLimiterRedis({
          storeClient: redisClient,
          keyPrefix: "search-scale",
          points: MAX_SEARCHES_PER_MINUTE_SCALE,
          duration: 60, // Duration in seconds
        });
      }
      return new RateLimiterRedis({
        storeClient: redisClient,
        keyPrefix: "search-starter",
        points: MAX_SEARCHES_PER_MINUTE_STARTER,
        duration: 60, // Duration in seconds
      });
    default:
      return serverRateLimiter;
  }

  const rateLimitConfig = RATE_LIMITS[mode];
  if (!rateLimitConfig) return serverRateLimiter;

  const planKey = plan ? plan.replace("-", "") : "starter";
  const points = rateLimitConfig[planKey] || rateLimitConfig.preview;

  return createRateLimiter(`${mode}-${planKey}`, points);
}
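A caution on the fallback in the new `getRateLimiter`: the per-mode tables (`RATE_LIMITS.crawl`, `.scrape`, `.search`) define no `preview` key, so `rateLimitConfig[planKey] || rateLimitConfig.preview` yields `undefined` points for an unrecognized plan (for example, `"standard"` against the scrape table, which only defines `standardOld`). A defensive sketch, assuming the starter tier is the intended default:

```typescript
// Sketch: fall back to the mode's starter tier, then the global preview limit.
const points: number =
  rateLimitConfig[planKey] ?? rateLimitConfig.starter ?? RATE_LIMITS.preview;

return createRateLimiter(`${mode}-${planKey}`, points);
```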
@@ -57,6 +57,7 @@ export interface AuthResponse {
  team_id?: string;
  error?: string;
  status?: number;
  plan?: string;
}
apps/js-sdk/firecrawl/.env.example
@@ -0,0 +1,3 @@
API_URL=http://localhost:3002
TEST_API_KEY=fc-YOUR_API_KEY
apps/js-sdk/firecrawl/package-lock.json
@@ -1,22 +1,27 @@
{
  "name": "@mendable/firecrawl-js",
  "version": "0.0.17-beta.8",
  "version": "0.0.22",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@mendable/firecrawl-js",
      "version": "0.0.17-beta.8",
      "version": "0.0.22",
      "license": "MIT",
      "dependencies": {
        "axios": "^1.6.8",
        "dotenv": "^16.4.5",
        "uuid": "^9.0.1",
        "zod": "^3.23.8",
        "zod-to-json-schema": "^3.23.0"
      },
      "devDependencies": {
        "@jest/globals": "^29.7.0",
        "@types/axios": "^0.14.0",
        "@types/node": "^20.12.7",
        "@types/dotenv": "^8.2.0",
        "@types/jest": "^29.5.12",
        "@types/node": "^20.12.12",
        "@types/uuid": "^9.0.8",
        "jest": "^29.7.0",
        "ts-jest": "^29.1.2",
        "typescript": "^5.4.5"
@@ -1013,6 +1018,16 @@
        "@babel/types": "^7.20.7"
      }
    },
    "node_modules/@types/dotenv": {
      "version": "8.2.0",
      "resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz",
      "integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==",
      "deprecated": "This is a stub types definition. dotenv provides its own type definitions, so you do not need this installed.",
      "dev": true,
      "dependencies": {
        "dotenv": "*"
      }
    },
    "node_modules/@types/graceful-fs": {
      "version": "4.1.9",
      "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz",
@@ -1046,10 +1061,20 @@
        "@types/istanbul-lib-report": "*"
      }
    },
    "node_modules/@types/jest": {
      "version": "29.5.12",
      "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
      "integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
      "dev": true,
      "dependencies": {
        "expect": "^29.0.0",
        "pretty-format": "^29.0.0"
      }
    },
    "node_modules/@types/node": {
      "version": "20.12.7",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
      "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
      "version": "20.12.12",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz",
      "integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==",
      "dev": true,
      "dependencies": {
        "undici-types": "~5.26.4"
@@ -1061,6 +1086,12 @@
      "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==",
      "dev": true
    },
    "node_modules/@types/uuid": {
      "version": "9.0.8",
      "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
      "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==",
      "dev": true
    },
    "node_modules/@types/yargs": {
      "version": "17.0.32",
      "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz",
@@ -1602,6 +1633,17 @@
        "node": "^14.15.0 || ^16.10.0 || >=18.0.0"
      }
    },
    "node_modules/dotenv": {
      "version": "16.4.5",
      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
      "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
      "engines": {
        "node": ">=12"
      },
      "funding": {
        "url": "https://dotenvx.com"
      }
    },
    "node_modules/electron-to-chromium": {
      "version": "1.4.748",
      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz",
@@ -3641,6 +3683,18 @@
        "browserslist": ">= 4.21.0"
      }
    },
    "node_modules/uuid": {
      "version": "9.0.1",
      "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
      "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
      "funding": [
        "https://github.com/sponsors/broofa",
        "https://github.com/sponsors/ctavan"
      ],
      "bin": {
        "uuid": "dist/bin/uuid"
      }
    },
    "node_modules/v8-to-istanbul": {
      "version": "9.2.0",
      "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz",
@@ -9,7 +9,7 @@
    "build": "tsc",
    "publish": "npm run build && npm publish --access public",
    "publish-beta": "npm run build && npm publish --access public --tag beta",
    "test": "jest src/**/*.test.ts"
    "test": "jest src/__tests__/**/*.test.ts"
  },
  "repository": {
    "type": "git",
@@ -19,6 +19,8 @@
  "license": "MIT",
  "dependencies": {
    "axios": "^1.6.8",
    "dotenv": "^16.4.5",
    "uuid": "^9.0.1",
    "zod": "^3.23.8",
    "zod-to-json-schema": "^3.23.0"
  },
@@ -29,7 +31,10 @@
  "devDependencies": {
    "@jest/globals": "^29.7.0",
    "@types/axios": "^0.14.0",
    "@types/node": "^20.12.7",
    "@types/dotenv": "^8.2.0",
    "@types/jest": "^29.5.12",
    "@types/node": "^20.12.12",
    "@types/uuid": "^9.0.8",
    "jest": "^29.7.0",
    "ts-jest": "^29.1.2",
    "typescript": "^5.4.5"
apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
@@ -0,0 +1,146 @@
import FirecrawlApp from '../../index';
import { v4 as uuidv4 } from 'uuid';
import dotenv from 'dotenv';

dotenv.config();

const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = process.env.API_URL;

describe('FirecrawlApp E2E Tests', () => {
  test('should throw error for no API key', () => {
    expect(() => {
      new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
    }).toThrow("No API key provided");
  });

  test('should throw error for invalid API key on scrape', async () => {
    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
    await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
  });

  test('should throw error for blocklisted URL on scrape', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const blocklistedUrl = "https://facebook.com/fake-test";
    await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
  });

  test('should return successful response with valid preview token', async () => {
    const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
    const response = await app.scrapeUrl('https://firecrawl.dev');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain("🔥 Firecrawl");
  }, 30000); // 30 seconds timeout

  test('should return successful response for valid scrape', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.scrapeUrl('https://firecrawl.dev');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain("🔥 Firecrawl");
    expect(response.data).toHaveProperty('markdown');
    expect(response.data).toHaveProperty('metadata');
    expect(response.data).not.toHaveProperty('html');
  }, 30000); // 30 seconds timeout

  test('should return successful response with valid API key and include HTML', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
    expect(response).not.toBeNull();
    expect(response.data.content).toContain("🔥 Firecrawl");
    expect(response.data.markdown).toContain("🔥 Firecrawl");
    expect(response.data.html).toContain("<h1");
  }, 30000); // 30 seconds timeout

  test('should return successful response for valid scrape with PDF file', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
  }, 30000); // 30 seconds timeout

  test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
  }, 30000); // 30 seconds timeout

  test('should throw error for invalid API key on crawl', async () => {
    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
    await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
  });

  test('should throw error for blocklisted URL on crawl', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const blocklistedUrl = "https://twitter.com/fake-test";
    await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
  });

  test('should return successful response for crawl and wait for completion', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
    expect(response).not.toBeNull();
    expect(response[0].content).toContain("🔥 Firecrawl");
  }, 60000); // 60 seconds timeout

  test('should handle idempotency key for crawl', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const uniqueIdempotencyKey = uuidv4();
    const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
    expect(response).not.toBeNull();
    expect(response.jobId).toBeDefined();

    await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
  });

  test('should check crawl status', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
    expect(response).not.toBeNull();
    expect(response.jobId).toBeDefined();

    await new Promise(resolve => setTimeout(resolve, 30000)); // wait for 30 seconds
    const statusResponse = await app.checkCrawlStatus(response.jobId);
    expect(statusResponse).not.toBeNull();
    expect(statusResponse.status).toBe('completed');
    expect(statusResponse.data.length).toBeGreaterThan(0);
  }, 35000); // 35 seconds timeout

  test('should return successful response for search', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.search("test query");
    expect(response).not.toBeNull();
    expect(response.data[0].content).toBeDefined();
    expect(response.data.length).toBeGreaterThan(2);
  }, 30000); // 30 seconds timeout

  test('should throw error for invalid API key on search', async () => {
    const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
    await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
  });

  test('should perform LLM extraction', async () => {
    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
    const response = await app.scrapeUrl("https://mendable.ai", {
      extractorOptions: {
        mode: 'llm-extraction',
        extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
        extractionSchema: {
          type: 'object',
          properties: {
            company_mission: { type: 'string' },
            supports_sso: { type: 'boolean' },
            is_open_source: { type: 'boolean' }
          },
          required: ['company_mission', 'supports_sso', 'is_open_source']
        }
      }
    });
    expect(response).not.toBeNull();
    expect(response.data.llm_extraction).toBeDefined();
    const llmExtraction = response.data.llm_extraction;
    expect(llmExtraction.company_mission).toBeDefined();
    expect(typeof llmExtraction.supports_sso).toBe('boolean');
    expect(typeof llmExtraction.is_open_source).toBe('boolean');
  }, 30000); // 30 seconds timeout
});
@@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
 */
export interface FirecrawlAppConfig {
  apiKey?: string | null;
  apiUrl?: string | null;
}

/**
@@ -63,6 +64,7 @@ export interface JobStatusResponse {
 */
export default class FirecrawlApp {
  private apiKey: string;
  private apiUrl: string = "https://api.firecrawl.dev";

  /**
   * Initializes a new instance of the FirecrawlApp class.
@@ -107,7 +109,7 @@ export default class FirecrawlApp {
    }
    try {
      const response: AxiosResponse = await axios.post(
        "https://api.firecrawl.dev/v0/scrape",
        this.apiUrl + "/v0/scrape",
        jsonData,
        { headers },
      );
@@ -147,7 +149,7 @@ export default class FirecrawlApp {
    }
    try {
      const response: AxiosResponse = await axios.post(
        "https://api.firecrawl.dev/v0/search",
        this.apiUrl + "/v0/search",
        jsonData,
        { headers }
      );
@@ -190,7 +192,7 @@ export default class FirecrawlApp {
    }
    try {
      const response: AxiosResponse = await this.postRequest(
        "https://api.firecrawl.dev/v0/crawl",
        this.apiUrl + "/v0/crawl",
        jsonData,
        headers
      );
@@ -220,7 +222,7 @@ export default class FirecrawlApp {
    const headers: AxiosRequestHeaders = this.prepareHeaders();
    try {
      const response: AxiosResponse = await this.getRequest(
        `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
        this.apiUrl + `/v0/crawl/status/${jobId}`,
        headers
      );
      if (response.status === 200) {
@@ -292,7 +294,7 @@ export default class FirecrawlApp {
  ): Promise<any> {
    while (true) {
      const statusResponse: AxiosResponse = await this.getRequest(
        `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
        this.apiUrl + `/v0/crawl/status/${jobId}`,
        headers
      );
      if (statusResponse.status === 200) {
38
apps/js-sdk/package-lock.json
generated
@ -11,9 +11,10 @@
    "dependencies": {
      "@mendable/firecrawl-js": "^0.0.19",
      "axios": "^1.6.8",
      "uuid": "^9.0.1",
      "dotenv": "^16.4.5",
      "ts-node": "^10.9.2",
      "typescript": "^5.4.5",
      "uuid": "^9.0.1",
      "zod": "^3.23.8"
    },
    "devDependencies": {
@ -531,6 +532,17 @@
        "node": ">=0.3.1"
      }
    },
    "node_modules/dotenv": {
      "version": "16.4.5",
      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
      "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
      "engines": {
        "node": ">=12"
      },
      "funding": {
        "url": "https://dotenvx.com"
      }
    },
    "node_modules/esbuild": {
      "version": "0.20.2",
      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz",
@ -744,6 +756,18 @@
      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
      "peer": true
    },
    "node_modules/uuid": {
      "version": "9.0.1",
      "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
      "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
      "funding": [
        "https://github.com/sponsors/broofa",
        "https://github.com/sponsors/ctavan"
      ],
      "bin": {
        "uuid": "dist/bin/uuid"
      }
    },
    "node_modules/v8-compile-cache-lib": {
      "version": "3.0.1",
      "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
@ -772,18 +796,6 @@
      "peerDependencies": {
        "zod": "^3.23.3"
      }
    },
    "node_modules/uuid": {
      "version": "9.0.1",
      "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
      "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
      "funding": [
        "https://github.com/sponsors/broofa",
        "https://github.com/sponsors/ctavan"
      ],
      "bin": {
        "uuid": "dist/bin/uuid"
      }
    }
  }
}
@ -11,9 +11,8 @@
  "author": "",
  "license": "ISC",
  "dependencies": {
    "axios": "^1.6.8",
    "uuid": "^9.0.1",
    "@mendable/firecrawl-js": "^0.0.19",
    "axios": "^1.6.8",
    "ts-node": "^10.9.2",
    "typescript": "^5.4.5",
    "zod": "^3.23.8"
@ -4,10 +4,10 @@ from fastapi.responses import JSONResponse
from pydantic import BaseModel
from os import environ

PROXY_SERVER = environ.get('PROXY_SERVER', None)
PROXY_USERNAME = environ.get('PROXY_USERNAME', None)
PROXY_PASSWORD = environ.get('PROXY_PASSWORD', None)
BLOCK_MEDIA = environ.get('BLOCK_MEDIA', 'False').upper() == 'TRUE'
PROXY_SERVER = environ.get("PROXY_SERVER", None)
PROXY_USERNAME = environ.get("PROXY_USERNAME", None)
PROXY_PASSWORD = environ.get("PROXY_PASSWORD", None)
BLOCK_MEDIA = environ.get("BLOCK_MEDIA", "False").upper() == "TRUE"

app = FastAPI()

@ -15,6 +15,8 @@ app = FastAPI()
class UrlModel(BaseModel):
    url: str
    wait: int = None
    wait_until: str = "load"
    headers: dict = None


browser: Browser = None
@ -36,26 +38,37 @@ async def shutdown_event():
async def root(body: UrlModel):
    context = None
    if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD:
        context = await browser.new_context(proxy={"server": PROXY_SERVER,
        context = await browser.new_context(
            proxy={
                "server": PROXY_SERVER,
                "username": PROXY_USERNAME,
                "password": PROXY_PASSWORD})
                "password": PROXY_PASSWORD,
            }
        )
    else:
        context = await browser.new_context()

    if BLOCK_MEDIA:
        await context.route("**/*.{png,jpg,jpeg,gif,svg,mp3,mp4,avi,flac,ogg,wav,webm}",
            handler=lambda route, request: route.abort())
        await context.route(
            "**/*.{png,jpg,jpeg,gif,svg,mp3,mp4,avi,flac,ogg,wav,webm}",
            handler=lambda route, request: route.abort(),
        )

    page = await context.new_page()

    # Set headers if provided
    if body.headers:
        await page.set_extra_http_headers(body.headers)

    await page.goto(
        body.url,
        wait_until="load",
        timeout=body.timeout if body.timeout else 15000,
    )
    # Wait != timeout. Wait is the time to wait after the page is loaded - useful in some cases where "load" / "networkidle" is not enough
    if body.wait:
        await page.wait_for_timeout(body.wait)

        timeout=15000,
        wait_until=body.wait_until if body.wait_until else "load",
    )  # Set max timeout to 15s
    if body.wait:  # Check if wait parameter is provided in the request body
        await page.wait_for_timeout(
            body.wait
        )  # Convert seconds to milliseconds for playwright
    page_content = await page.content()
    await context.close()
    json_compatible_item_data = {"content": page_content}
@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id)

The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
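
A minimal sketch of catching these exceptions (the exact message depends on the failing call and status code; for example, an invalid key produces "Failed to scrape URL. Status code: 401"):

```python
from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key='your_api_key')

try:
    data = app.scrape_url('https://example.com')
except Exception as e:
    # The raised message names the action and the HTTP status code
    print(f'Scrape failed: {e}')
```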

## Running the Tests with Pytest

To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling.

### Running the Tests

To run the tests, execute the following commands:

Install pytest:
```bash
pip install pytest
```

Run:
```bash
pytest firecrawl/__tests__/e2e_withAuth/test.py
```

## Contributing

Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
@ -1,18 +1,50 @@
"""
FirecrawlApp Module

This module provides a class `FirecrawlApp` for interacting with the Firecrawl API.
It includes methods to scrape URLs, perform searches, initiate and monitor crawl jobs,
and check the status of these jobs. The module uses requests for HTTP communication
and handles retries for certain HTTP status codes.

Classes:
    - FirecrawlApp: Main class for interacting with the Firecrawl API.
"""

import os
from typing import Any, Dict, Optional
import requests
import time
from typing import Any, Dict, Optional

import requests


class FirecrawlApp:
    def __init__(self, api_key=None, api_url='https://api.firecrawl.dev'):
        """
        Initialize the FirecrawlApp instance.

        Args:
            api_key (Optional[str]): API key for authenticating with the Firecrawl API.
            api_url (Optional[str]): Base URL for the Firecrawl API.
        """
    def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
        self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
        if self.api_key is None:
            raise ValueError('No API key provided')
        self.api_url = api_url or os.getenv('FIRECRAWL_API_URL')



        self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
    def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """
        Scrape the specified URL using the Firecrawl API.

        Args:
            url (str): The URL to scrape.
            params (Optional[Dict[str, Any]]): Additional parameters for the scrape request.

        Returns:
            Any: The scraped data if the request is successful.

        Raises:
            Exception: If the scrape request fails.
        """

        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
@ -41,11 +73,11 @@ class FirecrawlApp:
            response = requests.post(
                f'{self.api_url}/v0/scrape',
                headers=headers,
                json=scrape_params
                json=scrape_params,
            )
            if response.status_code == 200:
                response = response.json()
                if response['success']:
                if response['success'] and 'data' in response:
                    return response['data']
                else:
                    raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
@ -56,6 +88,19 @@
            raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')

    def search(self, query, params=None):
        """
        Perform a search using the Firecrawl API.

        Args:
            query (str): The search query.
            params (Optional[Dict[str, Any]]): Additional parameters for the search request.

        Returns:
            Any: The search results if the request is successful.

        Raises:
            Exception: If the search request fails.
        """
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
@ -70,7 +115,8 @@
        )
        if response.status_code == 200:
            response = response.json()
            if response['success'] == True:

            if response['success'] and 'data' in response:
                return response['data']
            else:
                raise Exception(f'Failed to search. Error: {response["error"]}')
@ -81,8 +127,24 @@
        else:
            raise Exception(f'Failed to search. Status code: {response.status_code}')

    def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
        headers = self._prepare_headers()
    def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
        """
        Initiate a crawl job for the specified URL using the Firecrawl API.

        Args:
            url (str): The URL to crawl.
            params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
            wait_until_done (bool): Whether to wait until the crawl job is completed.
            timeout (int): Timeout between status checks when waiting for job completion.
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

        Returns:
            Any: The crawl job ID or the crawl results if waiting until completion.

        Raises:
            Exception: If the crawl job initiation or monitoring fails.
        """
        headers = self._prepare_headers(idempotency_key)
        json_data = {'url': url}
        if params:
            json_data.update(params)
@ -97,6 +159,18 @@
            self._handle_error(response, 'start crawl job')

    def check_crawl_status(self, job_id):
        """
        Check the status of a crawl job using the Firecrawl API.

        Args:
            job_id (str): The ID of the crawl job.

        Returns:
            Any: The status of the crawl job.

        Raises:
            Exception: If the status check request fails.
        """
        headers = self._prepare_headers()
        response = self._get_request(f'{self.api_url}/v0/crawl/status/{job_id}', headers)
        if response.status_code == 200:
@ -104,13 +178,45 @@
        else:
            self._handle_error(response, 'check crawl status')

    def _prepare_headers(self):
    def _prepare_headers(self, idempotency_key=None):
        """
        Prepare the headers for API requests.

        Args:
            idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.

        Returns:
            Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
        """
        if idempotency_key:
            return {
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {self.api_key}'
                'Authorization': f'Bearer {self.api_key}',
                'x-idempotency-key': idempotency_key
            }

        return {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}',
        }

    def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
        """
        Make a POST request with retries.

        Args:
            url (str): The URL to send the POST request to.
            data (Dict[str, Any]): The JSON data to include in the POST request.
            headers (Dict[str, str]): The headers to include in the POST request.
            retries (int): Number of retries for the request.
            backoff_factor (float): Backoff factor for retries.

        Returns:
            requests.Response: The response from the POST request.

        Raises:
            requests.RequestException: If the request fails after the specified retries.
        """
        for attempt in range(retries):
            response = requests.post(url, headers=headers, json=data)
            if response.status_code == 502:
@ -120,6 +226,21 @@
            return response

    def _get_request(self, url, headers, retries=3, backoff_factor=0.5):
        """
        Make a GET request with retries.

        Args:
            url (str): The URL to send the GET request to.
            headers (Dict[str, str]): The headers to include in the GET request.
            retries (int): Number of retries for the request.
            backoff_factor (float): Backoff factor for retries.

        Returns:
            requests.Response: The response from the GET request.

        Raises:
            requests.RequestException: If the request fails after the specified retries.
        """
        for attempt in range(retries):
            response = requests.get(url, headers=headers)
            if response.status_code == 502:
@ -129,7 +250,20 @@
            return response

    def _monitor_job_status(self, job_id, headers, timeout):
        import time
        """
        Monitor the status of a crawl job until completion.

        Args:
            job_id (str): The ID of the crawl job.
            headers (Dict[str, str]): The headers to include in the status check requests.
            timeout (int): Timeout between status checks.

        Returns:
            Any: The crawl results if the job is completed successfully.

        Raises:
            Exception: If the job fails or an error occurs during status checks.
        """
        while True:
            status_response = self._get_request(f'{self.api_url}/v0/crawl/status/{job_id}', headers)
            if status_response.status_code == 200:
@ -139,9 +273,8 @@
                        return status_data['data']
                    else:
                        raise Exception('Crawl job completed but no data was returned')
                elif status_data['status'] in ['active', 'paused', 'pending', 'queued']:
                    if timeout < 2:
                        timeout = 2
                elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
                    timeout = max(timeout, 2)
                    time.sleep(timeout)  # Wait for the specified timeout before checking again
                else:
                    raise Exception(f'Crawl job failed or was stopped. Status: {status_data["status"]}')
@ -149,6 +282,16 @@
            self._handle_error(status_response, 'check crawl status')

    def _handle_error(self, response, action):
        """
        Handle errors from API responses.

        Args:
            response (requests.Response): The response object from the API request.
            action (str): Description of the action that was being performed.

        Raises:
            Exception: An exception with a message containing the status code and error details from the response.
        """
        if response.status_code in [402, 408, 409, 500]:
            error_message = response.json().get('error', 'Unknown error occurred')
            raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}')
BIN
apps/python-sdk/dist/firecrawl-py-0.0.12.tar.gz
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl-py-0.0.9.tar.gz
vendored
BIN
apps/python-sdk/dist/firecrawl_py-0.0.12-py3-none-any.whl
vendored
Normal file
@ -0,0 +1,3 @@
API_URL=http://localhost:3002
ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py
TEST_API_KEY=fc-YOUR_API_KEY
168
apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
Normal file
@ -0,0 +1,168 @@
import importlib.util
import pytest
import time
import os
from uuid import uuid4
from dotenv import load_dotenv

load_dotenv()

API_URL = "http://127.0.0.1:3002"
ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
TEST_API_KEY = os.getenv('TEST_API_KEY')

print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")

spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
firecrawl = importlib.util.module_from_spec(spec)
spec.loader.exec_module(firecrawl)
FirecrawlApp = firecrawl.FirecrawlApp

def test_no_api_key():
    with pytest.raises(Exception) as excinfo:
        invalid_app = FirecrawlApp(api_url=API_URL)
    assert "No API key provided" in str(excinfo.value)

def test_scrape_url_invalid_api_key():
    invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
    with pytest.raises(Exception) as excinfo:
        invalid_app.scrape_url('https://firecrawl.dev')
    assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)

def test_blocklisted_url():
    blocklisted_url = "https://facebook.com/fake-test"
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    with pytest.raises(Exception) as excinfo:
        app.scrape_url(blocklisted_url)
    assert "Failed to scrape URL. Status code: 403" in str(excinfo.value)

def test_successful_response_with_valid_preview_token():
    app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
    response = app.scrape_url('https://firecrawl.dev')
    assert response is not None
    assert 'content' in response
    assert "🔥 Firecrawl" in response['content']

def test_scrape_url_e2e():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.scrape_url('https://firecrawl.dev')
    assert response is not None
    assert 'content' in response
    assert 'markdown' in response
    assert 'metadata' in response
    assert 'html' not in response
    assert "🔥 Firecrawl" in response['content']

def test_successful_response_with_valid_api_key_and_include_html():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
    assert response is not None
    assert 'content' in response
    assert 'markdown' in response
    assert 'html' in response
    assert 'metadata' in response
    assert "🔥 Firecrawl" in response['content']
    assert "🔥 Firecrawl" in response['markdown']
    assert "<h1" in response['html']

def test_successful_response_for_valid_scrape_with_pdf_file():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
    assert response is not None
    assert 'content' in response
    assert 'metadata' in response
    assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']

def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
    time.sleep(6)  # wait for 6 seconds
    assert response is not None
    assert 'content' in response
    assert 'metadata' in response
    assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']

def test_crawl_url_invalid_api_key():
    invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
    with pytest.raises(Exception) as excinfo:
        invalid_app.crawl_url('https://firecrawl.dev')
    assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value)

def test_should_return_error_for_blocklisted_url():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    blocklisted_url = "https://twitter.com/fake-test"
    with pytest.raises(Exception) as excinfo:
        app.crawl_url(blocklisted_url)
    assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value)

def test_crawl_url_wait_for_completion_e2e():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
    assert response is not None
    assert len(response) > 0
    assert 'content' in response[0]
    assert "🔥 Firecrawl" in response[0]['content']

def test_crawl_url_with_idempotency_key_e2e():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    uniqueIdempotencyKey = str(uuid4())
    response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
    assert response is not None
    assert len(response) > 0
    assert 'content' in response[0]
    assert "🔥 Firecrawl" in response[0]['content']

    with pytest.raises(Exception) as excinfo:
        app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
    assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value)

def test_check_crawl_status_e2e():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
    assert response is not None
    assert 'jobId' in response

    time.sleep(30)  # wait for 30 seconds
    status_response = app.check_crawl_status(response['jobId'])
    assert status_response is not None
    assert 'status' in status_response
    assert status_response['status'] == 'completed'
    assert 'data' in status_response
    assert len(status_response['data']) > 0

def test_search_e2e():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.search("test query")
    assert response is not None
    assert 'content' in response[0]
    assert len(response) > 2

def test_search_invalid_api_key():
    invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
    with pytest.raises(Exception) as excinfo:
        invalid_app.search("test query")
    assert "Failed to search. Status code: 401" in str(excinfo.value)

def test_llm_extraction():
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.scrape_url("https://mendable.ai", {
        'extractorOptions': {
            'mode': 'llm-extraction',
            'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
            'extractionSchema': {
                'type': 'object',
                'properties': {
                    'company_mission': {'type': 'string'},
                    'supports_sso': {'type': 'boolean'},
                    'is_open_source': {'type': 'boolean'}
                },
                'required': ['company_mission', 'supports_sso', 'is_open_source']
            }
        }
    })
    assert response is not None
    assert 'llm_extraction' in response
    llm_extraction = response['llm_extraction']
    assert 'company_mission' in llm_extraction
    assert isinstance(llm_extraction['supports_sso'], bool)
    assert isinstance(llm_extraction['is_open_source'], bool)
@ -273,7 +273,7 @@ class FirecrawlApp:
                        return status_data['data']
                    else:
                        raise Exception('Crawl job completed but no data was returned')
                elif status_data['status'] in ['active', 'paused', 'pending', 'queued']:
                elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
                    timeout = max(timeout, 2)
                    time.sleep(timeout)  # Wait for the specified timeout before checking again
                else:
@ -1,7 +1,179 @@
Metadata-Version: 2.1
Name: firecrawl-py
Version: 0.0.9
Version: 0.0.12
Summary: Python SDK for Firecrawl API
Home-page: https://github.com/mendableai/firecrawl
Author: Mendable.ai
Author-email: nick@mendable.ai
License: GNU General Public License v3 (GPLv3)
Project-URL: Documentation, https://docs.firecrawl.dev
Project-URL: Source, https://github.com/mendableai/firecrawl
Project-URL: Tracker, https://github.com/mendableai/firecrawl/issues
Keywords: SDK API firecrawl
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Topic :: Internet
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
Classifier: Topic :: Software Development
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing
Classifier: Topic :: Text Processing :: Indexing
Requires-Python: >=3.8
Description-Content-Type: text/markdown

# Firecrawl Python SDK

The Firecrawl Python SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.

## Installation

To install the Firecrawl Python SDK, you can use pip:

```bash
pip install firecrawl-py
```

## Usage

1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class (an env-var variant is sketched after the example below).

Here's an example of how to use the SDK:

```python
from firecrawl import FirecrawlApp

# Initialize the FirecrawlApp with your API key
app = FirecrawlApp(api_key='your_api_key')

# Scrape a single URL
url = 'https://mendable.ai'
scraped_data = app.scrape_url(url)

# Crawl a website
crawl_url = 'https://mendable.ai'
params = {
    'pageOptions': {
        'onlyMainContent': True
    }
}
crawl_result = app.crawl_url(crawl_url, params=params)
```
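
Since the constructor also falls back to the `FIRECRAWL_API_KEY` environment variable, you can omit the parameter entirely. A minimal sketch (assuming the key is set in your environment):

```python
import os
from firecrawl import FirecrawlApp

# FirecrawlApp reads FIRECRAWL_API_KEY when no api_key argument is given
os.environ.setdefault('FIRECRAWL_API_KEY', 'your_api_key')  # or export it in your shell
app = FirecrawlApp()
```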
### Scraping a URL

To scrape a single URL, use the `scrape_url` method. It takes the URL as a parameter and returns the scraped data as a dictionary.

```python
url = 'https://example.com'
scraped_data = app.scrape_url(url)
```

### Extracting structured data from a URL

With LLM extraction, you can easily extract structured data from any URL. We support Pydantic schemas to make this even easier. Here is how to use it:

```python
from typing import List
from pydantic import BaseModel, Field

class ArticleSchema(BaseModel):
    title: str
    points: int
    by: str
    commentsURL: str

class TopArticlesSchema(BaseModel):
    top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")

data = app.scrape_url('https://news.ycombinator.com', {
    'extractorOptions': {
        'extractionSchema': TopArticlesSchema.model_json_schema(),
        'mode': 'llm-extraction'
    },
    'pageOptions': {
        'onlyMainContent': True
    }
})
print(data["llm_extraction"])
```

### Search for a query

Used to search the web, get the most relevant results, scrape each page, and return the markdown.

```python
query = 'what is mendable?'
search_result = app.search(query)
```

### Crawling a Website

To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.

The `wait_until_done` parameter determines whether the method should wait for the crawl job to complete before returning the result. If set to `True`, the method will periodically check the status of the crawl job until it is completed or the specified `timeout` (in seconds) is reached. If set to `False`, the method will return immediately with the job ID, and you can manually check the status of the crawl job using the `check_crawl_status` method.

```python
crawl_url = 'https://example.com'
params = {
    'crawlerOptions': {
        'excludes': ['blog/*'],
        'includes': [], # leave empty for all pages
        'limit': 1000,
    },
    'pageOptions': {
        'onlyMainContent': True
    }
}
crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
```

If `wait_until_done` is set to `True`, the `crawl_url` method will return the crawl result once the job is completed. If the job fails or is stopped, an exception will be raised.

### Checking Crawl Status

To check the status of a crawl job, use the `check_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.

```python
job_id = crawl_result['jobId']
status = app.check_crawl_status(job_id)
```
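
If you started the crawl with `wait_until_done=False`, the two methods combine into a simple polling loop. A sketch, using the in-progress status values the SDK itself checks for (`active`, `paused`, `pending`, `queued`, `waiting`):

```python
import time

job = app.crawl_url('https://example.com', params=params, wait_until_done=False)
status = app.check_crawl_status(job['jobId'])
while status['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
    time.sleep(5)  # give the crawler time to make progress
    status = app.check_crawl_status(job['jobId'])

if status['status'] == 'completed':
    data = status['data']
```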

## Error Handling

The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.

## Running the Tests with Pytest

To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling.

### Running the Tests

To run the tests, execute the following commands:

Install pytest:
```bash
pip install pytest
```

Run:
```bash
pytest firecrawl/__tests__/e2e_withAuth/test.py
```

## Contributing

Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.

## License

The Firecrawl Python SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
@ -1 +1,3 @@
requests
pytest
python-dotenv
3
apps/python-sdk/requirements.txt
Normal file
@ -0,0 +1,3 @@
requests
pytest
python-dotenv
@ -1,14 +1,52 @@
from setuptools import setup, find_packages
from pathlib import Path

from setuptools import find_packages, setup

this_directory = Path(__file__).parent
long_description_content = (this_directory / "README.md").read_text()

setup(
    name='firecrawl-py',
    version='0.0.9',
    url='https://github.com/mendableai/firecrawl',
    author='Mendable.ai',
    author_email='nick@mendable.ai',
    description='Python SDK for Firecrawl API',
    name="firecrawl-py",
    version="0.0.12",
    url="https://github.com/mendableai/firecrawl",
    author="Mendable.ai",
    author_email="nick@mendable.ai",
    description="Python SDK for Firecrawl API",
    long_description=long_description_content,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    install_requires=[
        'requests',
        'pytest',
        'python-dotenv',
    ],
    python_requires='>=3.8',
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Environment :: Web Environment",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Natural Language :: English",
        "Operating System :: OS Independent",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Internet",
        "Topic :: Internet :: WWW/HTTP",
        "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Topic :: Text Processing",
        "Topic :: Text Processing :: Indexing",
    ],
    keywords="SDK API firecrawl",
    project_urls={
        "Documentation": "https://docs.firecrawl.dev",
        "Source": "https://github.com/mendableai/firecrawl",
        "Tracker": "https://github.com/mendableai/firecrawl/issues",
    },
    license="GNU General Public License v3 (GPLv3)",
)
3
examples/roastmywebsite/.eslintrc.json
Normal file
@ -0,0 +1,3 @@
{
  "extends": "next/core-web-vitals"
}
38
examples/roastmywebsite/.gitignore
vendored
Normal file
@ -0,0 +1,38 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts
.env
node_modules
5
examples/roastmywebsite/README.md
Normal file
@ -0,0 +1,5 @@
# Roast My Website 🔥

Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them.

Check it out at roastmywebsite.ai 😈
17
examples/roastmywebsite/components.json
Normal file
@ -0,0 +1,17 @@
{
  "$schema": "https://ui.shadcn.com/schema.json",
  "style": "default",
  "rsc": true,
  "tsx": true,
  "tailwind": {
    "config": "tailwind.config.ts",
    "css": "src/app/globals.css",
    "baseColor": "zinc",
    "cssVariables": false,
    "prefix": ""
  },
  "aliases": {
    "components": "@/components",
    "utils": "@/lib/utils"
  }
}
11
examples/roastmywebsite/next.config.mjs
Normal file
@ -0,0 +1,11 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
  env: {
    G1: process.env.G1,
    G2: process.env.G2,
    G3: process.env.G3,
    G4: process.env.G4,
  },
};

export default nextConfig;
6617
examples/roastmywebsite/package-lock.json
generated
Normal file
53
examples/roastmywebsite/package.json
Normal file
@ -0,0 +1,53 @@
{
  "name": "roastmywebsite",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start",
    "lint": "next lint"
  },
  "dependencies": {
    "@dqbd/tiktoken": "^1.0.15",
    "@headlessui/react": "^2.0.4",
    "@headlessui/tailwindcss": "^0.2.0",
    "@mendable/firecrawl-js": "^0.0.21",
    "@radix-ui/react-dialog": "^1.0.5",
    "@radix-ui/react-dropdown-menu": "^2.0.6",
    "@radix-ui/react-select": "^2.0.0",
    "@radix-ui/react-slot": "^1.0.2",
    "@radix-ui/react-switch": "^1.0.3",
    "@remixicon/react": "^4.2.0",
    "@tremor/react": "^3.17.2",
    "@vercel/analytics": "^1.3.1",
    "axios": "^1.7.2",
    "class-variance-authority": "^0.7.0",
    "clsx": "^2.1.1",
    "cubic-spline": "^3.0.3",
    "html2canvas": "^1.4.1",
    "image-size": "^1.1.1",
    "lucide": "^0.379.0",
    "lucide-react": "^0.379.0",
    "next": "14.2.3",
    "next-themes": "^0.3.0",
    "openai": "^4.47.3",
    "react": "^18",
    "react-dom": "^18",
    "sonner": "^1.4.41",
    "tailwind-merge": "^2.3.0",
    "tailwindcss-animate": "^1.0.7",
    "tiktoken": "^1.0.15"
  },
  "devDependencies": {
    "@tailwindcss/forms": "^0.5.7",
    "@types/node": "^20",
    "@types/react": "^18",
    "@types/react-dom": "^18",
    "eslint": "^8",
    "eslint-config-next": "14.2.3",
    "postcss": "^8",
    "tailwindcss": "^3.4.3",
    "typescript": "^5"
  }
}
8
examples/roastmywebsite/postcss.config.mjs
Normal file
@ -0,0 +1,8 @@
/** @type {import('postcss-load-config').Config} */
const config = {
  plugins: {
    tailwindcss: {},
  },
};

export default config;
BIN
examples/roastmywebsite/public/android-chrome-192x192.png
Normal file
After Width: | Height: | Size: 7.8 KiB |
BIN
examples/roastmywebsite/public/android-chrome-512x512.png
Normal file
After Width: | Height: | Size: 23 KiB |
BIN
examples/roastmywebsite/public/apple-touch-icon.png
Normal file
After Width: | Height: | Size: 7.0 KiB |
BIN
examples/roastmywebsite/public/bgd.png
Normal file
After Width: | Height: | Size: 444 KiB |
BIN
examples/roastmywebsite/public/favicon-16x16.png
Normal file
After Width: | Height: | Size: 492 B |
BIN
examples/roastmywebsite/public/favicon-32x32.png
Normal file
After Width: | Height: | Size: 997 B |
BIN
examples/roastmywebsite/public/favicon.ico
Normal file
After Width: | Height: | Size: 15 KiB |
1
examples/roastmywebsite/public/next.svg
Normal file
@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
After Width: | Height: | Size: 1.3 KiB |
BIN
examples/roastmywebsite/public/og.png
Normal file
After Width: | Height: | Size: 262 KiB |
1
examples/roastmywebsite/public/site.webmanifest
Normal file
@ -0,0 +1 @@
{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}
1
examples/roastmywebsite/public/vercel.svg
Normal file
@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 283 64"><path fill="black" d="M141 16c-11 0-19 7-19 18s9 18 20 18c7 0 13-3 16-7l-7-5c-2 3-6 4-9 4-5 0-9-3-10-7h28v-3c0-11-8-18-19-18zm-9 15c1-4 4-7 9-7s8 3 9 7h-18zm117-15c-11 0-19 7-19 18s9 18 20 18c6 0 12-3 16-7l-8-5c-2 3-5 4-8 4-5 0-9-3-11-7h28l1-3c0-11-8-18-19-18zm-10 15c2-4 5-7 10-7s8 3 9 7h-19zm-39 3c0 6 4 10 10 10 4 0 7-2 9-5l8 5c-3 5-9 8-17 8-11 0-19-7-19-18s8-18 19-18c8 0 14 3 17 8l-8 5c-2-3-5-5-9-5-6 0-10 4-10 10zm83-29v46h-9V5h9zM37 0l37 64H0L37 0zm92 5-27 48L74 5h10l18 30 17-30h10zm59 12v10l-3-1c-6 0-10 4-10 10v15h-9V17h9v9c0-5 6-9 13-9z"/></svg>
After Width: | Height: | Size: 629 B |
BIN
examples/roastmywebsite/src/app/favicon.ico
Normal file
After Width: | Height: | Size: 15 KiB |
10
examples/roastmywebsite/src/app/globals.css
Normal file
@ -0,0 +1,10 @@
@tailwind base;
@tailwind components;
@tailwind utilities;



.fill-tremor-content-emphasis {
  fill: rgb(113 113 122) !important;
}
5
examples/roastmywebsite/src/app/hooks/useGithubStars.ts
Normal file
@ -0,0 +1,5 @@
export async function useGithubStars() {
  const res = await fetch("https://api.github.com/repos/mendableai/firecrawl");
  const data = await res.json();
  return data.stargazers_count;
}
68
examples/roastmywebsite/src/app/layout.tsx
Normal file
@ -0,0 +1,68 @@
import type { Metadata } from "next";
import { Gloria_Hallelujah } from "next/font/google";
import "./globals.css";
import { Toaster } from "sonner";
import { Analytics } from "@vercel/analytics/react";
import { useEffect, useState } from "react";
import Head from "next/head";


const inter = Gloria_Hallelujah({ weight: "400", subsets: ["latin"] });
// const inter = Inter({ subsets: ["latin"] });

const meta = {
  title: "Roast My Website",
  description:
    "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 😈",
  cardImage: "/og.png",
  robots: "follow, index",
  favicon: "/favicon.ico",
  url: "https://www.roastmywebsite.ai/",
};

export async function generateMetadata(): Promise<Metadata> {
  return {
    title: meta.title,
    description: meta.description,
    referrer: "origin-when-cross-origin",
    keywords: ["Roast My Website", "Roast", "Website", "GitHub", "Firecrawl"],
    authors: [
      { name: "Roast My Website", url: "https://www.roastmywebsite.ai/" },
    ],
    creator: "Roast My Website",
    publisher: "Roast My Website",
    robots: meta.robots,
    icons: { icon: meta.favicon },
    metadataBase: new URL(meta.url),
    openGraph: {
      url: meta.url,
      title: meta.title,
      description: meta.description,
      images: [meta.cardImage],
      type: "website",
      siteName: meta.title,
    },
    twitter: {
      card: "summary_large_image",
      site: "@Vercel",
      creator: "@Vercel",
      title: meta.title,
      description: meta.description,
      images: [meta.cardImage],
    },
  };
}

export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  return (
    <html lang="en">
      <body className={inter.className}>{children}</body>
      <Analytics />
      <Toaster />
    </html>
  );
}
16
examples/roastmywebsite/src/app/page.tsx
Normal file
@ -0,0 +1,16 @@
// pages/index.tsx
import MainComponent from "@/components/main";
import { useGithubStars } from "./hooks/useGithubStars";
import GithubButton from "@/components/github-button";

export default async function Home() {
  const githubStars = await useGithubStars();
  return (
    <div className="relative">
      <div className="hidden md:flex z-10 absolute top-4 right-4 p-4">
        <GithubButton githubStars={githubStars} />
      </div>
      <MainComponent />
    </div>
  );
}
20
examples/roastmywebsite/src/components/github-button.tsx
Normal file
@ -0,0 +1,20 @@
"use client";
import { Github } from "lucide-react";
import { Button } from "./ui/button";

export default function GithubButton({ githubStars }: { githubStars: number }) {
  return (
    <Button
      onClick={() => {
        window.open("https://github.com/mendableai/firecrawl", "_blank");
      }}
      variant="outline"
      size="icon"
      className="px-3 w-22 gap-2"
    >
      <Github className="h-4 w-4" />{" "}
      {/* {githubStars ? `Star us on GitHub` : "GitHub"} */}
      See code on Github
    </Button>
  );
}
113
examples/roastmywebsite/src/components/main.tsx
Normal file
@ -0,0 +1,113 @@
"use client";

import { useEffect, useRef, useState } from "react";
import { Theme, allThemes } from "@/lib/theme";
import html2canvas from "html2canvas";
import { Button } from "@/components/ui/button";
import { Input } from "./ui/input";
import { Github } from "lucide-react";

export default function MainComponent() {
  const [roastUrl, setRoastUrl] = useState("");
  const [loading, setLoading] = useState(false);
  const [roastData, setRoastData] = useState("");

  const [spiceLevel, setSpiceLevel] = useState(2);

  return (
    <div
      className="h-screen"
      style={{
        background: `linear-gradient(to bottom right, rgba(255, 255, 255, 0.75) 58%, #fff, red )`,
      }}
    >
      <main className="relative flex h-[95vh] flex-col items-center justify-center bg-transparent bg-opacity-80">
        <div className="w-3/4 flex flex-col items-center gap-4">
          <h1 className="font-inter-tight text-4xl lg:text-5xl xl:text-6xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-zinc-500 via-zinc-900 to-zinc-900 pb-4 text-center">
            <em className="relative px-1 italic animate-text transition-all text-transparent bg-clip-text bg-gradient-to-tr from-red-600 to-red-400 inline-flex justify-center items-center text-6xl lg:text-7xl xl:text-8xl">
              Roast
            </em>
            <br />{" "}
            <span className="text-4xl lg:text-5xl xl:text-6xl">My Website</span>
          </h1>
          <div className="flex flex-col sm:flex-row items-center gap-4 justify-center w-3/5">
            <Input
              type="text"
              className="w-full p-2 border border-gray-300 rounded r"
              placeholder="https://coconut.com/"
              value={roastUrl}
              onChange={(e) => setRoastUrl(e.target.value)}
            />
            <Button
              className="px-6 py-2 bg-red-500/25 text-red-500 0 rounded-lg hover:bg-red-300 w-1/8 whitespace-nowrap"
              onClick={async () => {
                if (roastUrl) {
                  setLoading(true);
                  try {
                    const response = await fetch(
                      `/api/roastWebsite?url=${encodeURIComponent(
                        roastUrl
                      )}&spiceLevel=${spiceLevel}`
                    );
                    if (!response.ok) {
                      throw new Error(`Error: ${response.statusText}`);
                    }
                    const data = await response.json();
                    setRoastData(data.roastResult);
                  } catch (error) {
                    console.error("Error:", error);
                  } finally {
                    setLoading(false);
                  }
                }
              }}
            >
              {loading ? "Loading..." : "Get Roasted 🌶️"}
            </Button>
          </div>
          <div className="flex items-center justify-center mt-4">
            <label
              htmlFor="spice-level"
              className="mr-4 font-medium text-gray-700"
            >
              Choose your roast level:
            </label>
            <select
              id="spice-level"
              className="cursor-pointer rounded-lg border border-gray-300 bg-white py-2 px-4 text-center shadow-sm focus:border-orange-500 focus:outline-none focus:ring-1 focus:ring-orange-500"
              value={spiceLevel}
              onChange={(e) => setSpiceLevel(Number(e.target.value))}
            >
              <option value={1}>Mild 🌶️</option>
              <option value={2}>Medium 🌶️🌶️</option>
              <option value={3}>Spicy 🌶️🌶️🌶️</option>
            </select>
          </div>
          {loading ? (
            <div className="mt-4 w-3/5 p-4 border border-gray-300 rounded shadow bg-gradient-to-r from-red-500 to-red-400 animate-pulse">
              <p className="text-white text-center">Preparing your roast...</p>
            </div>
          ) : (
            roastData && (
              <div className="!font-sans mt-4 w-3/5 p-4 border border-gray-300 rounded shadow">
                <p>{roastData}</p>
              </div>
            )
          )}
        </div>
        <div
          className={`fixed bottom-0 left-0 right-0 p-4 text-white text-center font-light flex justify-center items-center gap-4`}
        >
          <a
            href="https://firecrawl.dev"
            target="_blank"
            className="text-black hover:text-orange-400"
            style={{ textShadow: "2px 2px 4px rgba(0, 0, 0, 0.15)" }}
          >
            A demo web scraping and vision extraction from Firecrawl 🔥
          </a>
        </div>
      </main>
    </div>
  );
}
56
examples/roastmywebsite/src/components/ui/button.tsx
Normal file
@ -0,0 +1,56 @@
import * as React from "react"
import { Slot } from "@radix-ui/react-slot"
import { cva, type VariantProps } from "class-variance-authority"

import { cn } from "@/lib/utils"

const buttonVariants = cva(
  "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 dark:ring-offset-zinc-950 dark:focus-visible:ring-zinc-300",
  {
    variants: {
      variant: {
        default: "bg-zinc-900 text-zinc-50 hover:bg-zinc-900/90 dark:bg-zinc-50 dark:text-zinc-900 dark:hover:bg-zinc-50/90",
        destructive:
          "bg-red-500 text-zinc-50 hover:bg-red-500/90 dark:bg-red-900 dark:text-zinc-50 dark:hover:bg-red-900/90",
        outline:
          "border border-zinc-200 bg-white hover:bg-zinc-100 hover:text-zinc-900 dark:border-zinc-800 dark:bg-zinc-950 dark:hover:bg-zinc-800 dark:hover:text-zinc-50",
        secondary:
          "bg-zinc-100 text-zinc-900 hover:bg-zinc-100/80 dark:bg-zinc-800 dark:text-zinc-50 dark:hover:bg-zinc-800/80",
        ghost: "hover:bg-zinc-100 hover:text-zinc-900 dark:hover:bg-zinc-800 dark:hover:text-zinc-50",
        link: "text-zinc-900 underline-offset-4 hover:underline dark:text-zinc-50",
      },
      size: {
        default: "h-10 px-4 py-2",
        sm: "h-9 rounded-md px-3",
        lg: "h-11 rounded-md px-8",
        icon: "h-10 w-10",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  }
)

export interface ButtonProps
  extends React.ButtonHTMLAttributes<HTMLButtonElement>,
    VariantProps<typeof buttonVariants> {
  asChild?: boolean
}

const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
  ({ className, variant, size, asChild = false, ...props }, ref) => {
    const Comp = asChild ? Slot : "button"
    return (
      <Comp
        className={cn(buttonVariants({ variant, size, className }))}
        ref={ref}
        {...props}
      />
    )
  }
)
Button.displayName = "Button"

export { Button, buttonVariants }
122
examples/roastmywebsite/src/components/ui/dialog.tsx
Normal file
@ -0,0 +1,122 @@
"use client"

import * as React from "react"
import * as DialogPrimitive from "@radix-ui/react-dialog"
import { X } from "lucide-react"

import { cn } from "@/lib/utils"

const Dialog = DialogPrimitive.Root

const DialogTrigger = DialogPrimitive.Trigger

const DialogPortal = DialogPrimitive.Portal

const DialogClose = DialogPrimitive.Close

const DialogOverlay = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Overlay>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay>
>(({ className, ...props }, ref) => (
  <DialogPrimitive.Overlay
    ref={ref}
    className={cn(
      "fixed inset-0 z-50 bg-black/80 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",
      className
    )}
    {...props}
  />
))
DialogOverlay.displayName = DialogPrimitive.Overlay.displayName

const DialogContent = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content>
>(({ className, children, ...props }, ref) => (
  <DialogPortal>
    <DialogOverlay />
    <DialogPrimitive.Content
      ref={ref}
      className={cn(
        "fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-zinc-200 bg-white p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg dark:border-zinc-800 dark:bg-zinc-950",
        className
      )}
      {...props}
    >
      {children}
      <DialogPrimitive.Close className="absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-zinc-950 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-zinc-100 data-[state=open]:text-zinc-500 dark:ring-offset-zinc-950 dark:focus:ring-zinc-300 dark:data-[state=open]:bg-zinc-800 dark:data-[state=open]:text-zinc-400">
        <X className="h-4 w-4" />
        <span className="sr-only">Close</span>
      </DialogPrimitive.Close>
    </DialogPrimitive.Content>
  </DialogPortal>
))
DialogContent.displayName = DialogPrimitive.Content.displayName

const DialogHeader = ({
  className,
  ...props
}: React.HTMLAttributes<HTMLDivElement>) => (
  <div
    className={cn(
      "flex flex-col space-y-1.5 text-center sm:text-left",
      className
    )}
    {...props}
  />
)
DialogHeader.displayName = "DialogHeader"

const DialogFooter = ({
  className,
  ...props
}: React.HTMLAttributes<HTMLDivElement>) => (
  <div
    className={cn(
      "flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",
      className
    )}
    {...props}
  />
)
DialogFooter.displayName = "DialogFooter"

const DialogTitle = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Title>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Title>
>(({ className, ...props }, ref) => (
  <DialogPrimitive.Title
    ref={ref}
    className={cn(
      "text-lg font-semibold leading-none tracking-tight",
      className
    )}
    {...props}
  />
))
DialogTitle.displayName = DialogPrimitive.Title.displayName

const DialogDescription = React.forwardRef<
  React.ElementRef<typeof DialogPrimitive.Description>,
  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Description>
>(({ className, ...props }, ref) => (
  <DialogPrimitive.Description
    ref={ref}
    className={cn("text-sm text-zinc-500 dark:text-zinc-400", className)}
    {...props}
  />
))
DialogDescription.displayName = DialogPrimitive.Description.displayName

export {
  Dialog,
  DialogPortal,
  DialogOverlay,
  DialogClose,
  DialogTrigger,
  DialogContent,
  DialogHeader,
  DialogFooter,
  DialogTitle,
  DialogDescription,
}
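A minimal composition sketch for these dialog primitives (hypothetical usage, not part of the diff):

import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogHeader,
  DialogTitle,
  DialogTrigger,
} from "@/components/ui/dialog"

export function ExampleDialog() {
  return (
    <Dialog>
      <DialogTrigger>Open</DialogTrigger>
      {/* DialogContent renders the overlay, panel, and close button via a portal */}
      <DialogContent>
        <DialogHeader>
          <DialogTitle>Roast ready</DialogTitle>
          <DialogDescription>Your website roast has been generated.</DialogDescription>
        </DialogHeader>
      </DialogContent>
    </Dialog>
  )
}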
200
examples/roastmywebsite/src/components/ui/dropdown-menu.tsx
Normal file
@ -0,0 +1,200 @@
"use client"

import * as React from "react"
import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu"
import { Check, ChevronRight, Circle } from "lucide-react"

import { cn } from "@/lib/utils"

const DropdownMenu = DropdownMenuPrimitive.Root

const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger

const DropdownMenuGroup = DropdownMenuPrimitive.Group

const DropdownMenuPortal = DropdownMenuPrimitive.Portal

const DropdownMenuSub = DropdownMenuPrimitive.Sub

const DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup

const DropdownMenuSubTrigger = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubTrigger>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubTrigger> & {
    inset?: boolean
  }
>(({ className, inset, children, ...props }, ref) => (
  <DropdownMenuPrimitive.SubTrigger
    ref={ref}
    className={cn(
      "flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none focus:bg-zinc-100 data-[state=open]:bg-zinc-100 dark:focus:bg-zinc-800 dark:data-[state=open]:bg-zinc-800",
      inset && "pl-8",
      className
    )}
    {...props}
  >
    {children}
    <ChevronRight className="ml-auto h-4 w-4" />
  </DropdownMenuPrimitive.SubTrigger>
))
DropdownMenuSubTrigger.displayName =
  DropdownMenuPrimitive.SubTrigger.displayName

const DropdownMenuSubContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubContent>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubContent>
>(({ className, ...props }, ref) => (
  <DropdownMenuPrimitive.SubContent
    ref={ref}
    className={cn(
      "z-50 min-w-[8rem] overflow-hidden rounded-md border border-zinc-200 bg-white p-1 text-zinc-950 shadow-lg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-zinc-800 dark:bg-zinc-950 dark:text-zinc-50",
      className
    )}
    {...props}
  />
))
DropdownMenuSubContent.displayName =
  DropdownMenuPrimitive.SubContent.displayName

const DropdownMenuContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>
>(({ className, sideOffset = 4, ...props }, ref) => (
  <DropdownMenuPrimitive.Portal>
    <DropdownMenuPrimitive.Content
      ref={ref}
      sideOffset={sideOffset}
      className={cn(
        "z-50 min-w-[8rem] overflow-hidden rounded-md border border-zinc-200 bg-white p-1 text-zinc-950 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-zinc-800 dark:bg-zinc-950 dark:text-zinc-50",
        className
      )}
      {...props}
    />
  </DropdownMenuPrimitive.Portal>
))
DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName

const DropdownMenuItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & {
    inset?: boolean
  }
>(({ className, inset, ...props }, ref) => (
  <DropdownMenuPrimitive.Item
    ref={ref}
    className={cn(
      "relative flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
      inset && "pl-8",
      className
    )}
    {...props}
  />
))
DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName

const DropdownMenuCheckboxItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.CheckboxItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.CheckboxItem>
>(({ className, children, checked, ...props }, ref) => (
  <DropdownMenuPrimitive.CheckboxItem
    ref={ref}
    className={cn(
      "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
      className
    )}
    checked={checked}
    {...props}
  >
    <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
      <DropdownMenuPrimitive.ItemIndicator>
        <Check className="h-4 w-4" />
      </DropdownMenuPrimitive.ItemIndicator>
    </span>
    {children}
  </DropdownMenuPrimitive.CheckboxItem>
))
DropdownMenuCheckboxItem.displayName =
  DropdownMenuPrimitive.CheckboxItem.displayName

const DropdownMenuRadioItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.RadioItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.RadioItem>
>(({ className, children, ...props }, ref) => (
  <DropdownMenuPrimitive.RadioItem
    ref={ref}
    className={cn(
      "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
      className
    )}
    {...props}
  >
    <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
      <DropdownMenuPrimitive.ItemIndicator>
        <Circle className="h-2 w-2 fill-current" />
      </DropdownMenuPrimitive.ItemIndicator>
    </span>
    {children}
  </DropdownMenuPrimitive.RadioItem>
))
DropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName

const DropdownMenuLabel = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Label>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Label> & {
    inset?: boolean
  }
>(({ className, inset, ...props }, ref) => (
  <DropdownMenuPrimitive.Label
    ref={ref}
    className={cn(
      "px-2 py-1.5 text-sm font-semibold",
      inset && "pl-8",
      className
    )}
    {...props}
  />
))
DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName

const DropdownMenuSeparator = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Separator>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Separator>
>(({ className, ...props }, ref) => (
  <DropdownMenuPrimitive.Separator
    ref={ref}
    className={cn("-mx-1 my-1 h-px bg-zinc-100 dark:bg-zinc-800", className)}
    {...props}
  />
))
DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName

const DropdownMenuShortcut = ({
  className,
  ...props
}: React.HTMLAttributes<HTMLSpanElement>) => {
  return (
    <span
      className={cn("ml-auto text-xs tracking-widest opacity-60", className)}
      {...props}
    />
  )
}
DropdownMenuShortcut.displayName = "DropdownMenuShortcut"

export {
  DropdownMenu,
  DropdownMenuTrigger,
  DropdownMenuContent,
  DropdownMenuItem,
  DropdownMenuCheckboxItem,
  DropdownMenuRadioItem,
  DropdownMenuLabel,
  DropdownMenuSeparator,
  DropdownMenuShortcut,
  DropdownMenuGroup,
  DropdownMenuPortal,
  DropdownMenuSub,
  DropdownMenuSubContent,
  DropdownMenuSubTrigger,
  DropdownMenuRadioGroup,
}
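A minimal usage sketch for these dropdown primitives (hypothetical, not part of the diff):

import {
  DropdownMenu,
  DropdownMenuContent,
  DropdownMenuItem,
  DropdownMenuLabel,
  DropdownMenuSeparator,
  DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu"

export function ExampleMenu() {
  return (
    <DropdownMenu>
      <DropdownMenuTrigger>Options</DropdownMenuTrigger>
      {/* Content is portalled and positioned relative to the trigger */}
      <DropdownMenuContent>
        <DropdownMenuLabel>Share</DropdownMenuLabel>
        <DropdownMenuSeparator />
        <DropdownMenuItem
          onSelect={() => navigator.clipboard.writeText(location.href)}
        >
          Copy link
        </DropdownMenuItem>
      </DropdownMenuContent>
    </DropdownMenu>
  )
}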
25
examples/roastmywebsite/src/components/ui/input.tsx
Normal file
@ -0,0 +1,25 @@
import * as React from "react"

import { cn } from "@/lib/utils"

export interface InputProps
  extends React.InputHTMLAttributes<HTMLInputElement> {}

const Input = React.forwardRef<HTMLInputElement, InputProps>(
  ({ className, type, ...props }, ref) => {
    return (
      <input
        type={type}
        className={cn(
          "flex h-10 w-full rounded-md border border-zinc-200 bg-white px-3 py-2 text-sm ring-offset-white file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-zinc-500 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 dark:border-zinc-800 dark:bg-zinc-950 dark:ring-offset-zinc-950 dark:placeholder:text-zinc-400 dark:focus-visible:ring-zinc-300",
          className
        )}
        ref={ref}
        {...props}
      />
    )
  }
)
Input.displayName = "Input"

export { Input }
160
examples/roastmywebsite/src/components/ui/select.tsx
Normal file
@ -0,0 +1,160 @@
"use client"

import * as React from "react"
import * as SelectPrimitive from "@radix-ui/react-select"
import { Check, ChevronDown, ChevronUp } from "lucide-react"

import { cn } from "@/lib/utils"

const Select = SelectPrimitive.Root

const SelectGroup = SelectPrimitive.Group

const SelectValue = SelectPrimitive.Value

const SelectTrigger = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.Trigger>,
  Omit<React.ComponentPropsWithoutRef<typeof SelectPrimitive.Trigger>, 'noIcon'> & { noIcon?: boolean }
>(({ className, children, noIcon, ...props }, ref) => (
  <SelectPrimitive.Trigger
    ref={ref}
    className={cn(
      "flex h-10 w-full items-center justify-between rounded-md border border-zinc-200 bg-white px-3 py-2 text-sm ring-offset-white placeholder:text-zinc-500 focus:outline-none focus:ring-2 focus:ring-zinc-950 focus:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 [&>span]:line-clamp-1 dark:border-zinc-800 dark:bg-zinc-950 dark:ring-offset-zinc-950 dark:placeholder:text-zinc-400 dark:focus:ring-zinc-300",
      className
    )}
    {...props}
  >
    {children}
    <SelectPrimitive.Icon asChild>
      {noIcon ? <></> : <ChevronDown className="h-4 w-4 opacity-50" />}
    </SelectPrimitive.Icon>
  </SelectPrimitive.Trigger>
))
SelectTrigger.displayName = SelectPrimitive.Trigger.displayName

const SelectScrollUpButton = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.ScrollUpButton>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollUpButton>
>(({ className, ...props }, ref) => (
  <SelectPrimitive.ScrollUpButton
    ref={ref}
    className={cn(
      "flex cursor-default items-center justify-center py-1",
      className
    )}
    {...props}
  >
    <ChevronUp className="h-4 w-4" />
  </SelectPrimitive.ScrollUpButton>
))
SelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName

const SelectScrollDownButton = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.ScrollDownButton>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollDownButton>
>(({ className, ...props }, ref) => (
  <SelectPrimitive.ScrollDownButton
    ref={ref}
    className={cn(
      "flex cursor-default items-center justify-center py-1",
      className
    )}
    {...props}
  >
    <ChevronDown className="h-4 w-4" />
  </SelectPrimitive.ScrollDownButton>
))
SelectScrollDownButton.displayName =
  SelectPrimitive.ScrollDownButton.displayName

const SelectContent = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Content>
>(({ className, children, position = "popper", ...props }, ref) => (
  <SelectPrimitive.Portal>
    <SelectPrimitive.Content
      ref={ref}
      className={cn(
        "relative z-50 max-h-96 min-w-[8rem] overflow-hidden rounded-md border border-zinc-200 bg-white text-zinc-950 shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 dark:border-zinc-800 dark:bg-zinc-950 dark:text-zinc-50",
        position === "popper" &&
          "data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
        className
      )}
      position={position}
      {...props}
    >
      <SelectScrollUpButton />
      <SelectPrimitive.Viewport
        className={cn(
          "p-1",
          position === "popper" &&
            "h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)]"
        )}
      >
        {children}
      </SelectPrimitive.Viewport>
      <SelectScrollDownButton />
    </SelectPrimitive.Content>
  </SelectPrimitive.Portal>
))
SelectContent.displayName = SelectPrimitive.Content.displayName

const SelectLabel = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.Label>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Label>
>(({ className, ...props }, ref) => (
  <SelectPrimitive.Label
    ref={ref}
    className={cn("py-1.5 pl-8 pr-2 text-sm font-semibold", className)}
    {...props}
  />
))
SelectLabel.displayName = SelectPrimitive.Label.displayName

const SelectItem = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Item>
>(({ className, children, ...props }, ref) => (
  <SelectPrimitive.Item
    ref={ref}
    className={cn(
      "relative flex w-full cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none focus:bg-zinc-100 focus:text-zinc-900 data-[disabled]:pointer-events-none data-[disabled]:opacity-50 dark:focus:bg-zinc-800 dark:focus:text-zinc-50",
      className
    )}
    {...props}
  >
    <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
      <SelectPrimitive.ItemIndicator>
        <Check className="h-4 w-4" />
      </SelectPrimitive.ItemIndicator>
    </span>

    <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
  </SelectPrimitive.Item>
))
SelectItem.displayName = SelectPrimitive.Item.displayName

const SelectSeparator = React.forwardRef<
  React.ElementRef<typeof SelectPrimitive.Separator>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Separator>
>(({ className, ...props }, ref) => (
  <SelectPrimitive.Separator
    ref={ref}
    className={cn("-mx-1 my-1 h-px bg-zinc-100 dark:bg-zinc-800", className)}
    {...props}
  />
))
SelectSeparator.displayName = SelectPrimitive.Separator.displayName

export {
  Select,
  SelectGroup,
  SelectValue,
  SelectTrigger,
  SelectContent,
  SelectLabel,
  SelectItem,
  SelectSeparator,
  SelectScrollUpButton,
  SelectScrollDownButton,
}
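The noIcon flag on SelectTrigger is the one deviation from the stock shadcn/ui select. A minimal usage sketch (hypothetical, not part of the diff), with option values matching the API route's spice levels:

import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select"

export function SpiceLevelSelect() {
  return (
    <Select defaultValue="1">
      {/* noIcon suppresses the default ChevronDown indicator */}
      <SelectTrigger noIcon className="w-[180px]">
        <SelectValue placeholder="Spice level" />
      </SelectTrigger>
      <SelectContent>
        <SelectItem value="1">Mild</SelectItem>
        <SelectItem value="2">Medium</SelectItem>
        <SelectItem value="3">Spicy</SelectItem>
      </SelectContent>
    </Select>
  )
}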
31
examples/roastmywebsite/src/components/ui/sonner.tsx
Normal file
@ -0,0 +1,31 @@
"use client"

import { useTheme } from "next-themes"
import { Toaster as Sonner } from "sonner"

type ToasterProps = React.ComponentProps<typeof Sonner>

const Toaster = ({ ...props }: ToasterProps) => {
  const { theme = "system" } = useTheme()

  return (
    <Sonner
      theme={theme as ToasterProps["theme"]}
      className="toaster group"
      toastOptions={{
        classNames: {
          toast:
            "group toast group-[.toaster]:bg-white group-[.toaster]:text-zinc-950 group-[.toaster]:border-zinc-200 group-[.toaster]:shadow-lg dark:group-[.toaster]:bg-zinc-950 dark:group-[.toaster]:text-zinc-50 dark:group-[.toaster]:border-zinc-800",
          description: "group-[.toast]:text-zinc-500 dark:group-[.toast]:text-zinc-400",
          actionButton:
            "group-[.toast]:bg-zinc-900 group-[.toast]:text-zinc-50 dark:group-[.toast]:bg-zinc-50 dark:group-[.toast]:text-zinc-900",
          cancelButton:
            "group-[.toast]:bg-zinc-100 group-[.toast]:text-zinc-500 dark:group-[.toast]:bg-zinc-800 dark:group-[.toast]:text-zinc-400",
        },
      }}
      {...props}
    />
  )
}

export { Toaster }
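A minimal usage sketch (hypothetical, not part of the diff): mount the Toaster once near the app root, then fire notifications with sonner's toast function from any client component.

import type { ReactNode } from "react"
import { toast } from "sonner"

import { Toaster } from "@/components/ui/sonner"

// Rendered once, e.g. in the root layout
export function AppShell({ children }: { children: ReactNode }) {
  return (
    <>
      {children}
      <Toaster />
    </>
  )
}

// Called from any client component
export function notifyDone() {
  toast("Roast generated", { description: "Scroll down to read it." })
}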
29
examples/roastmywebsite/src/components/ui/switch.tsx
Normal file
@ -0,0 +1,29 @@
"use client"

import * as React from "react"
import * as SwitchPrimitives from "@radix-ui/react-switch"

import { cn } from "@/lib/utils"

const Switch = React.forwardRef<
  React.ElementRef<typeof SwitchPrimitives.Root>,
  React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
>(({ className, ...props }, ref) => (
  <SwitchPrimitives.Root
    className={cn(
      "peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 focus-visible:ring-offset-white disabled:cursor-not-allowed disabled:opacity-50 data-[state=checked]:bg-zinc-900 data-[state=unchecked]:bg-zinc-200 dark:focus-visible:ring-zinc-300 dark:focus-visible:ring-offset-zinc-950 dark:data-[state=checked]:bg-zinc-50 dark:data-[state=unchecked]:bg-zinc-800",
      className
    )}
    {...props}
    ref={ref}
  >
    <SwitchPrimitives.Thumb
      className={cn(
        "pointer-events-none block h-5 w-5 rounded-full bg-white shadow-lg ring-0 transition-transform data-[state=checked]:translate-x-5 data-[state=unchecked]:translate-x-0 dark:bg-zinc-950"
      )}
    />
  </SwitchPrimitives.Root>
))
Switch.displayName = SwitchPrimitives.Root.displayName

export { Switch }
24
examples/roastmywebsite/src/components/ui/textarea.tsx
Normal file
@ -0,0 +1,24 @@
import * as React from "react"

import { cn } from "@/lib/utils"

export interface TextareaProps
  extends React.TextareaHTMLAttributes<HTMLTextAreaElement> {}

const Textarea = React.forwardRef<HTMLTextAreaElement, TextareaProps>(
  ({ className, ...props }, ref) => {
    return (
      <textarea
        className={cn(
          "flex min-h-[80px] w-full rounded-md border border-zinc-200 bg-white px-3 py-2 text-sm ring-offset-white placeholder:text-zinc-500 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-zinc-950 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 dark:border-zinc-800 dark:bg-zinc-950 dark:ring-offset-zinc-950 dark:placeholder:text-zinc-400 dark:focus-visible:ring-zinc-300",
          className
        )}
        ref={ref}
        {...props}
      />
    )
  }
)
Textarea.displayName = "Textarea"

export { Textarea }
75
examples/roastmywebsite/src/lib/LLM/llm.ts
Normal file
@ -0,0 +1,75 @@
import OpenAI from "openai";
import { encoding_for_model, type TiktokenModel } from "@dqbd/tiktoken";

/**
 * Generate a roast for a website based on its screenshot and markdown content.
 * @param promptText - Initial prompt text for the roast.
 * @param screenshotUrl - URL of the screenshot of the website.
 * @param content - Raw markdown content of the website.
 */
export async function roastPrompt(promptText: string, screenshotUrl: string, content: string) {
  try {
    // Initialize OpenAI with the API key from environment variables
    const openai = new OpenAI({
      apiKey: process.env.OPEN_AI_KEY
    });

    const contentTruncated = truncateContentToFit(content, 30000);

    const response = await openai.chat.completions.create({
      model: "gpt-4o",
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: promptText },
            { type: "image_url", image_url: { url: screenshotUrl, detail: "low" } },
            { type: "text", text: contentTruncated }
          ],
        },
      ],
    });
    // Return the first choice's message instead of logging it
    return response.choices[0].message.content;
  } catch (error) {
    console.error("Error generating roast:", error);
    // Assert error as an instance of Error to access its message property
    return `Error generating roast: ${(error as Error).message}`;
  }
}

export function numTokensFromString(message: string, model: string): number {
  const encoder = encoding_for_model(model as TiktokenModel);

  // Encode the message into tokens
  const tokens = encoder.encode(message);

  // Free the encoder resources after use
  encoder.free();

  // Return the number of tokens
  return tokens.length;
}

function truncateContentToFit(content: string, maxTokens: number): string {
  // Rough heuristic: one token is about four characters of English text
  const modifier = 4;

  let contentToTruncate = content;
  const numTokens = numTokensFromString(contentToTruncate, "gpt-4");

  if (numTokens > maxTokens) {
    // Trim the document to roughly the maximum number of tokens (tokens != characters)
    contentToTruncate = content.slice(0, maxTokens * modifier);
  }
  return contentToTruncate;
}
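A quick usage sketch (hypothetical inputs, not part of the diff). Note the truncation math: 30,000 tokens at the assumed ~4 characters per token keeps at most 120,000 characters of content before it reaches the model.

import { numTokensFromString, roastPrompt } from "@/lib/LLM/llm";

async function demo() {
  // screenshotUrl would normally come from Firecrawl's scrape result
  const roast = await roastPrompt(
    "Give me a light hearted roast for this website:",
    "https://example.com/screenshot.png",
    "# Example\nSome scraped markdown content..."
  );
  console.log(roast);

  // Token counting uses the gpt-4 encoding via @dqbd/tiktoken
  console.log(numTokensFromString("Hello, world!", "gpt-4"));
}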
6
examples/roastmywebsite/src/lib/utils.ts
Normal file
@ -0,0 +1,6 @@
import { type ClassValue, clsx } from "clsx"
import { twMerge } from "tailwind-merge"

export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs))
}
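For illustration (hypothetical values): cn merges conditional class names via clsx, then tailwind-merge resolves conflicting Tailwind utilities so the later one wins.

import { cn } from "@/lib/utils"

// clsx drops the falsy entry; twMerge collapses the conflicting padding classes
cn("px-2 py-1", false && "hidden", "px-4")
// => "py-1 px-4"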
76
examples/roastmywebsite/src/pages/api/roastWebsite.ts
Normal file
@ -0,0 +1,76 @@
// pages/api/roastWebsite.ts
import { NextApiRequest, NextApiResponse } from 'next';
import FirecrawlApp from '@mendable/firecrawl-js';
import { roastPrompt } from '../../lib/LLM/llm';

const mildPrompt = "Give me a light hearted roast for this website:"
const mediumPrompt = "Give me a roast for this website:"
const spicyPrompt = "Roast this website. The person who made this website wants to be roasted really hard. I mean REALLY hard, don't hold back, and don't worry about hurting their feelings. Be mean but concisely mean. No more than 2 paragraphs."

async function getScreenShotAndHtml(url: string) {
  const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });
  const scrapeResult = await app.scrapeUrl(url, {
    pageOptions: {
      includeHtml: true,
      screenshot: true
    }
  });
  return {
    screenshotUrl: scrapeResult.data.metadata.screenshot,
    htmlContent: scrapeResult.data.html
  };
}

export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  const rawUrl = typeof req.query.url === 'string' ? req.query.url : null;
  const spiceLevel = typeof req.query.spiceLevel === 'string' ? parseInt(req.query.spiceLevel, 10) : null;

  if (!rawUrl || spiceLevel === null || isNaN(spiceLevel)) {
    res.status(400).json({ error: 'Invalid query parameters' });
    return;
  }

  try {
    const { screenshotUrl, htmlContent } = await getScreenShotAndHtml(rawUrl);

    // Pick the roast prompt that matches the requested spice level
    let roastMessage: string;
    switch (spiceLevel) {
      case 1:
        roastMessage = mildPrompt;
        break;
      case 2:
        roastMessage = mediumPrompt;
        break;
      case 3:
        roastMessage = spicyPrompt;
        break;
      default:
        // Reject any spice level outside 1-3
        res.status(400).json({ error: 'Invalid spice level' });
        return;
    }

    // Pass the screenshot and raw HTML straight to the LLM to generate the roast
    const roastResult = await roastPrompt(roastMessage, screenshotUrl, htmlContent);

    res.status(200).json({ screenshotUrl, htmlContent, roastResult });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
}
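A hypothetical client-side caller for this route (not part of the diff; the response shape mirrors the 200 handler above):

async function requestRoast(url: string, spiceLevel: 1 | 2 | 3) {
  // The route expects `url` and `spiceLevel` as query parameters
  const res = await fetch(
    `/api/roastWebsite?url=${encodeURIComponent(url)}&spiceLevel=${spiceLevel}`
  );
  if (!res.ok) throw new Error(`Roast request failed: ${res.status}`);
  return res.json() as Promise<{
    screenshotUrl: string;
    htmlContent: string;
    roastResult: string | null;
  }>;
}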
137
examples/roastmywebsite/tailwind.config.ts
Normal file
@ -0,0 +1,137 @@
import type { Config } from 'tailwindcss';
import colors from 'tailwindcss/colors';

const config: Config = {
  darkMode: 'class',
  content: [
    './src/**/*.{js,ts,jsx,tsx}',

    // Path to Tremor module
    './node_modules/@tremor/**/*.{js,ts,jsx,tsx}',
  ],
  theme: {
    transparent: 'transparent',
    current: 'currentColor',
    extend: {
      colors: {
        // light mode
        tremor: {
          brand: {
            faint: colors.blue[50],
            muted: colors.blue[200],
            subtle: colors.blue[400],
            DEFAULT: colors.blue[500],
            emphasis: colors.blue[700],
            inverted: colors.white,
          },
          background: {
            muted: colors.gray[50],
            subtle: colors.gray[100],
            DEFAULT: colors.white,
            emphasis: colors.gray[700],
          },
          border: {
            DEFAULT: colors.gray[200],
          },
          ring: {
            DEFAULT: colors.gray[200],
          },
          content: {
            subtle: colors.gray[400],
            DEFAULT: colors.gray[500],
            emphasis: colors.gray[700],
            strong: colors.gray[900],
            inverted: colors.white,
          },
        },
        // dark mode
        'dark-tremor': {
          brand: {
            faint: '#0B1229',
            muted: colors.blue[950],
            subtle: colors.blue[800],
            DEFAULT: colors.blue[500],
            emphasis: colors.blue[400],
            inverted: colors.blue[950],
          },
          background: {
            muted: '#131A2B',
            subtle: colors.gray[800],
            DEFAULT: colors.gray[900],
            emphasis: colors.gray[300],
          },
          border: {
            DEFAULT: colors.gray[800],
          },
          ring: {
            DEFAULT: colors.gray[800],
          },
          content: {
            subtle: colors.gray[600],
            DEFAULT: colors.gray[500],
            emphasis: colors.gray[200],
            strong: colors.gray[50],
            inverted: colors.gray[950],
          },
        },
      },
      boxShadow: {
        // light
        'tremor-input': '0 1px 2px 0 rgb(0 0 0 / 0.05)',
        'tremor-card':
          '0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)',
        'tremor-dropdown':
          '0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)',
        // dark
        'dark-tremor-input': '0 1px 2px 0 rgb(0 0 0 / 0.05)',
        'dark-tremor-card':
          '0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1)',
        'dark-tremor-dropdown':
          '0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)',
      },
      borderRadius: {
        'tremor-small': '0.375rem',
        'tremor-default': '0.5rem',
        'tremor-full': '9999px',
      },
      fontSize: {
        'tremor-label': ['0.75rem', { lineHeight: '1rem' }],
        'tremor-default': ['0.875rem', { lineHeight: '1.25rem' }],
        'tremor-title': ['1.125rem', { lineHeight: '1.75rem' }],
        'tremor-metric': ['1.875rem', { lineHeight: '2.25rem' }],
      },
    },
  },
  safelist: [
    {
      pattern:
        /^(bg-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
      variants: ['hover', 'ui-selected'],
    },
    {
      pattern:
        /^(text-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
      variants: ['hover', 'ui-selected'],
    },
    {
      pattern:
        /^(border-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
      variants: ['hover', 'ui-selected'],
    },
    {
      pattern:
        /^(ring-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
    },
    {
      pattern:
        /^(stroke-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
    },
    {
      pattern:
        /^(fill-(?:slate|gray|zinc|neutral|stone|red|orange|amber|yellow|lime|green|emerald|teal|cyan|sky|blue|indigo|violet|purple|fuchsia|pink|rose)-(?:50|100|200|300|400|500|600|700|800|900|950))$/,
    },
  ],
  plugins: [require('@headlessui/tailwindcss'), require('@tailwindcss/forms')],
};

export default config;
26
examples/roastmywebsite/tsconfig.json
Normal file
@ -0,0 +1,26 @@
{
  "compilerOptions": {
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [
      {
        "name": "next"
      }
    ],
    "paths": {
      "@/*": ["./src/*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
  "exclude": ["node_modules"]
}