0

Merge branch 'main' into pr/296

This commit is contained in:
Nicolas 2024-06-18 12:55:45 -04:00
commit 26e8bfc23a
5 changed files with 288 additions and 79 deletions

View File

@ -0,0 +1,87 @@
import { getRateLimiter, serverRateLimiter, testSuiteRateLimiter, redisClient } from "./rate-limiter";
import { RateLimiterMode } from "../../src/types";
import { RateLimiterRedis } from "rate-limiter-flexible";
/**
 * Jest suite for the rate-limiter service.
 *
 * Exercises `getRateLimiter` for the special test-suite tokens, for an
 * unknown mode, for several mode/plan combinations, and for calls without
 * a plan. One case also constructs a `RateLimiterRedis` directly and checks
 * that it reports the options it was given. The shared `redisClient` is
 * connected before the suite runs and disconnected once it has finished.
 */
describe("Rate Limiter Service", () => {
  beforeAll(async () => {
    await redisClient.connect();
  });

  afterAll(async () => {
    await redisClient.disconnect();
  });

  it("should return the testSuiteRateLimiter for specific tokens", () => {
    // Both tokens below are expected to resolve to the shared test-suite limiter.
    expect(getRateLimiter("crawl" as RateLimiterMode, "a01ccae")).toBe(
      testSuiteRateLimiter
    );
    expect(getRateLimiter("scrape" as RateLimiterMode, "6254cf9")).toBe(
      testSuiteRateLimiter
    );
  });

  it("should return the serverRateLimiter if mode is not found", () => {
    const fallback = getRateLimiter(
      "nonexistent" as RateLimiterMode,
      "someToken"
    );
    expect(fallback).toBe(serverRateLimiter);
  });

  it("should return the correct rate limiter based on mode and plan", () => {
    const crawlFree = getRateLimiter(
      "crawl" as RateLimiterMode,
      "someToken",
      "free"
    );
    expect(crawlFree.points).toBe(2);

    const scrapeStandard = getRateLimiter(
      "scrape" as RateLimiterMode,
      "someToken",
      "standard"
    );
    expect(scrapeStandard.points).toBe(50);

    const searchGrowth = getRateLimiter(
      "search" as RateLimiterMode,
      "someToken",
      "growth"
    );
    expect(searchGrowth.points).toBe(500);

    const crawlStatusGrowth = getRateLimiter(
      "crawlStatus" as RateLimiterMode,
      "someToken",
      "growth"
    );
    expect(crawlStatusGrowth.points).toBe(150);
  });

  it("should return the default rate limiter if plan is not provided", () => {
    const crawlDefault = getRateLimiter("crawl" as RateLimiterMode, "someToken");
    expect(crawlDefault.points).toBe(3);

    const scrapeDefault = getRateLimiter(
      "scrape" as RateLimiterMode,
      "someToken"
    );
    expect(scrapeDefault.points).toBe(20);
  });

  it("should create a new RateLimiterRedis instance with correct parameters", () => {
    const options = {
      storeClient: redisClient,
      keyPrefix: "test-prefix",
      points: 10,
      duration: 60,
    };
    const direct = new RateLimiterRedis(options);

    // The instance should report the options it was constructed with.
    expect(direct.keyPrefix).toBe(options.keyPrefix);
    expect(direct.points).toBe(options.points);
    expect(direct.duration).toBe(60);
  });

  it("should return the correct rate limiter for 'preview' mode", () => {
    const withPlan = getRateLimiter(
      "preview" as RateLimiterMode,
      "someToken",
      "free"
    );
    expect(withPlan.points).toBe(5);

    const withoutPlan = getRateLimiter(
      "preview" as RateLimiterMode,
      "someToken"
    );
    expect(withoutPlan.points).toBe(5);
  });

  it("should return the correct rate limiter for 'account' mode", () => {
    const withPlan = getRateLimiter(
      "account" as RateLimiterMode,
      "someToken",
      "free"
    );
    expect(withPlan.points).toBe(100);

    const withoutPlan = getRateLimiter(
      "account" as RateLimiterMode,
      "someToken"
    );
    expect(withoutPlan.points).toBe(100);
  });

  it("should return the correct rate limiter for 'crawlStatus' mode", () => {
    const withPlan = getRateLimiter(
      "crawlStatus" as RateLimiterMode,
      "someToken",
      "free"
    );
    expect(withPlan.points).toBe(150);

    const withoutPlan = getRateLimiter(
      "crawlStatus" as RateLimiterMode,
      "someToken"
    );
    expect(withoutPlan.points).toBe(150);
  });
});

View File

@ -1,11 +1,14 @@
import FirecrawlApp, { JobStatusResponse } from '@mendable/firecrawl-js';
import FirecrawlApp, { JobStatusResponse } from './firecrawl/src/index' //'@mendable/firecrawl-js';
import { z } from "zod";
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
// Scrape a website:
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
console.log(scrapeResult.data.content)
if (scrapeResult.data) {
console.log(scrapeResult.data.content)
}
// Crawl a website:
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
@ -23,12 +26,13 @@ while (true) {
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
}
console.log(job.data[0].content);
if (job.data) {
console.log(job.data[0].content);
}
// Search for a query:
const query = 'what is mendable?'
const searchResult = await app.search(query)
console.log(searchResult)
// LLM Extraction:
// Define schema to extract contents into using zod schema
@ -50,7 +54,9 @@ let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
extractorOptions: { extractionSchema: zodSchema },
});
console.log(llmExtractionResult.data.llm_extraction);
if (llmExtractionResult.data) {
console.log(llmExtractionResult.data.llm_extraction);
}
// Define schema to extract contents into using json schema
const jsonSchema = {
@ -80,4 +86,7 @@ llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
extractorOptions: { extractionSchema: jsonSchema },
});
console.log(llmExtractionResult.data.llm_extraction);
if (llmExtractionResult.data) {
console.log(llmExtractionResult.data.llm_extraction);
}

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.26",
"version": "0.0.27",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js",
"types": "types/index.d.ts",

View File

@ -5,61 +5,117 @@ import { zodToJsonSchema } from "zod-to-json-schema";
* Configuration interface for FirecrawlApp.
*/
export interface FirecrawlAppConfig {
apiKey?: string | null;
apiUrl?: string | null;
apiKey?: string | null;
apiUrl?: string | null;
}
/**
* Generic parameter interface.
* Metadata for a Firecrawl document.
*/
export interface Params {
[key: string]: any;
extractorOptions?: {
extractionSchema: z.ZodSchema | any;
mode?: "llm-extraction";
extractionPrompt?: string;
};
export interface FirecrawlDocumentMetadata {
title?: string;
description?: string;
language?: string;
keywords?: string;
robots?: string;
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
dctermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dctermsType?: string;
dcType?: string;
dctermsAudience?: string;
dctermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dctermsKeywords?: string;
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
pageStatusCode?: number;
pageError?: string;
[key: string]: any;
}
/**
 * Document interface for Firecrawl.
 *
 * Describes one document as typed by this SDK. Only `content` and
 * `metadata` are required; every other field is optional and may be absent.
 */
export interface FirecrawlDocument {
id?: string;
url?: string;
// Required string body of the document.
content: string;
// Optional alternative renderings. NOTE(review): presumably only present
// when the corresponding format was requested — confirm against the API.
markdown?: string;
html?: string;
// Free-form record of string keys to untyped values; no schema is enforced here.
llm_extraction?: Record<string, any>;
// NOTE(review): typed as Date; if these values arrive via parsed JSON they
// would be strings at runtime — confirm where conversion happens.
createdAt?: Date;
updatedAt?: Date;
type?: string;
// Required metadata object; see FirecrawlDocumentMetadata for its fields.
metadata: FirecrawlDocumentMetadata;
childrenLinks?: string[];
provider?: string;
warning?: string;
index?: number;
}
/**
* Response interface for scraping operations.
*/
export interface ScrapeResponse {
success: boolean;
data?: any;
error?: string;
success: boolean;
data?: FirecrawlDocument;
error?: string;
}
/**
* Response interface for searching operations.
*/
* Response interface for searching operations.
*/
export interface SearchResponse {
success: boolean;
data?: any;
error?: string;
success: boolean;
data?: FirecrawlDocument[];
error?: string;
}
/**
* Response interface for crawling operations.
*/
* Response interface for crawling operations.
*/
export interface CrawlResponse {
success: boolean;
jobId?: string;
data?: any;
error?: string;
success: boolean;
jobId?: string;
data?: FirecrawlDocument[];
error?: string;
}
/**
* Response interface for job status checks.
*/
* Response interface for job status checks.
*/
export interface JobStatusResponse {
success: boolean;
status: string;
jobId?: string;
data?: any;
partial_data?: any,
error?: string;
success: boolean;
status: string;
jobId?: string;
data?: FirecrawlDocument[];
partial_data?: FirecrawlDocument[];
error?: string;
}
/**
 * Generic parameter interface.
 *
 * An open-ended options bag: the index signature accepts any string key
 * with an untyped value, so only `extractorOptions` is checked structurally.
 */
export interface Params {
// Any additional key is allowed and typed as `any`.
[key: string]: any;
// Optional extraction settings; when provided, `extractionSchema` is required.
extractorOptions?: {
// `z.ZodSchema | any` collapses to `any` for the type checker, so this
// accepts any value. NOTE(review): intended inputs appear to be a zod
// schema or a plain JSON-schema object — confirm.
extractionSchema: z.ZodSchema | any;
// The only literal value permitted by the type is "llm-extraction".
mode?: "llm-extraction";
extractionPrompt?: string;
};
}
/**
* Main class for interacting with the Firecrawl API.
*/

View File

@ -8,8 +8,101 @@ export interface FirecrawlAppConfig {
apiUrl?: string | null;
}
/**
 * Metadata for a Firecrawl document.
 *
 * Every named field is optional. The trailing index signature additionally
 * accepts any other string key with an untyped value.
 */
export interface FirecrawlDocumentMetadata {
title?: string;
description?: string;
language?: string;
keywords?: string;
robots?: string;
// NOTE(review): the `og*` names look like Open Graph tags — inferred from
// the names only; confirm against the API.
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
// The only array-valued named field in this interface.
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
// NOTE(review): the `dc*` / `dcterms*` names look like Dublin Core terms —
// inferred from the names only; confirm against the API.
dctermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dctermsType?: string;
dcType?: string;
dctermsAudience?: string;
dctermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dctermsKeywords?: string;
// Typed as plain strings, not Date objects.
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
// The only numeric named field in this interface.
pageStatusCode?: number;
pageError?: string;
// Any additional key is allowed and typed as `any`.
[key: string]: any;
}
/**
 * Document interface for Firecrawl.
 *
 * Describes one document as typed by this SDK. Only `content` and
 * `metadata` are required; every other field is optional and may be absent.
 */
export interface FirecrawlDocument {
id?: string;
url?: string;
// Required string body of the document.
content: string;
// Optional alternative renderings. NOTE(review): presumably only present
// when the corresponding format was requested — confirm against the API.
markdown?: string;
html?: string;
// Free-form record of string keys to untyped values; no schema is enforced here.
llm_extraction?: Record<string, any>;
// NOTE(review): typed as Date; if these values arrive via parsed JSON they
// would be strings at runtime — confirm where conversion happens.
createdAt?: Date;
updatedAt?: Date;
type?: string;
// Required metadata object; see FirecrawlDocumentMetadata for its fields.
metadata: FirecrawlDocumentMetadata;
childrenLinks?: string[];
provider?: string;
warning?: string;
index?: number;
}
/**
 * Response interface for scraping operations.
 *
 * `success` is always present; `data` and `error` are both optional, so
 * callers must check `data` before dereferencing it.
 */
export interface ScrapeResponse {
success: boolean;
// A single document (not an array).
data?: FirecrawlDocument;
// NOTE(review): presumably set only when `success` is false — confirm.
error?: string;
}
/**
 * Response interface for searching operations.
 *
 * `success` is always present; `data` and `error` are both optional, so
 * callers must check `data` before indexing into it.
 */
export interface SearchResponse {
success: boolean;
// An array of documents.
data?: FirecrawlDocument[];
// NOTE(review): presumably set only when `success` is false — confirm.
error?: string;
}
/**
 * Response interface for crawling operations.
 *
 * `success` is always present; `jobId`, `data` and `error` are all optional.
 */
export interface CrawlResponse {
success: boolean;
// NOTE(review): presumably returned when the crawl is started without
// waiting for completion, with `data` returned otherwise — confirm.
jobId?: string;
// An array of documents.
data?: FirecrawlDocument[];
// NOTE(review): presumably set only when `success` is false — confirm.
error?: string;
}
/**
 * Response interface for job status checks.
 *
 * `success` and `status` are always present; `jobId`, `data`,
 * `partial_data` and `error` are all optional.
 */
export interface JobStatusResponse {
success: boolean;
// Typed as a free-form string; the set of possible values is not
// constrained by this declaration.
status: string;
jobId?: string;
// An array of documents; check for presence before indexing.
data?: FirecrawlDocument[];
// Same element type as `data`. NOTE(review): presumably the documents
// gathered so far while the job is still running — confirm.
partial_data?: FirecrawlDocument[];
// NOTE(review): presumably set only when `success` is false — confirm.
error?: string;
}
/**
* Generic parameter interface.
*/
export interface Params {
[key: string]: any;
extractorOptions?: {
@ -18,42 +111,6 @@ export interface Params {
extractionPrompt?: string;
};
}
/**
* Response interface for scraping operations.
*/
export interface ScrapeResponse {
success: boolean;
data?: any;
error?: string;
}
/**
* Response interface for searching operations.
*/
export interface SearchResponse {
success: boolean;
data?: any;
error?: string;
}
/**
* Response interface for crawling operations.
*/
export interface CrawlResponse {
success: boolean;
jobId?: string;
data?: any;
error?: string;
}
/**
* Response interface for job status checks.
*/
export interface JobStatusResponse {
success: boolean;
status: string;
jobId?: string;
data?: any;
partial_data?: any;
error?: string;
}
/**
* Main class for interacting with the Firecrawl API.
*/