
Merge branch 'main' into pr/296

Nicolas 2024-06-18 12:55:45 -04:00
commit 26e8bfc23a
5 changed files with 288 additions and 79 deletions

View File

@@ -0,0 +1,87 @@
+import { getRateLimiter, serverRateLimiter, testSuiteRateLimiter, redisClient } from "./rate-limiter";
+import { RateLimiterMode } from "../../src/types";
+import { RateLimiterRedis } from "rate-limiter-flexible";
+
+describe("Rate Limiter Service", () => {
+  beforeAll(async () => {
+    await redisClient.connect();
+  });
+
+  afterAll(async () => {
+    await redisClient.disconnect();
+  });
+
+  it("should return the testSuiteRateLimiter for specific tokens", () => {
+    const limiter = getRateLimiter("crawl" as RateLimiterMode, "a01ccae");
+    expect(limiter).toBe(testSuiteRateLimiter);
+
+    const limiter2 = getRateLimiter("scrape" as RateLimiterMode, "6254cf9");
+    expect(limiter2).toBe(testSuiteRateLimiter);
+  });
+
+  it("should return the serverRateLimiter if mode is not found", () => {
+    const limiter = getRateLimiter("nonexistent" as RateLimiterMode, "someToken");
+    expect(limiter).toBe(serverRateLimiter);
+  });
+
+  it("should return the correct rate limiter based on mode and plan", () => {
+    const limiter = getRateLimiter("crawl" as RateLimiterMode, "someToken", "free");
+    expect(limiter.points).toBe(2);
+
+    const limiter2 = getRateLimiter("scrape" as RateLimiterMode, "someToken", "standard");
+    expect(limiter2.points).toBe(50);
+
+    const limiter3 = getRateLimiter("search" as RateLimiterMode, "someToken", "growth");
+    expect(limiter3.points).toBe(500);
+
+    const limiter4 = getRateLimiter("crawlStatus" as RateLimiterMode, "someToken", "growth");
+    expect(limiter4.points).toBe(150);
+  });
+
+  it("should return the default rate limiter if plan is not provided", () => {
+    const limiter = getRateLimiter("crawl" as RateLimiterMode, "someToken");
+    expect(limiter.points).toBe(3);
+
+    const limiter2 = getRateLimiter("scrape" as RateLimiterMode, "someToken");
+    expect(limiter2.points).toBe(20);
+  });
+
+  it("should create a new RateLimiterRedis instance with correct parameters", () => {
+    const keyPrefix = "test-prefix";
+    const points = 10;
+    const limiter = new RateLimiterRedis({
+      storeClient: redisClient,
+      keyPrefix,
+      points,
+      duration: 60,
+    });
+
+    expect(limiter.keyPrefix).toBe(keyPrefix);
+    expect(limiter.points).toBe(points);
+    expect(limiter.duration).toBe(60);
+  });
+
+  it("should return the correct rate limiter for 'preview' mode", () => {
+    const limiter = getRateLimiter("preview" as RateLimiterMode, "someToken", "free");
+    expect(limiter.points).toBe(5);
+
+    const limiter2 = getRateLimiter("preview" as RateLimiterMode, "someToken");
+    expect(limiter2.points).toBe(5);
+  });
+
+  it("should return the correct rate limiter for 'account' mode", () => {
+    const limiter = getRateLimiter("account" as RateLimiterMode, "someToken", "free");
+    expect(limiter.points).toBe(100);
+
+    const limiter2 = getRateLimiter("account" as RateLimiterMode, "someToken");
+    expect(limiter2.points).toBe(100);
+  });
+
+  it("should return the correct rate limiter for 'crawlStatus' mode", () => {
+    const limiter = getRateLimiter("crawlStatus" as RateLimiterMode, "someToken", "free");
+    expect(limiter.points).toBe(150);
+
+    const limiter2 = getRateLimiter("crawlStatus" as RateLimiterMode, "someToken");
+    expect(limiter2.points).toBe(150);
+  });
+});

View File

@@ -1,11 +1,14 @@
-import FirecrawlApp, { JobStatusResponse } from '@mendable/firecrawl-js';
+import FirecrawlApp, { JobStatusResponse } from './firecrawl/src/index' //'@mendable/firecrawl-js';
 import { z } from "zod";
 
 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
 
 // Scrape a website:
 const scrapeResult = await app.scrapeUrl('firecrawl.dev');
+
+if (scrapeResult.data) {
 console.log(scrapeResult.data.content)
+}
 
 // Crawl a website:
 const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
@@ -23,12 +26,13 @@ while (true) {
   await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
 }
 
+if (job.data) {
 console.log(job.data[0].content);
+}
 
 // Search for a query:
 const query = 'what is mendable?'
 const searchResult = await app.search(query)
-console.log(searchResult)
 
 // LLM Extraction:
 // Define schema to extract contents into using zod schema
@@ -50,7 +54,9 @@ let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
   extractorOptions: { extractionSchema: zodSchema },
 });
 
+if (llmExtractionResult.data) {
 console.log(llmExtractionResult.data.llm_extraction);
+}
 
 // Define schema to extract contents into using json schema
 const jsonSchema = {
@@ -80,4 +86,7 @@ llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
   extractorOptions: { extractionSchema: jsonSchema },
 });
 
+if (llmExtractionResult.data) {
 console.log(llmExtractionResult.data.llm_extraction);
+}
+
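The data guards added to the example follow from the typed responses introduced further down in this diff: data is optional on ScrapeResponse and on JobStatusResponse. As a rough consumer-side sketch (not part of the commit; it assumes scrapeUrl resolves to the ScrapeResponse shape declared below), narrowing looks like this:

import FirecrawlApp, { ScrapeResponse } from '@mendable/firecrawl-js';

const app = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY" });

// `data` may be undefined when the scrape fails, so narrow it before
// reading FirecrawlDocument fields such as `content` or `metadata`.
const result: ScrapeResponse = await app.scrapeUrl('firecrawl.dev');
if (result.data) {
  console.log(result.data.content);
  console.log(result.data.metadata.title);
} else {
  console.error(result.error);
}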

View File

@@ -1,6 +1,6 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "0.0.26",
+  "version": "0.0.27",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "build/index.js",
   "types": "types/index.d.ts",

View File

@@ -9,6 +9,102 @@ export interface FirecrawlAppConfig {
   apiUrl?: string | null;
 }
 
+/**
+ * Metadata for a Firecrawl document.
+ */
+export interface FirecrawlDocumentMetadata {
+  title?: string;
+  description?: string;
+  language?: string;
+  keywords?: string;
+  robots?: string;
+  ogTitle?: string;
+  ogDescription?: string;
+  ogUrl?: string;
+  ogImage?: string;
+  ogAudio?: string;
+  ogDeterminer?: string;
+  ogLocale?: string;
+  ogLocaleAlternate?: string[];
+  ogSiteName?: string;
+  ogVideo?: string;
+  dctermsCreated?: string;
+  dcDateCreated?: string;
+  dcDate?: string;
+  dctermsType?: string;
+  dcType?: string;
+  dctermsAudience?: string;
+  dctermsSubject?: string;
+  dcSubject?: string;
+  dcDescription?: string;
+  dctermsKeywords?: string;
+  modifiedTime?: string;
+  publishedTime?: string;
+  articleTag?: string;
+  articleSection?: string;
+  sourceURL?: string;
+  pageStatusCode?: number;
+  pageError?: string;
+  [key: string]: any;
+}
+/**
+ * Document interface for Firecrawl.
+ */
+export interface FirecrawlDocument {
+  id?: string;
+  url?: string;
+  content: string;
+  markdown?: string;
+  html?: string;
+  llm_extraction?: Record<string, any>;
+  createdAt?: Date;
+  updatedAt?: Date;
+  type?: string;
+  metadata: FirecrawlDocumentMetadata;
+  childrenLinks?: string[];
+  provider?: string;
+  warning?: string;
+  index?: number;
+}
+/**
+ * Response interface for scraping operations.
+ */
+export interface ScrapeResponse {
+  success: boolean;
+  data?: FirecrawlDocument;
+  error?: string;
+}
+/**
+ * Response interface for searching operations.
+ */
+export interface SearchResponse {
+  success: boolean;
+  data?: FirecrawlDocument[];
+  error?: string;
+}
+/**
+ * Response interface for crawling operations.
+ */
+export interface CrawlResponse {
+  success: boolean;
+  jobId?: string;
+  data?: FirecrawlDocument[];
+  error?: string;
+}
+/**
+ * Response interface for job status checks.
+ */
+export interface JobStatusResponse {
+  success: boolean;
+  status: string;
+  jobId?: string;
+  data?: FirecrawlDocument[];
+  partial_data?: FirecrawlDocument[];
+  error?: string;
+}
 /**
  * Generic parameter interface.
  */
@@ -20,46 +116,6 @@ export interface Params {
     extractionPrompt?: string;
   };
 }
-/**
- * Response interface for scraping operations.
- */
-export interface ScrapeResponse {
-  success: boolean;
-  data?: any;
-  error?: string;
-}
-/**
- * Response interface for searching operations.
- */
-export interface SearchResponse {
-  success: boolean;
-  data?: any;
-  error?: string;
-}
-/**
- * Response interface for crawling operations.
- */
-export interface CrawlResponse {
-  success: boolean;
-  jobId?: string;
-  data?: any;
-  error?: string;
-}
-/**
- * Response interface for job status checks.
- */
-export interface JobStatusResponse {
-  success: boolean;
-  status: string;
-  jobId?: string;
-  data?: any;
-  partial_data?: any,
-  error?: string;
-}
 /**
  * Main class for interacting with the Firecrawl API.
  */
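The practical effect of retyping data and partial_data from any to FirecrawlDocument[] is that crawl results are now checked by the compiler. A hedged sketch of the caller side (it assumes checkCrawlStatus resolves to the JobStatusResponse declared above; the job id is a placeholder):

import FirecrawlApp, { JobStatusResponse } from '@mendable/firecrawl-js';

const app = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY" });

// `data` is now FirecrawlDocument[] | undefined instead of `any`, so both the
// undefined check and the element shape are enforced at compile time.
const job: JobStatusResponse = await app.checkCrawlStatus("hypothetical-job-id");
if (job.status === 'completed' && job.data) {
  for (const doc of job.data) {
    console.log(doc.metadata.sourceURL, doc.content.length);
  }
}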

View File

@@ -7,6 +7,99 @@ export interface FirecrawlAppConfig {
   apiKey?: string | null;
   apiUrl?: string | null;
 }
+/**
+ * Metadata for a Firecrawl document.
+ */
+export interface FirecrawlDocumentMetadata {
+  title?: string;
+  description?: string;
+  language?: string;
+  keywords?: string;
+  robots?: string;
+  ogTitle?: string;
+  ogDescription?: string;
+  ogUrl?: string;
+  ogImage?: string;
+  ogAudio?: string;
+  ogDeterminer?: string;
+  ogLocale?: string;
+  ogLocaleAlternate?: string[];
+  ogSiteName?: string;
+  ogVideo?: string;
+  dctermsCreated?: string;
+  dcDateCreated?: string;
+  dcDate?: string;
+  dctermsType?: string;
+  dcType?: string;
+  dctermsAudience?: string;
+  dctermsSubject?: string;
+  dcSubject?: string;
+  dcDescription?: string;
+  dctermsKeywords?: string;
+  modifiedTime?: string;
+  publishedTime?: string;
+  articleTag?: string;
+  articleSection?: string;
+  sourceURL?: string;
+  pageStatusCode?: number;
+  pageError?: string;
+  [key: string]: any;
+}
+/**
+ * Document interface for Firecrawl.
+ */
+export interface FirecrawlDocument {
+  id?: string;
+  url?: string;
+  content: string;
+  markdown?: string;
+  html?: string;
+  llm_extraction?: Record<string, any>;
+  createdAt?: Date;
+  updatedAt?: Date;
+  type?: string;
+  metadata: FirecrawlDocumentMetadata;
+  childrenLinks?: string[];
+  provider?: string;
+  warning?: string;
+  index?: number;
+}
+/**
+ * Response interface for scraping operations.
+ */
+export interface ScrapeResponse {
+  success: boolean;
+  data?: FirecrawlDocument;
+  error?: string;
+}
+/**
+ * Response interface for searching operations.
+ */
+export interface SearchResponse {
+  success: boolean;
+  data?: FirecrawlDocument[];
+  error?: string;
+}
+/**
+ * Response interface for crawling operations.
+ */
+export interface CrawlResponse {
+  success: boolean;
+  jobId?: string;
+  data?: FirecrawlDocument[];
+  error?: string;
+}
+/**
+ * Response interface for job status checks.
+ */
+export interface JobStatusResponse {
+  success: boolean;
+  status: string;
+  jobId?: string;
+  data?: FirecrawlDocument[];
+  partial_data?: FirecrawlDocument[];
+  error?: string;
+}
 /**
  * Generic parameter interface.
  */
@@ -18,42 +111,6 @@ export interface Params {
     extractionPrompt?: string;
   };
 }
-/**
- * Response interface for scraping operations.
- */
-export interface ScrapeResponse {
-  success: boolean;
-  data?: any;
-  error?: string;
-}
-/**
- * Response interface for searching operations.
- */
-export interface SearchResponse {
-  success: boolean;
-  data?: any;
-  error?: string;
-}
-/**
- * Response interface for crawling operations.
- */
-export interface CrawlResponse {
-  success: boolean;
-  jobId?: string;
-  data?: any;
-  error?: string;
-}
-/**
- * Response interface for job status checks.
- */
-export interface JobStatusResponse {
-  success: boolean;
-  status: string;
-  jobId?: string;
-  data?: any;
-  partial_data?: any;
-  error?: string;
-}
 /**
  * Main class for interacting with the Firecrawl API.
  */