0

Merge pull request #298 from mendableai/feat/type-improvements

[Feat] Improvements on response document types
This commit is contained in:
Nicolas 2024-06-18 11:00:02 -04:00 committed by GitHub
commit 3b6c9a8557
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 198 additions and 76 deletions

View File

@ -1,11 +1,14 @@
import FirecrawlApp, { JobStatusResponse } from '@mendable/firecrawl-js';
import FirecrawlApp, { JobStatusResponse } from './firecrawl/src/index' //'@mendable/firecrawl-js';
import { z } from "zod";
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
// Scrape a website:
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
console.log(scrapeResult.data.content)
if (scrapeResult.data) {
console.log(scrapeResult.data.content)
}
// Crawl a website:
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
@ -23,12 +26,13 @@ while (true) {
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
}
console.log(job.data[0].content);
if (job.data) {
console.log(job.data[0].content);
}
// Search for a query:
const query = 'what is mendable?'
const searchResult = await app.search(query)
console.log(searchResult)
// LLM Extraction:
// Define schema to extract contents into using zod schema
@ -50,7 +54,9 @@ let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
extractorOptions: { extractionSchema: zodSchema },
});
console.log(llmExtractionResult.data.llm_extraction);
if (llmExtractionResult.data) {
console.log(llmExtractionResult.data.llm_extraction);
}
// Define schema to extract contents into using json schema
const jsonSchema = {
@ -80,4 +86,7 @@ llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
extractorOptions: { extractionSchema: jsonSchema },
});
console.log(llmExtractionResult.data.llm_extraction);
if (llmExtractionResult.data) {
console.log(llmExtractionResult.data.llm_extraction);
}

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.26",
"version": "0.0.27",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js",
"types": "types/index.d.ts",

View File

@ -9,6 +9,102 @@ export interface FirecrawlAppConfig {
apiUrl?: string | null;
}
/**
* Metadata for a Firecrawl document.
*
* Every named field is optional. All named fields are strings except
* `ogLocaleAlternate` (string array) and `pageStatusCode` (number).
* The trailing index signature additionally admits any other string key,
* typed as `any`, so access to unlisted keys is not checked by the compiler.
*
* NOTE(review): the `og*` names presumably mirror Open Graph meta tags and the
* `dc*` / `dcterms*` names presumably mirror Dublin Core meta tags — confirm
* against the scraper that populates this object.
*/
export interface FirecrawlDocumentMetadata {
title?: string;
description?: string;
language?: string;
keywords?: string;
robots?: string;
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
/** The only array-valued field in this interface. */
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
dctermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dctermsType?: string;
dcType?: string;
dctermsAudience?: string;
dctermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dctermsKeywords?: string;
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
/** The only numeric field in this interface. */
pageStatusCode?: number;
pageError?: string;
/** Catch-all for keys not listed above; values are untyped (`any`). */
[key: string]: any;
}
/**
* Document interface for Firecrawl.
*
* `content` and `metadata` are the only required members; every other
* field is optional and must be checked for `undefined` before use.
*/
export interface FirecrawlDocument {
id?: string;
url?: string;
/** Required string member. */
content: string;
markdown?: string;
html?: string;
/** Free-form object keyed by string; values are untyped (`any`). */
llm_extraction?: Record<string, any>;
/**
* NOTE(review): declared as `Date`. If this value arrives via parsed JSON it
* would be a string at runtime unless converted — confirm.
*/
createdAt?: Date;
/** NOTE(review): same `Date`-vs-string caveat as `createdAt` — confirm. */
updatedAt?: Date;
type?: string;
/** Required; see FirecrawlDocumentMetadata for the available keys. */
metadata: FirecrawlDocumentMetadata;
childrenLinks?: string[];
provider?: string;
warning?: string;
index?: number;
}
/**
* Response interface for scraping operations.
*
* `data` is a single FirecrawlDocument and is optional, so callers must
* narrow it (e.g. `if (result.data)`) before reading its fields.
*/
export interface ScrapeResponse {
success: boolean;
/** A single document, not an array; may be absent. */
data?: FirecrawlDocument;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for searching operations.
*
* `data` is an optional array of FirecrawlDocument; callers must check it
* is defined before indexing into it.
*/
export interface SearchResponse {
success: boolean;
/** Array of documents; may be absent. */
data?: FirecrawlDocument[];
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for crawling operations.
*
* Both `jobId` and `data` are optional, so neither can be read without a
* presence check.
*
* NOTE(review): presumably `jobId` is returned when the caller does not wait
* for completion and `data` when it does — confirm against the crawl method.
*/
export interface CrawlResponse {
success: boolean;
jobId?: string;
/** Array of documents; may be absent. */
data?: FirecrawlDocument[];
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for job status checks.
*
* `success` and `status` are required. `data` and `partial_data` are both
* optional arrays of FirecrawlDocument and must be checked before indexing.
*/
export interface JobStatusResponse {
success: boolean;
/**
* Required free-form string; the set of possible values is not constrained
* by this type.
*/
status: string;
jobId?: string;
data?: FirecrawlDocument[];
/** NOTE(review): presumably documents gathered so far while the job runs — confirm. */
partial_data?: FirecrawlDocument[];
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Generic parameter interface.
*/
@ -20,46 +116,6 @@ export interface Params {
extractionPrompt?: string;
};
}
/**
* Response interface for scraping operations.
*
* `data` is optional and typed `any`, so the compiler does not check how
* callers read it; accessing fields on an absent `data` fails only at runtime.
*/
export interface ScrapeResponse {
success: boolean;
/** Untyped payload; may be absent. */
data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for searching operations.
*
* `data` is optional and typed `any`, so the compiler does not check how
* callers read it.
*/
export interface SearchResponse {
success: boolean;
/** Untyped payload; may be absent. */
data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for crawling operations.
*
* Both `jobId` and `data` are optional. `data` is typed `any`, so the
* compiler does not check how callers read it.
*/
export interface CrawlResponse {
success: boolean;
jobId?: string;
/** Untyped payload; may be absent. */
data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for job status checks.
*
* `success` and `status` are required. `data` and `partial_data` are
* optional and typed `any`, so the compiler does not check how callers
* read them.
*/
export interface JobStatusResponse {
success: boolean;
/**
* Required free-form string; the set of possible values is not constrained
* by this type.
*/
status: string;
jobId?: string;
/** Untyped payload; may be absent. */
data?: any;
/** Untyped payload; may be absent. */
partial_data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Main class for interacting with the Firecrawl API.
*/

View File

@ -7,6 +7,99 @@ export interface FirecrawlAppConfig {
apiKey?: string | null;
apiUrl?: string | null;
}
/**
* Metadata for a Firecrawl document.
*
* Every named field is optional. All named fields are strings except
* `ogLocaleAlternate` (string array) and `pageStatusCode` (number).
* The trailing index signature additionally admits any other string key,
* typed as `any`, so access to unlisted keys is not checked by the compiler.
*
* NOTE(review): the `og*` names presumably mirror Open Graph meta tags and the
* `dc*` / `dcterms*` names presumably mirror Dublin Core meta tags — confirm
* against the scraper that populates this object.
*/
export interface FirecrawlDocumentMetadata {
title?: string;
description?: string;
language?: string;
keywords?: string;
robots?: string;
ogTitle?: string;
ogDescription?: string;
ogUrl?: string;
ogImage?: string;
ogAudio?: string;
ogDeterminer?: string;
ogLocale?: string;
/** The only array-valued field in this interface. */
ogLocaleAlternate?: string[];
ogSiteName?: string;
ogVideo?: string;
dctermsCreated?: string;
dcDateCreated?: string;
dcDate?: string;
dctermsType?: string;
dcType?: string;
dctermsAudience?: string;
dctermsSubject?: string;
dcSubject?: string;
dcDescription?: string;
dctermsKeywords?: string;
modifiedTime?: string;
publishedTime?: string;
articleTag?: string;
articleSection?: string;
sourceURL?: string;
/** The only numeric field in this interface. */
pageStatusCode?: number;
pageError?: string;
/** Catch-all for keys not listed above; values are untyped (`any`). */
[key: string]: any;
}
/**
* Document interface for Firecrawl.
*
* `content` and `metadata` are the only required members; every other
* field is optional and must be checked for `undefined` before use.
*/
export interface FirecrawlDocument {
id?: string;
url?: string;
/** Required string member. */
content: string;
markdown?: string;
html?: string;
/** Free-form object keyed by string; values are untyped (`any`). */
llm_extraction?: Record<string, any>;
/**
* NOTE(review): declared as `Date`. If this value arrives via parsed JSON it
* would be a string at runtime unless converted — confirm.
*/
createdAt?: Date;
/** NOTE(review): same `Date`-vs-string caveat as `createdAt` — confirm. */
updatedAt?: Date;
type?: string;
/** Required; see FirecrawlDocumentMetadata for the available keys. */
metadata: FirecrawlDocumentMetadata;
childrenLinks?: string[];
provider?: string;
warning?: string;
index?: number;
}
/**
* Response interface for scraping operations.
*
* `data` is a single FirecrawlDocument and is optional, so callers must
* narrow it (e.g. `if (result.data)`) before reading its fields.
*/
export interface ScrapeResponse {
success: boolean;
/** A single document, not an array; may be absent. */
data?: FirecrawlDocument;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for searching operations.
*
* `data` is an optional array of FirecrawlDocument; callers must check it
* is defined before indexing into it.
*/
export interface SearchResponse {
success: boolean;
/** Array of documents; may be absent. */
data?: FirecrawlDocument[];
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for crawling operations.
*
* Both `jobId` and `data` are optional, so neither can be read without a
* presence check.
*
* NOTE(review): presumably `jobId` is returned when the caller does not wait
* for completion and `data` when it does — confirm against the crawl method.
*/
export interface CrawlResponse {
success: boolean;
jobId?: string;
/** Array of documents; may be absent. */
data?: FirecrawlDocument[];
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for job status checks.
*
* `success` and `status` are required. `data` and `partial_data` are both
* optional arrays of FirecrawlDocument and must be checked before indexing.
*/
export interface JobStatusResponse {
success: boolean;
/**
* Required free-form string; the set of possible values is not constrained
* by this type.
*/
status: string;
jobId?: string;
data?: FirecrawlDocument[];
/** NOTE(review): presumably documents gathered so far while the job runs — confirm. */
partial_data?: FirecrawlDocument[];
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Generic parameter interface.
*/
@ -18,42 +111,6 @@ export interface Params {
extractionPrompt?: string;
};
}
/**
* Response interface for scraping operations.
*
* `data` is optional and typed `any`, so the compiler does not check how
* callers read it; accessing fields on an absent `data` fails only at runtime.
*/
export interface ScrapeResponse {
success: boolean;
/** Untyped payload; may be absent. */
data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for searching operations.
*
* `data` is optional and typed `any`, so the compiler does not check how
* callers read it.
*/
export interface SearchResponse {
success: boolean;
/** Untyped payload; may be absent. */
data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for crawling operations.
*
* Both `jobId` and `data` are optional. `data` is typed `any`, so the
* compiler does not check how callers read it.
*/
export interface CrawlResponse {
success: boolean;
jobId?: string;
/** Untyped payload; may be absent. */
data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Response interface for job status checks.
*
* `success` and `status` are required. `data` and `partial_data` are
* optional and typed `any`, so the compiler does not check how callers
* read them.
*/
export interface JobStatusResponse {
success: boolean;
/**
* Required free-form string; the set of possible values is not constrained
* by this type.
*/
status: string;
jobId?: string;
/** Untyped payload; may be absent. */
data?: any;
/** Untyped payload; may be absent. */
partial_data?: any;
/** Optional message; presumably set when `success` is false — confirm. */
error?: string;
}
/**
* Main class for interacting with the Firecrawl API.
*/