Merge pull request #298 from mendableai/feat/type-improvements
[Feat] Improvements on response document types
This commit is contained in:
commit
3b6c9a8557
@ -1,11 +1,14 @@
|
|||||||
import FirecrawlApp, { JobStatusResponse } from '@mendable/firecrawl-js';
|
import FirecrawlApp, { JobStatusResponse } from './firecrawl/src/index' //'@mendable/firecrawl-js';
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
|
||||||
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
|
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
|
||||||
|
|
||||||
// Scrape a website:
|
// Scrape a website:
|
||||||
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
|
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
|
||||||
|
|
||||||
|
if (scrapeResult.data) {
|
||||||
console.log(scrapeResult.data.content)
|
console.log(scrapeResult.data.content)
|
||||||
|
}
|
||||||
|
|
||||||
// Crawl a website:
|
// Crawl a website:
|
||||||
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
|
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
|
||||||
@ -23,12 +26,13 @@ while (true) {
|
|||||||
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
|
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (job.data) {
|
||||||
console.log(job.data[0].content);
|
console.log(job.data[0].content);
|
||||||
|
}
|
||||||
|
|
||||||
// Search for a query:
|
// Search for a query:
|
||||||
const query = 'what is mendable?'
|
const query = 'what is mendable?'
|
||||||
const searchResult = await app.search(query)
|
const searchResult = await app.search(query)
|
||||||
console.log(searchResult)
|
|
||||||
|
|
||||||
// LLM Extraction:
|
// LLM Extraction:
|
||||||
// Define schema to extract contents into using zod schema
|
// Define schema to extract contents into using zod schema
|
||||||
@ -50,7 +54,9 @@ let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
|
|||||||
extractorOptions: { extractionSchema: zodSchema },
|
extractorOptions: { extractionSchema: zodSchema },
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (llmExtractionResult.data) {
|
||||||
console.log(llmExtractionResult.data.llm_extraction);
|
console.log(llmExtractionResult.data.llm_extraction);
|
||||||
|
}
|
||||||
|
|
||||||
// Define schema to extract contents into using json schema
|
// Define schema to extract contents into using json schema
|
||||||
const jsonSchema = {
|
const jsonSchema = {
|
||||||
@ -80,4 +86,7 @@ llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
|
|||||||
extractorOptions: { extractionSchema: jsonSchema },
|
extractorOptions: { extractionSchema: jsonSchema },
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (llmExtractionResult.data) {
|
||||||
console.log(llmExtractionResult.data.llm_extraction);
|
console.log(llmExtractionResult.data.llm_extraction);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.26",
|
"version": "0.0.27",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "build/index.js",
|
"main": "build/index.js",
|
||||||
"types": "types/index.d.ts",
|
"types": "types/index.d.ts",
|
||||||
|
@ -9,6 +9,102 @@ export interface FirecrawlAppConfig {
|
|||||||
apiUrl?: string | null;
|
apiUrl?: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Metadata for a Firecrawl document.
|
||||||
|
*/
|
||||||
|
export interface FirecrawlDocumentMetadata {
|
||||||
|
title?: string;
|
||||||
|
description?: string;
|
||||||
|
language?: string;
|
||||||
|
keywords?: string;
|
||||||
|
robots?: string;
|
||||||
|
ogTitle?: string;
|
||||||
|
ogDescription?: string;
|
||||||
|
ogUrl?: string;
|
||||||
|
ogImage?: string;
|
||||||
|
ogAudio?: string;
|
||||||
|
ogDeterminer?: string;
|
||||||
|
ogLocale?: string;
|
||||||
|
ogLocaleAlternate?: string[];
|
||||||
|
ogSiteName?: string;
|
||||||
|
ogVideo?: string;
|
||||||
|
dctermsCreated?: string;
|
||||||
|
dcDateCreated?: string;
|
||||||
|
dcDate?: string;
|
||||||
|
dctermsType?: string;
|
||||||
|
dcType?: string;
|
||||||
|
dctermsAudience?: string;
|
||||||
|
dctermsSubject?: string;
|
||||||
|
dcSubject?: string;
|
||||||
|
dcDescription?: string;
|
||||||
|
dctermsKeywords?: string;
|
||||||
|
modifiedTime?: string;
|
||||||
|
publishedTime?: string;
|
||||||
|
articleTag?: string;
|
||||||
|
articleSection?: string;
|
||||||
|
sourceURL?: string;
|
||||||
|
pageStatusCode?: number;
|
||||||
|
pageError?: string;
|
||||||
|
[key: string]: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Document interface for Firecrawl.
|
||||||
|
*/
|
||||||
|
export interface FirecrawlDocument {
|
||||||
|
id?: string;
|
||||||
|
url?: string;
|
||||||
|
content: string;
|
||||||
|
markdown?: string;
|
||||||
|
html?: string;
|
||||||
|
llm_extraction?: Record<string, any>;
|
||||||
|
createdAt?: Date;
|
||||||
|
updatedAt?: Date;
|
||||||
|
type?: string;
|
||||||
|
metadata: FirecrawlDocumentMetadata;
|
||||||
|
childrenLinks?: string[];
|
||||||
|
provider?: string;
|
||||||
|
warning?: string;
|
||||||
|
|
||||||
|
index?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Response interface for scraping operations.
|
||||||
|
*/
|
||||||
|
export interface ScrapeResponse {
|
||||||
|
success: boolean;
|
||||||
|
data?: FirecrawlDocument;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for searching operations.
|
||||||
|
*/
|
||||||
|
export interface SearchResponse {
|
||||||
|
success: boolean;
|
||||||
|
data?: FirecrawlDocument[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for crawling operations.
|
||||||
|
*/
|
||||||
|
export interface CrawlResponse {
|
||||||
|
success: boolean;
|
||||||
|
jobId?: string;
|
||||||
|
data?: FirecrawlDocument[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for job status checks.
|
||||||
|
*/
|
||||||
|
export interface JobStatusResponse {
|
||||||
|
success: boolean;
|
||||||
|
status: string;
|
||||||
|
jobId?: string;
|
||||||
|
data?: FirecrawlDocument[];
|
||||||
|
partial_data?: FirecrawlDocument[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Generic parameter interface.
|
* Generic parameter interface.
|
||||||
*/
|
*/
|
||||||
@ -20,46 +116,6 @@ export interface Params {
|
|||||||
extractionPrompt?: string;
|
extractionPrompt?: string;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Response interface for scraping operations.
|
|
||||||
*/
|
|
||||||
export interface ScrapeResponse {
|
|
||||||
success: boolean;
|
|
||||||
data?: any;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Response interface for searching operations.
|
|
||||||
*/
|
|
||||||
export interface SearchResponse {
|
|
||||||
success: boolean;
|
|
||||||
data?: any;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Response interface for crawling operations.
|
|
||||||
*/
|
|
||||||
export interface CrawlResponse {
|
|
||||||
success: boolean;
|
|
||||||
jobId?: string;
|
|
||||||
data?: any;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Response interface for job status checks.
|
|
||||||
*/
|
|
||||||
export interface JobStatusResponse {
|
|
||||||
success: boolean;
|
|
||||||
status: string;
|
|
||||||
jobId?: string;
|
|
||||||
data?: any;
|
|
||||||
partial_data?: any,
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
*/
|
*/
|
||||||
|
129
apps/js-sdk/firecrawl/types/index.d.ts
vendored
129
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -7,6 +7,99 @@ export interface FirecrawlAppConfig {
|
|||||||
apiKey?: string | null;
|
apiKey?: string | null;
|
||||||
apiUrl?: string | null;
|
apiUrl?: string | null;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Metadata for a Firecrawl document.
|
||||||
|
*/
|
||||||
|
export interface FirecrawlDocumentMetadata {
|
||||||
|
title?: string;
|
||||||
|
description?: string;
|
||||||
|
language?: string;
|
||||||
|
keywords?: string;
|
||||||
|
robots?: string;
|
||||||
|
ogTitle?: string;
|
||||||
|
ogDescription?: string;
|
||||||
|
ogUrl?: string;
|
||||||
|
ogImage?: string;
|
||||||
|
ogAudio?: string;
|
||||||
|
ogDeterminer?: string;
|
||||||
|
ogLocale?: string;
|
||||||
|
ogLocaleAlternate?: string[];
|
||||||
|
ogSiteName?: string;
|
||||||
|
ogVideo?: string;
|
||||||
|
dctermsCreated?: string;
|
||||||
|
dcDateCreated?: string;
|
||||||
|
dcDate?: string;
|
||||||
|
dctermsType?: string;
|
||||||
|
dcType?: string;
|
||||||
|
dctermsAudience?: string;
|
||||||
|
dctermsSubject?: string;
|
||||||
|
dcSubject?: string;
|
||||||
|
dcDescription?: string;
|
||||||
|
dctermsKeywords?: string;
|
||||||
|
modifiedTime?: string;
|
||||||
|
publishedTime?: string;
|
||||||
|
articleTag?: string;
|
||||||
|
articleSection?: string;
|
||||||
|
sourceURL?: string;
|
||||||
|
pageStatusCode?: number;
|
||||||
|
pageError?: string;
|
||||||
|
[key: string]: any;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Document interface for Firecrawl.
|
||||||
|
*/
|
||||||
|
export interface FirecrawlDocument {
|
||||||
|
id?: string;
|
||||||
|
url?: string;
|
||||||
|
content: string;
|
||||||
|
markdown?: string;
|
||||||
|
html?: string;
|
||||||
|
llm_extraction?: Record<string, any>;
|
||||||
|
createdAt?: Date;
|
||||||
|
updatedAt?: Date;
|
||||||
|
type?: string;
|
||||||
|
metadata: FirecrawlDocumentMetadata;
|
||||||
|
childrenLinks?: string[];
|
||||||
|
provider?: string;
|
||||||
|
warning?: string;
|
||||||
|
index?: number;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for scraping operations.
|
||||||
|
*/
|
||||||
|
export interface ScrapeResponse {
|
||||||
|
success: boolean;
|
||||||
|
data?: FirecrawlDocument;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for searching operations.
|
||||||
|
*/
|
||||||
|
export interface SearchResponse {
|
||||||
|
success: boolean;
|
||||||
|
data?: FirecrawlDocument[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for crawling operations.
|
||||||
|
*/
|
||||||
|
export interface CrawlResponse {
|
||||||
|
success: boolean;
|
||||||
|
jobId?: string;
|
||||||
|
data?: FirecrawlDocument[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for job status checks.
|
||||||
|
*/
|
||||||
|
export interface JobStatusResponse {
|
||||||
|
success: boolean;
|
||||||
|
status: string;
|
||||||
|
jobId?: string;
|
||||||
|
data?: FirecrawlDocument[];
|
||||||
|
partial_data?: FirecrawlDocument[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Generic parameter interface.
|
* Generic parameter interface.
|
||||||
*/
|
*/
|
||||||
@ -18,42 +111,6 @@ export interface Params {
|
|||||||
extractionPrompt?: string;
|
extractionPrompt?: string;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Response interface for scraping operations.
|
|
||||||
*/
|
|
||||||
export interface ScrapeResponse {
|
|
||||||
success: boolean;
|
|
||||||
data?: any;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Response interface for searching operations.
|
|
||||||
*/
|
|
||||||
export interface SearchResponse {
|
|
||||||
success: boolean;
|
|
||||||
data?: any;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Response interface for crawling operations.
|
|
||||||
*/
|
|
||||||
export interface CrawlResponse {
|
|
||||||
success: boolean;
|
|
||||||
jobId?: string;
|
|
||||||
data?: any;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Response interface for job status checks.
|
|
||||||
*/
|
|
||||||
export interface JobStatusResponse {
|
|
||||||
success: boolean;
|
|
||||||
status: string;
|
|
||||||
jobId?: string;
|
|
||||||
data?: any;
|
|
||||||
partial_data?: any;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
*/
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user