Merge branch 'main' into mdp/dotenv_jest
This commit is contained in:
commit
f368e94cee
@ -180,6 +180,15 @@ url = 'https://example.com'
|
||||
scraped_data = app.scrape_url(url)
|
||||
```
|
||||
|
||||
### Search for a query
|
||||
|
||||
Performs a web search, retrieve the top results, extract data from each page, and returns their markdown.
|
||||
|
||||
```python
|
||||
query = 'what is mendable?'
|
||||
search_result = app.search(query)
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
|
||||
|
@ -373,33 +373,36 @@
|
||||
"type": "boolean"
|
||||
},
|
||||
"data": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string"
|
||||
},
|
||||
"markdown": {
|
||||
"type": "string"
|
||||
},
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"language": {
|
||||
"type": "string",
|
||||
"nullable": true
|
||||
},
|
||||
"sourceURL": {
|
||||
"type": "string",
|
||||
"format": "uri"
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string"
|
||||
},
|
||||
"markdown": {
|
||||
"type": "string"
|
||||
},
|
||||
"content": {
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"language": {
|
||||
"type": "string",
|
||||
"nullable": true
|
||||
},
|
||||
"sourceURL": {
|
||||
"type": "string",
|
||||
"format": "uri"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,9 @@ import { SearchResult } from "../../src/lib/entities";
|
||||
import { google_search } from "./googlesearch";
|
||||
import { serper_search } from "./serper";
|
||||
|
||||
|
||||
|
||||
|
||||
export async function search({
|
||||
query,
|
||||
advanced = false,
|
||||
|
@ -61,6 +61,43 @@ export default class FirecrawlApp {
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
});
|
||||
}
|
||||
/**
|
||||
* Searches for a query using the Firecrawl API.
|
||||
* @param {string} query - The query to search for.
|
||||
* @param {Params | null} params - Additional parameters for the search request.
|
||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||
*/
|
||||
search(query_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (query, params = null) {
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { query };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
}
|
||||
try {
|
||||
const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
return responseData;
|
||||
}
|
||||
else {
|
||||
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'search');
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
});
|
||||
}
|
||||
/**
|
||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||
* @param {string} url - The URL to crawl.
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.14",
|
||||
"version": "0.0.15",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
|
@ -23,6 +23,14 @@ export interface ScrapeResponse {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Response interface for searching operations.
|
||||
*/
|
||||
export interface SearchResponse {
|
||||
success: boolean;
|
||||
data?: any;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for crawling operations.
|
||||
*/
|
||||
@ -94,6 +102,39 @@ export default class FirecrawlApp {
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches for a query using the Firecrawl API.
|
||||
* @param {string} query - The query to search for.
|
||||
* @param {Params | null} params - Additional parameters for the search request.
|
||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||
*/
|
||||
async search(query: string, params: Params | null = null): Promise<SearchResponse> {
|
||||
const headers: AxiosRequestHeaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: Params = { query };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
return responseData;
|
||||
} else {
|
||||
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'search');
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||
* @param {string} url - The URL to crawl.
|
||||
|
15
apps/js-sdk/firecrawl/types/index.d.ts
vendored
15
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -19,6 +19,14 @@ export interface ScrapeResponse {
|
||||
data?: any;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for searching operations.
|
||||
*/
|
||||
export interface SearchResponse {
|
||||
success: boolean;
|
||||
data?: any;
|
||||
error?: string;
|
||||
}
|
||||
/**
|
||||
* Response interface for crawling operations.
|
||||
*/
|
||||
@ -55,6 +63,13 @@ export default class FirecrawlApp {
|
||||
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
||||
*/
|
||||
scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
|
||||
/**
|
||||
* Searches for a query using the Firecrawl API.
|
||||
* @param {string} query - The query to search for.
|
||||
* @param {Params | null} params - Additional parameters for the search request.
|
||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||
*/
|
||||
search(query: string, params?: Params | null): Promise<SearchResponse>;
|
||||
/**
|
||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||
* @param {string} url - The URL to crawl.
|
||||
|
8
apps/js-sdk/package-lock.json
generated
8
apps/js-sdk/package-lock.json
generated
@ -9,14 +9,14 @@
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^0.0.8",
|
||||
"@mendable/firecrawl-js": "^0.0.15",
|
||||
"axios": "^1.6.8"
|
||||
}
|
||||
},
|
||||
"node_modules/@mendable/firecrawl-js": {
|
||||
"version": "0.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.8.tgz",
|
||||
"integrity": "sha512-dD7eA5X6UT8CM3z7qCqHgA4YbCsdwmmlaT/L0/ozM6gGvb0PnJMoB+e51+n4lAW8mxXOvHGbq9nrgBT1wEhhhw==",
|
||||
"version": "0.0.15",
|
||||
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.15.tgz",
|
||||
"integrity": "sha512-e3iCCrLIiEh+jEDerGV9Uhdkn8ymo+sG+k3osCwPg51xW1xUdAnmlcHrcJoR43RvKXdvD/lqoxg8odUEsqyH+w==",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5"
|
||||
|
@ -11,7 +11,7 @@
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^0.0.8",
|
||||
"@mendable/firecrawl-js": "^0.0.15",
|
||||
"axios": "^1.6.8"
|
||||
}
|
||||
}
|
||||
|
@ -1,29 +1,36 @@
|
||||
from fastapi import FastAPI, Response
|
||||
from playwright.async_api import async_playwright
|
||||
import os
|
||||
from fastapi import FastAPI
|
||||
from playwright.async_api import async_playwright, Browser
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
class UrlModel(BaseModel):
|
||||
url: str
|
||||
|
||||
@app.post("/html") # Kept as POST to accept body parameters
|
||||
async def root(body: UrlModel): # Using Pydantic model for request body
|
||||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch()
|
||||
|
||||
context = await browser.new_context()
|
||||
page = await context.new_page()
|
||||
browser: Browser = None
|
||||
|
||||
await page.goto(body.url) # Adjusted to use the url from the request body model
|
||||
page_content = await page.content() # Get the HTML content of the page
|
||||
|
||||
await context.close()
|
||||
await browser.close()
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
global browser
|
||||
playwright = await async_playwright().start()
|
||||
browser = await playwright.chromium.launch()
|
||||
|
||||
json_compatible_item_data = {"content": page_content}
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event():
|
||||
await browser.close()
|
||||
|
||||
|
||||
@app.post("/html")
|
||||
async def root(body: UrlModel):
|
||||
context = await browser.new_context()
|
||||
page = await context.new_page()
|
||||
await page.goto(body.url)
|
||||
page_content = await page.content()
|
||||
await context.close()
|
||||
json_compatible_item_data = {"content": page_content}
|
||||
return JSONResponse(content=json_compatible_item_data)
|
||||
|
@ -47,6 +47,15 @@ url = 'https://example.com'
|
||||
scraped_data = app.scrape_url(url)
|
||||
```
|
||||
|
||||
### Search for a query
|
||||
|
||||
Used to search the web, get the most relevant results, scrap each page and return the markdown.
|
||||
|
||||
```python
|
||||
query = 'what is mendable?'
|
||||
search_result = app.search(query)
|
||||
```
|
||||
|
||||
### Crawling a Website
|
||||
|
||||
To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
|
||||
|
@ -32,6 +32,32 @@ class FirecrawlApp:
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||
else:
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
||||
|
||||
def search(self, query, params=None):
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
}
|
||||
json_data = {'query': query}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
response = requests.post(
|
||||
'https://api.firecrawl.dev/v0/search',
|
||||
headers=headers,
|
||||
json=json_data
|
||||
)
|
||||
if response.status_code == 200:
|
||||
response = response.json()
|
||||
if response['success'] == True:
|
||||
return response['data']
|
||||
else:
|
||||
raise Exception(f'Failed to search. Error: {response["error"]}')
|
||||
|
||||
elif response.status_code in [402, 409, 500]:
|
||||
error_message = response.json().get('error', 'Unknown error occurred')
|
||||
raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
|
||||
else:
|
||||
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||
|
||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
||||
headers = self._prepare_headers()
|
||||
|
BIN
apps/python-sdk/dist/firecrawl-py-0.0.5.tar.gz
vendored
BIN
apps/python-sdk/dist/firecrawl-py-0.0.5.tar.gz
vendored
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -32,6 +32,32 @@ class FirecrawlApp:
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||
else:
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
||||
|
||||
def search(self, query, params=None):
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
}
|
||||
json_data = {'query': query}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
response = requests.post(
|
||||
'https://api.firecrawl.dev/v0/search',
|
||||
headers=headers,
|
||||
json=json_data
|
||||
)
|
||||
if response.status_code == 200:
|
||||
response = response.json()
|
||||
if response['success'] == True:
|
||||
return response['data']
|
||||
else:
|
||||
raise Exception(f'Failed to search. Error: {response["error"]}')
|
||||
|
||||
elif response.status_code in [402, 409, 500]:
|
||||
error_message = response.json().get('error', 'Unknown error occurred')
|
||||
raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
|
||||
else:
|
||||
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||
|
||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
||||
headers = self._prepare_headers()
|
||||
|
@ -1,7 +1,7 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: firecrawl-py
|
||||
Version: 0.0.5
|
||||
Version: 0.0.6
|
||||
Summary: Python SDK for Firecrawl API
|
||||
Home-page: https://github.com/mendableai/firecrawl-py
|
||||
Home-page: https://github.com/mendableai/firecrawl
|
||||
Author: Mendable.ai
|
||||
Author-email: nick@mendable.ai
|
||||
|
@ -2,8 +2,8 @@ from setuptools import setup, find_packages
|
||||
|
||||
setup(
|
||||
name='firecrawl-py',
|
||||
version='0.0.5',
|
||||
url='https://github.com/mendableai/firecrawl-py',
|
||||
version='0.0.6',
|
||||
url='https://github.com/mendableai/firecrawl',
|
||||
author='Mendable.ai',
|
||||
author_email='nick@mendable.ai',
|
||||
description='Python SDK for Firecrawl API',
|
||||
|
Loading…
x
Reference in New Issue
Block a user