Merge branch 'main' into mdp/dotenv_jest
This commit is contained in:
commit
f368e94cee
@ -180,6 +180,15 @@ url = 'https://example.com'
|
|||||||
scraped_data = app.scrape_url(url)
|
scraped_data = app.scrape_url(url)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Search for a query
|
||||||
|
|
||||||
|
Performs a web search, retrieves the top results, extracts data from each page, and returns their markdown.
|
||||||
|
|
||||||
|
```python
|
||||||
|
query = 'what is mendable?'
|
||||||
|
search_result = app.search(query)
|
||||||
|
```
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
|
We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request.
|
||||||
|
@ -373,33 +373,36 @@
|
|||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
"data": {
|
"data": {
|
||||||
"type": "object",
|
"type": "array",
|
||||||
"properties": {
|
"items": {
|
||||||
"url": {
|
"type": "object",
|
||||||
"type": "string"
|
"properties": {
|
||||||
},
|
"url": {
|
||||||
"markdown": {
|
"type": "string"
|
||||||
"type": "string"
|
},
|
||||||
},
|
"markdown": {
|
||||||
"content": {
|
"type": "string"
|
||||||
"type": "string"
|
},
|
||||||
},
|
"content": {
|
||||||
"metadata": {
|
"type": "string"
|
||||||
"type": "object",
|
},
|
||||||
"properties": {
|
"metadata": {
|
||||||
"title": {
|
"type": "object",
|
||||||
"type": "string"
|
"properties": {
|
||||||
},
|
"title": {
|
||||||
"description": {
|
"type": "string"
|
||||||
"type": "string"
|
},
|
||||||
},
|
"description": {
|
||||||
"language": {
|
"type": "string"
|
||||||
"type": "string",
|
},
|
||||||
"nullable": true
|
"language": {
|
||||||
},
|
"type": "string",
|
||||||
"sourceURL": {
|
"nullable": true
|
||||||
"type": "string",
|
},
|
||||||
"format": "uri"
|
"sourceURL": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "uri"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,9 @@ import { SearchResult } from "../../src/lib/entities";
|
|||||||
import { google_search } from "./googlesearch";
|
import { google_search } from "./googlesearch";
|
||||||
import { serper_search } from "./serper";
|
import { serper_search } from "./serper";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
export async function search({
|
export async function search({
|
||||||
query,
|
query,
|
||||||
advanced = false,
|
advanced = false,
|
||||||
|
@ -61,6 +61,43 @@ export default class FirecrawlApp {
|
|||||||
return { success: false, error: 'Internal server error.' };
|
return { success: false, error: 'Internal server error.' };
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Searches for a query using the Firecrawl API.
|
||||||
|
* @param {string} query - The query to search for.
|
||||||
|
* @param {Params | null} params - Additional parameters for the search request.
|
||||||
|
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||||
|
*/
|
||||||
|
search(query_1) {
|
||||||
|
return __awaiter(this, arguments, void 0, function* (query, params = null) {
|
||||||
|
const headers = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': `Bearer ${this.apiKey}`,
|
||||||
|
};
|
||||||
|
let jsonData = { query };
|
||||||
|
if (params) {
|
||||||
|
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||||
|
if (response.status === 200) {
|
||||||
|
const responseData = response.data;
|
||||||
|
if (responseData.success) {
|
||||||
|
return responseData;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
this.handleError(response, 'search');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (error) {
|
||||||
|
throw new Error(error.message);
|
||||||
|
}
|
||||||
|
return { success: false, error: 'Internal server error.' };
|
||||||
|
});
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to crawl.
|
* @param {string} url - The URL to crawl.
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.14",
|
"version": "0.0.15",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "build/index.js",
|
"main": "build/index.js",
|
||||||
"types": "types/index.d.ts",
|
"types": "types/index.d.ts",
|
||||||
|
@ -23,6 +23,14 @@ export interface ScrapeResponse {
|
|||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Response interface for searching operations.
|
||||||
|
*/
|
||||||
|
export interface SearchResponse {
|
||||||
|
success: boolean;
|
||||||
|
data?: any;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Response interface for crawling operations.
|
* Response interface for crawling operations.
|
||||||
*/
|
*/
|
||||||
@ -94,6 +102,39 @@ export default class FirecrawlApp {
|
|||||||
return { success: false, error: 'Internal server error.' };
|
return { success: false, error: 'Internal server error.' };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Searches for a query using the Firecrawl API.
|
||||||
|
* @param {string} query - The query to search for.
|
||||||
|
* @param {Params | null} params - Additional parameters for the search request.
|
||||||
|
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||||
|
*/
|
||||||
|
async search(query: string, params: Params | null = null): Promise<SearchResponse> {
|
||||||
|
const headers: AxiosRequestHeaders = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': `Bearer ${this.apiKey}`,
|
||||||
|
} as AxiosRequestHeaders;
|
||||||
|
let jsonData: Params = { query };
|
||||||
|
if (params) {
|
||||||
|
jsonData = { ...jsonData, ...params };
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||||
|
if (response.status === 200) {
|
||||||
|
const responseData = response.data;
|
||||||
|
if (responseData.success) {
|
||||||
|
return responseData;
|
||||||
|
} else {
|
||||||
|
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this.handleError(response, 'search');
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
throw new Error(error.message);
|
||||||
|
}
|
||||||
|
return { success: false, error: 'Internal server error.' };
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to crawl.
|
* @param {string} url - The URL to crawl.
|
||||||
|
15
apps/js-sdk/firecrawl/types/index.d.ts
vendored
15
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -19,6 +19,14 @@ export interface ScrapeResponse {
|
|||||||
data?: any;
|
data?: any;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for searching operations.
|
||||||
|
*/
|
||||||
|
export interface SearchResponse {
|
||||||
|
success: boolean;
|
||||||
|
data?: any;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Response interface for crawling operations.
|
* Response interface for crawling operations.
|
||||||
*/
|
*/
|
||||||
@ -55,6 +63,13 @@ export default class FirecrawlApp {
|
|||||||
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
||||||
*/
|
*/
|
||||||
scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
|
scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
|
||||||
|
/**
|
||||||
|
* Searches for a query using the Firecrawl API.
|
||||||
|
* @param {string} query - The query to search for.
|
||||||
|
* @param {Params | null} params - Additional parameters for the search request.
|
||||||
|
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||||
|
*/
|
||||||
|
search(query: string, params?: Params | null): Promise<SearchResponse>;
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to crawl.
|
* @param {string} url - The URL to crawl.
|
||||||
|
8
apps/js-sdk/package-lock.json
generated
8
apps/js-sdk/package-lock.json
generated
@ -9,14 +9,14 @@
|
|||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@mendable/firecrawl-js": "^0.0.8",
|
"@mendable/firecrawl-js": "^0.0.15",
|
||||||
"axios": "^1.6.8"
|
"axios": "^1.6.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@mendable/firecrawl-js": {
|
"node_modules/@mendable/firecrawl-js": {
|
||||||
"version": "0.0.8",
|
"version": "0.0.15",
|
||||||
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.15.tgz",
|
||||||
"integrity": "sha512-dD7eA5X6UT8CM3z7qCqHgA4YbCsdwmmlaT/L0/ozM6gGvb0PnJMoB+e51+n4lAW8mxXOvHGbq9nrgBT1wEhhhw==",
|
"integrity": "sha512-e3iCCrLIiEh+jEDerGV9Uhdkn8ymo+sG+k3osCwPg51xW1xUdAnmlcHrcJoR43RvKXdvD/lqoxg8odUEsqyH+w==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
"dotenv": "^16.4.5"
|
"dotenv": "^16.4.5"
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
"author": "",
|
"author": "",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@mendable/firecrawl-js": "^0.0.8",
|
"@mendable/firecrawl-js": "^0.0.15",
|
||||||
"axios": "^1.6.8"
|
"axios": "^1.6.8"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,29 +1,36 @@
|
|||||||
from fastapi import FastAPI, Response
|
from fastapi import FastAPI
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright, Browser
|
||||||
import os
|
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
class UrlModel(BaseModel):
|
class UrlModel(BaseModel):
|
||||||
url: str
|
url: str
|
||||||
|
|
||||||
@app.post("/html") # Kept as POST to accept body parameters
|
|
||||||
async def root(body: UrlModel): # Using Pydantic model for request body
|
|
||||||
async with async_playwright() as p:
|
|
||||||
browser = await p.chromium.launch()
|
|
||||||
|
|
||||||
context = await browser.new_context()
|
browser: Browser = None
|
||||||
page = await context.new_page()
|
|
||||||
|
|
||||||
await page.goto(body.url) # Adjusted to use the url from the request body model
|
|
||||||
page_content = await page.content() # Get the HTML content of the page
|
|
||||||
|
|
||||||
await context.close()
|
@app.on_event("startup")
|
||||||
await browser.close()
|
async def startup_event():
|
||||||
|
global browser
|
||||||
|
playwright = await async_playwright().start()
|
||||||
|
browser = await playwright.chromium.launch()
|
||||||
|
|
||||||
json_compatible_item_data = {"content": page_content}
|
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
@app.on_event("shutdown")
|
||||||
|
async def shutdown_event():
|
||||||
|
await browser.close()
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/html")
|
||||||
|
async def root(body: UrlModel):
|
||||||
|
context = await browser.new_context()
|
||||||
|
page = await context.new_page()
|
||||||
|
await page.goto(body.url)
|
||||||
|
page_content = await page.content()
|
||||||
|
await context.close()
|
||||||
|
json_compatible_item_data = {"content": page_content}
|
||||||
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
@ -47,6 +47,15 @@ url = 'https://example.com'
|
|||||||
scraped_data = app.scrape_url(url)
|
scraped_data = app.scrape_url(url)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Search for a query
|
||||||
|
|
||||||
|
Used to search the web, get the most relevant results, scrape each page, and return the markdown.
|
||||||
|
|
||||||
|
```python
|
||||||
|
query = 'what is mendable?'
|
||||||
|
search_result = app.search(query)
|
||||||
|
```
|
||||||
|
|
||||||
### Crawling a Website
|
### Crawling a Website
|
||||||
|
|
||||||
To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
|
To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
|
||||||
|
@ -32,6 +32,32 @@ class FirecrawlApp:
|
|||||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
||||||
|
|
||||||
|
def search(self, query, params=None):
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {self.api_key}'
|
||||||
|
}
|
||||||
|
json_data = {'query': query}
|
||||||
|
if params:
|
||||||
|
json_data.update(params)
|
||||||
|
response = requests.post(
|
||||||
|
'https://api.firecrawl.dev/v0/search',
|
||||||
|
headers=headers,
|
||||||
|
json=json_data
|
||||||
|
)
|
||||||
|
if response.status_code == 200:
|
||||||
|
response = response.json()
|
||||||
|
if response['success'] == True:
|
||||||
|
return response['data']
|
||||||
|
else:
|
||||||
|
raise Exception(f'Failed to search. Error: {response["error"]}')
|
||||||
|
|
||||||
|
elif response.status_code in [402, 409, 500]:
|
||||||
|
error_message = response.json().get('error', 'Unknown error occurred')
|
||||||
|
raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
|
||||||
|
else:
|
||||||
|
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||||
|
|
||||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
|
BIN
apps/python-sdk/dist/firecrawl-py-0.0.5.tar.gz
vendored
BIN
apps/python-sdk/dist/firecrawl-py-0.0.5.tar.gz
vendored
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -32,6 +32,32 @@ class FirecrawlApp:
|
|||||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
||||||
|
|
||||||
|
def search(self, query, params=None):
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {self.api_key}'
|
||||||
|
}
|
||||||
|
json_data = {'query': query}
|
||||||
|
if params:
|
||||||
|
json_data.update(params)
|
||||||
|
response = requests.post(
|
||||||
|
'https://api.firecrawl.dev/v0/search',
|
||||||
|
headers=headers,
|
||||||
|
json=json_data
|
||||||
|
)
|
||||||
|
if response.status_code == 200:
|
||||||
|
response = response.json()
|
||||||
|
if response['success'] == True:
|
||||||
|
return response['data']
|
||||||
|
else:
|
||||||
|
raise Exception(f'Failed to search. Error: {response["error"]}')
|
||||||
|
|
||||||
|
elif response.status_code in [402, 409, 500]:
|
||||||
|
error_message = response.json().get('error', 'Unknown error occurred')
|
||||||
|
raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
|
||||||
|
else:
|
||||||
|
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||||
|
|
||||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
Metadata-Version: 2.1
|
Metadata-Version: 2.1
|
||||||
Name: firecrawl-py
|
Name: firecrawl-py
|
||||||
Version: 0.0.5
|
Version: 0.0.6
|
||||||
Summary: Python SDK for Firecrawl API
|
Summary: Python SDK for Firecrawl API
|
||||||
Home-page: https://github.com/mendableai/firecrawl-py
|
Home-page: https://github.com/mendableai/firecrawl
|
||||||
Author: Mendable.ai
|
Author: Mendable.ai
|
||||||
Author-email: nick@mendable.ai
|
Author-email: nick@mendable.ai
|
||||||
|
@ -2,8 +2,8 @@ from setuptools import setup, find_packages
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='firecrawl-py',
|
name='firecrawl-py',
|
||||||
version='0.0.5',
|
version='0.0.6',
|
||||||
url='https://github.com/mendableai/firecrawl-py',
|
url='https://github.com/mendableai/firecrawl',
|
||||||
author='Mendable.ai',
|
author='Mendable.ai',
|
||||||
author_email='nick@mendable.ai',
|
author_email='nick@mendable.ai',
|
||||||
description='Python SDK for Firecrawl API',
|
description='Python SDK for Firecrawl API',
|
||||||
|
Loading…
Reference in New Issue
Block a user