Merge branch 'test-sdks' of https://github.com/mendableai/firecrawl into test-sdks
This commit is contained in:
commit
667d3e4c4f
@ -27,6 +27,13 @@ kill_timeout = '5s'
|
||||
hard_limit = 200
|
||||
soft_limit = 100
|
||||
|
||||
[[http_service.checks]]
|
||||
grace_period = "10s"
|
||||
interval = "30s"
|
||||
method = "GET"
|
||||
timeout = "5s"
|
||||
path = "/"
|
||||
|
||||
[[services]]
|
||||
protocol = 'tcp'
|
||||
internal_port = 8080
|
||||
|
@ -1,12 +1,12 @@
|
||||
import { parseApi } from "../../src/lib/parseApi";
|
||||
import { getRateLimiter, } from "../../src/services/rate-limiter";
|
||||
import { getRateLimiter, } from "../../src/services/rate-limiter";
|
||||
import { AuthResponse, RateLimiterMode } from "../../src/types";
|
||||
import { supabase_service } from "../../src/services/supabase";
|
||||
import { withAuth } from "../../src/lib/withAuth";
|
||||
import { RateLimiterRedis } from "rate-limiter-flexible";
|
||||
import { setTraceAttributes } from '@hyperdx/node-opentelemetry';
|
||||
|
||||
export async function authenticateUser(req, res, mode?: RateLimiterMode) : Promise<AuthResponse> {
|
||||
export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<AuthResponse> {
|
||||
return withAuth(supaAuthenticateUser)(req, res, mode);
|
||||
}
|
||||
function setTrace(team_id: string, api_key: string) {
|
||||
@ -18,7 +18,7 @@ function setTrace(team_id: string, api_key: string) {
|
||||
} catch (error) {
|
||||
console.error('Error setting trace attributes:', error);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
export async function supaAuthenticateUser(
|
||||
req,
|
||||
@ -97,7 +97,7 @@ export async function supaAuthenticateUser(
|
||||
team_id: team_id,
|
||||
plan: plan
|
||||
}
|
||||
switch (mode) {
|
||||
switch (mode) {
|
||||
case RateLimiterMode.Crawl:
|
||||
rateLimiter = getRateLimiter(RateLimiterMode.Crawl, token, subscriptionData.plan);
|
||||
break;
|
||||
@ -126,9 +126,11 @@ export async function supaAuthenticateUser(
|
||||
await rateLimiter.consume(iptoken);
|
||||
} catch (rateLimiterRes) {
|
||||
console.error(rateLimiterRes);
|
||||
const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1;
|
||||
const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext);
|
||||
return {
|
||||
success: false,
|
||||
error: "Rate limit exceeded. Too many requests, try again in 1 minute.",
|
||||
error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Please retry after ${secs}s, resets at ${retryDate}`,
|
||||
status: 429,
|
||||
};
|
||||
}
|
||||
@ -155,9 +157,9 @@ export async function supaAuthenticateUser(
|
||||
normalizedApi = parseApi(token);
|
||||
|
||||
const { data, error } = await supabase_service
|
||||
.from("api_keys")
|
||||
.select("*")
|
||||
.eq("key", normalizedApi);
|
||||
.from("api_keys")
|
||||
.select("*")
|
||||
.eq("key", normalizedApi);
|
||||
|
||||
if (error || !data || data.length === 0) {
|
||||
return {
|
||||
@ -170,7 +172,7 @@ export async function supaAuthenticateUser(
|
||||
subscriptionData = data[0];
|
||||
}
|
||||
|
||||
return { success: true, team_id: subscriptionData.team_id };
|
||||
return { success: true, team_id: subscriptionData.team_id };
|
||||
}
|
||||
|
||||
function getPlanByPriceId(price_id: string) {
|
||||
|
@ -28,11 +28,13 @@ export async function searchHelper(
|
||||
|
||||
const tbs = searchOptions.tbs ?? null;
|
||||
const filter = searchOptions.filter ?? null;
|
||||
const num_results = searchOptions.limit ?? 7;
|
||||
const num_results_buffer = Math.floor(num_results * 1.5);
|
||||
|
||||
let res = await search({
|
||||
query: query,
|
||||
advanced: advanced,
|
||||
num_results: searchOptions.limit ?? 7,
|
||||
num_results: num_results_buffer,
|
||||
tbs: tbs,
|
||||
filter: filter,
|
||||
lang: searchOptions.lang ?? "en",
|
||||
@ -47,6 +49,9 @@ export async function searchHelper(
|
||||
}
|
||||
|
||||
res = res.filter((r) => !isUrlBlocked(r.url));
|
||||
if (res.length > num_results) {
|
||||
res = res.slice(0, num_results);
|
||||
}
|
||||
|
||||
if (res.length === 0) {
|
||||
return { success: true, error: "No search results found", returnCode: 200 };
|
||||
|
@ -1,7 +1,6 @@
|
||||
const socialMediaBlocklist = [
|
||||
'facebook.com',
|
||||
'twitter.com',
|
||||
'x.com',
|
||||
'instagram.com',
|
||||
'linkedin.com',
|
||||
'pinterest.com',
|
||||
|
@ -1,25 +1,57 @@
|
||||
"""
|
||||
FirecrawlApp Module
|
||||
|
||||
This module provides a class `FirecrawlApp` for interacting with the Firecrawl API.
|
||||
It includes methods to scrape URLs, perform searches, initiate and monitor crawl jobs,
|
||||
and check the status of these jobs. The module uses requests for HTTP communication
|
||||
and handles retries for certain HTTP status codes.
|
||||
|
||||
Classes:
|
||||
- FirecrawlApp: Main class for interacting with the Firecrawl API.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
import requests
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class FirecrawlApp:
|
||||
def __init__(self, api_key=None, api_url='https://api.firecrawl.dev'):
|
||||
"""
|
||||
Initialize the FirecrawlApp instance.
|
||||
|
||||
Args:
|
||||
api_key (Optional[str]): API key for authenticating with the Firecrawl API.
|
||||
api_url (Optional[str]): Base URL for the Firecrawl API.
|
||||
"""
|
||||
def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
|
||||
self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
|
||||
if self.api_key is None:
|
||||
raise ValueError('No API key provided')
|
||||
self.api_url = api_url or os.getenv('FIRECRAWL_API_URL')
|
||||
|
||||
|
||||
|
||||
self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
|
||||
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
||||
"""
|
||||
Scrape the specified URL using the Firecrawl API.
|
||||
|
||||
Args:
|
||||
url (str): The URL to scrape.
|
||||
params (Optional[Dict[str, Any]]): Additional parameters for the scrape request.
|
||||
|
||||
Returns:
|
||||
Any: The scraped data if the request is successful.
|
||||
|
||||
Raises:
|
||||
Exception: If the scrape request fails.
|
||||
"""
|
||||
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
}
|
||||
# Prepare the base scrape parameters with the URL
|
||||
scrape_params = {'url': url}
|
||||
|
||||
|
||||
# If there are additional params, process them
|
||||
if params:
|
||||
# Initialize extractorOptions if present
|
||||
@ -32,7 +64,7 @@ class FirecrawlApp:
|
||||
extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
|
||||
# Update the scrape_params with the processed extractorOptions
|
||||
scrape_params['extractorOptions'] = extractor_options
|
||||
|
||||
|
||||
# Include any other params directly at the top level of scrape_params
|
||||
for key, value in params.items():
|
||||
if key != 'extractorOptions':
|
||||
@ -41,11 +73,11 @@ class FirecrawlApp:
|
||||
response = requests.post(
|
||||
f'{self.api_url}/v0/scrape',
|
||||
headers=headers,
|
||||
json=scrape_params
|
||||
json=scrape_params,
|
||||
)
|
||||
if response.status_code == 200:
|
||||
response = response.json()
|
||||
if response['success']:
|
||||
if response['success'] and 'data' in response:
|
||||
return response['data']
|
||||
else:
|
||||
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
||||
@ -54,8 +86,21 @@ class FirecrawlApp:
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||
else:
|
||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
||||
|
||||
|
||||
def search(self, query, params=None):
|
||||
"""
|
||||
Perform a search using the Firecrawl API.
|
||||
|
||||
Args:
|
||||
query (str): The search query.
|
||||
params (Optional[Dict[str, Any]]): Additional parameters for the search request.
|
||||
|
||||
Returns:
|
||||
Any: The search results if the request is successful.
|
||||
|
||||
Raises:
|
||||
Exception: If the search request fails.
|
||||
"""
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
@ -70,19 +115,36 @@ class FirecrawlApp:
|
||||
)
|
||||
if response.status_code == 200:
|
||||
response = response.json()
|
||||
if response['success'] == True:
|
||||
|
||||
if response['success'] and 'data' in response:
|
||||
return response['data']
|
||||
else:
|
||||
raise Exception(f'Failed to search. Error: {response["error"]}')
|
||||
|
||||
|
||||
elif response.status_code in [402, 409, 500]:
|
||||
error_message = response.json().get('error', 'Unknown error occurred')
|
||||
raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
|
||||
else:
|
||||
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||
|
||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
||||
headers = self._prepare_headers()
|
||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
|
||||
"""
|
||||
Initiate a crawl job for the specified URL using the Firecrawl API.
|
||||
|
||||
Args:
|
||||
url (str): The URL to crawl.
|
||||
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
|
||||
wait_until_done (bool): Whether to wait until the crawl job is completed.
|
||||
timeout (int): Timeout between status checks when waiting for job completion.
|
||||
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
||||
|
||||
Returns:
|
||||
Any: The crawl job ID or the crawl results if waiting until completion.
|
||||
|
||||
Raises:
|
||||
Exception: If the crawl job initiation or monitoring fails.
|
||||
"""
|
||||
headers = self._prepare_headers(idempotency_key)
|
||||
json_data = {'url': url}
|
||||
if params:
|
||||
json_data.update(params)
|
||||
@ -97,6 +159,18 @@ class FirecrawlApp:
|
||||
self._handle_error(response, 'start crawl job')
|
||||
|
||||
def check_crawl_status(self, job_id):
|
||||
"""
|
||||
Check the status of a crawl job using the Firecrawl API.
|
||||
|
||||
Args:
|
||||
job_id (str): The ID of the crawl job.
|
||||
|
||||
Returns:
|
||||
Any: The status of the crawl job.
|
||||
|
||||
Raises:
|
||||
Exception: If the status check request fails.
|
||||
"""
|
||||
headers = self._prepare_headers()
|
||||
response = self._get_request(f'{self.api_url}/v0/crawl/status/{job_id}', headers)
|
||||
if response.status_code == 200:
|
||||
@ -104,13 +178,45 @@ class FirecrawlApp:
|
||||
else:
|
||||
self._handle_error(response, 'check crawl status')
|
||||
|
||||
def _prepare_headers(self):
|
||||
def _prepare_headers(self, idempotency_key=None):
|
||||
"""
|
||||
Prepare the headers for API requests.
|
||||
|
||||
Args:
|
||||
idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
|
||||
"""
|
||||
if idempotency_key:
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}',
|
||||
'x-idempotency-key': idempotency_key
|
||||
}
|
||||
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {self.api_key}'
|
||||
'Authorization': f'Bearer {self.api_key}',
|
||||
}
|
||||
|
||||
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
|
||||
"""
|
||||
Make a POST request with retries.
|
||||
|
||||
Args:
|
||||
url (str): The URL to send the POST request to.
|
||||
data (Dict[str, Any]): The JSON data to include in the POST request.
|
||||
headers (Dict[str, str]): The headers to include in the POST request.
|
||||
retries (int): Number of retries for the request.
|
||||
backoff_factor (float): Backoff factor for retries.
|
||||
|
||||
Returns:
|
||||
requests.Response: The response from the POST request.
|
||||
|
||||
Raises:
|
||||
requests.RequestException: If the request fails after the specified retries.
|
||||
"""
|
||||
for attempt in range(retries):
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
if response.status_code == 502:
|
||||
@ -120,6 +226,21 @@ class FirecrawlApp:
|
||||
return response
|
||||
|
||||
def _get_request(self, url, headers, retries=3, backoff_factor=0.5):
|
||||
"""
|
||||
Make a GET request with retries.
|
||||
|
||||
Args:
|
||||
url (str): The URL to send the GET request to.
|
||||
headers (Dict[str, str]): The headers to include in the GET request.
|
||||
retries (int): Number of retries for the request.
|
||||
backoff_factor (float): Backoff factor for retries.
|
||||
|
||||
Returns:
|
||||
requests.Response: The response from the GET request.
|
||||
|
||||
Raises:
|
||||
requests.RequestException: If the request fails after the specified retries.
|
||||
"""
|
||||
for attempt in range(retries):
|
||||
response = requests.get(url, headers=headers)
|
||||
if response.status_code == 502:
|
||||
@ -129,7 +250,20 @@ class FirecrawlApp:
|
||||
return response
|
||||
|
||||
def _monitor_job_status(self, job_id, headers, timeout):
|
||||
import time
|
||||
"""
|
||||
Monitor the status of a crawl job until completion.
|
||||
|
||||
Args:
|
||||
job_id (str): The ID of the crawl job.
|
||||
headers (Dict[str, str]): The headers to include in the status check requests.
|
||||
timeout (int): Timeout between status checks.
|
||||
|
||||
Returns:
|
||||
Any: The crawl results if the job is completed successfully.
|
||||
|
||||
Raises:
|
||||
Exception: If the job fails or an error occurs during status checks.
|
||||
"""
|
||||
while True:
|
||||
status_response = self._get_request(f'{self.api_url}/v0/crawl/status/{job_id}', headers)
|
||||
if status_response.status_code == 200:
|
||||
@ -140,8 +274,7 @@ class FirecrawlApp:
|
||||
else:
|
||||
raise Exception('Crawl job completed but no data was returned')
|
||||
elif status_data['status'] in ['active', 'paused', 'pending', 'queued']:
|
||||
if timeout < 2:
|
||||
timeout = 2
|
||||
timeout=max(timeout,2)
|
||||
time.sleep(timeout) # Wait for the specified timeout before checking again
|
||||
else:
|
||||
raise Exception(f'Crawl job failed or was stopped. Status: {status_data["status"]}')
|
||||
@ -149,6 +282,16 @@ class FirecrawlApp:
|
||||
self._handle_error(status_response, 'check crawl status')
|
||||
|
||||
def _handle_error(self, response, action):
|
||||
"""
|
||||
Handle errors from API responses.
|
||||
|
||||
Args:
|
||||
response (requests.Response): The response object from the API request.
|
||||
action (str): Description of the action that was being performed.
|
||||
|
||||
Raises:
|
||||
Exception: An exception with a message containing the status code and error details from the response.
|
||||
"""
|
||||
if response.status_code in [402, 408, 409, 500]:
|
||||
error_message = response.json().get('error', 'Unknown error occurred')
|
||||
raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}')
|
||||
|
BIN
apps/python-sdk/dist/firecrawl-py-0.0.11.tar.gz
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl-py-0.0.11.tar.gz
vendored
Normal file
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl-py-0.0.9.tar.gz
vendored
BIN
apps/python-sdk/dist/firecrawl-py-0.0.9.tar.gz
vendored
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl_py-0.0.11-py3-none-any.whl
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl_py-0.0.11-py3-none-any.whl
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,7 +1,160 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: firecrawl-py
|
||||
Version: 0.0.9
|
||||
Version: 0.0.11
|
||||
Summary: Python SDK for Firecrawl API
|
||||
Home-page: https://github.com/mendableai/firecrawl
|
||||
Author: Mendable.ai
|
||||
Author-email: nick@mendable.ai
|
||||
License: GNU General Public License v3 (GPLv3)
|
||||
Project-URL: Documentation, https://docs.firecrawl.dev
|
||||
Project-URL: Source, https://github.com/mendableai/firecrawl
|
||||
Project-URL: Tracker, https://github.com/mendableai/firecrawl/issues
|
||||
Keywords: SDK API firecrawl
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Environment :: Web Environment
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
||||
Classifier: Natural Language :: English
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Topic :: Internet
|
||||
Classifier: Topic :: Internet :: WWW/HTTP
|
||||
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
||||
Classifier: Topic :: Software Development
|
||||
Classifier: Topic :: Software Development :: Libraries
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Topic :: Text Processing
|
||||
Classifier: Topic :: Text Processing :: Indexing
|
||||
Requires-Python: >=3.8
|
||||
Description-Content-Type: text/markdown
|
||||
|
||||
# Firecrawl Python SDK
|
||||
|
||||
The Firecrawl Python SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
|
||||
|
||||
## Installation
|
||||
|
||||
To install the Firecrawl Python SDK, you can use pip:
|
||||
|
||||
```bash
|
||||
pip install firecrawl-py
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
|
||||
2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as a parameter to the `FirecrawlApp` class.
|
||||
|
||||
|
||||
Here's an example of how to use the SDK:
|
||||
|
||||
```python
|
||||
from firecrawl import FirecrawlApp
|
||||
|
||||
# Initialize the FirecrawlApp with your API key
|
||||
app = FirecrawlApp(api_key='your_api_key')
|
||||
|
||||
# Scrape a single URL
|
||||
url = 'https://mendable.ai'
|
||||
scraped_data = app.scrape_url(url)
|
||||
|
||||
# Crawl a website
|
||||
crawl_url = 'https://mendable.ai'
|
||||
params = {
|
||||
'pageOptions': {
|
||||
'onlyMainContent': True
|
||||
}
|
||||
}
|
||||
crawl_result = app.crawl_url(crawl_url, params=params)
|
||||
```
|
||||
|
||||
### Scraping a URL
|
||||
|
||||
To scrape a single URL, use the `scrape_url` method. It takes the URL as a parameter and returns the scraped data as a dictionary.
|
||||
|
||||
```python
|
||||
url = 'https://example.com'
|
||||
scraped_data = app.scrape_url(url)
|
||||
```
|
||||
### Extracting structured data from a URL
|
||||
|
||||
With LLM extraction, you can easily extract structured data from any URL. We also support pydantic schemas to make this easier. Here is how to use it:
|
||||
|
||||
```python
|
||||
class ArticleSchema(BaseModel):
|
||||
title: str
|
||||
points: int
|
||||
by: str
|
||||
commentsURL: str
|
||||
|
||||
class TopArticlesSchema(BaseModel):
|
||||
top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
|
||||
|
||||
data = app.scrape_url('https://news.ycombinator.com', {
|
||||
'extractorOptions': {
|
||||
'extractionSchema': TopArticlesSchema.model_json_schema(),
|
||||
'mode': 'llm-extraction'
|
||||
},
|
||||
'pageOptions':{
|
||||
'onlyMainContent': True
|
||||
}
|
||||
})
|
||||
print(data["llm_extraction"])
|
||||
```
|
||||
|
||||
### Search for a query
|
||||
|
||||
Searches the web, gets the most relevant results, scrapes each page, and returns the markdown.
|
||||
|
||||
```python
|
||||
query = 'what is mendable?'
|
||||
search_result = app.search(query)
|
||||
```
|
||||
|
||||
### Crawling a Website
|
||||
|
||||
To crawl a website, use the `crawl_url` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
|
||||
|
||||
The `wait_until_done` parameter determines whether the method should wait for the crawl job to complete before returning the result. If set to `True`, the method will repeatedly check the status of the crawl job, waiting `timeout` seconds (minimum 2) between checks, until the job is completed. If set to `False`, the method will return immediately with the job ID, and you can manually check the status of the crawl job using the `check_crawl_status` method.
|
||||
|
||||
```python
|
||||
crawl_url = 'https://example.com'
|
||||
params = {
|
||||
'crawlerOptions': {
|
||||
'excludes': ['blog/*'],
|
||||
'includes': [], # leave empty for all pages
|
||||
'limit': 1000,
|
||||
},
|
||||
'pageOptions': {
|
||||
'onlyMainContent': True
|
||||
}
|
||||
}
|
||||
crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
|
||||
```
|
||||
|
||||
If `wait_until_done` is set to `True`, the `crawl_url` method will return the crawl result once the job is completed. If the job fails or is stopped, an exception will be raised.
|
||||
|
||||
### Checking Crawl Status
|
||||
|
||||
To check the status of a crawl job, use the `check_crawl_status` method. It takes the job ID as a parameter and returns the current status of the crawl job.
|
||||
|
||||
```python
|
||||
job_id = crawl_result['jobId']
|
||||
status = app.check_crawl_status(job_id)
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
|
||||
|
||||
## License
|
||||
|
||||
The Firecrawl Python SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
|
||||
|
@ -1,16 +1,52 @@
|
||||
from setuptools import setup, find_packages
|
||||
from pathlib import Path
|
||||
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
this_directory = Path(__file__).parent
|
||||
long_description_content = (this_directory / "README.md").read_text()
|
||||
|
||||
setup(
|
||||
name='firecrawl-py',
|
||||
version='0.0.9',
|
||||
url='https://github.com/mendableai/firecrawl',
|
||||
author='Mendable.ai',
|
||||
author_email='nick@mendable.ai',
|
||||
description='Python SDK for Firecrawl API',
|
||||
name="firecrawl-py",
|
||||
version="0.0.11",
|
||||
url="https://github.com/mendableai/firecrawl",
|
||||
author="Mendable.ai",
|
||||
author_email="nick@mendable.ai",
|
||||
description="Python SDK for Firecrawl API",
|
||||
long_description=long_description_content,
|
||||
long_description_content_type="text/markdown",
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
'requests',
|
||||
'pytest',
|
||||
'python-dotenv',
|
||||
],
|
||||
python_requires='>=3.8',
|
||||
classifiers=[
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: Web Environment",
|
||||
"Intended Audience :: Developers",
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
"Natural Language :: English",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Topic :: Internet",
|
||||
"Topic :: Internet :: WWW/HTTP",
|
||||
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
|
||||
"Topic :: Software Development",
|
||||
"Topic :: Software Development :: Libraries",
|
||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||
"Topic :: Text Processing",
|
||||
"Topic :: Text Processing :: Indexing",
|
||||
],
|
||||
keywords="SDK API firecrawl",
|
||||
project_urls={
|
||||
"Documentation": "https://docs.firecrawl.dev",
|
||||
"Source": "https://github.com/mendableai/firecrawl",
|
||||
"Tracker": "https://github.com/mendableai/firecrawl/issues",
|
||||
},
|
||||
license="GNU General Public License v3 (GPLv3)",
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user