Nick: v8 python
This commit is contained in:
parent
e6dbbf1bab
commit
4c88d5da66
@ -1,5 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
import requests
|
import requests
|
||||||
|
import time
|
||||||
|
|
||||||
class FirecrawlApp:
|
class FirecrawlApp:
|
||||||
def __init__(self, api_key=None):
|
def __init__(self, api_key=None):
|
||||||
@ -7,26 +9,45 @@ class FirecrawlApp:
|
|||||||
if self.api_key is None:
|
if self.api_key is None:
|
||||||
raise ValueError('No API key provided')
|
raise ValueError('No API key provided')
|
||||||
|
|
||||||
def scrape_url(self, url, params=None):
|
|
||||||
|
|
||||||
|
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
||||||
headers = {
|
headers = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'Authorization': f'Bearer {self.api_key}'
|
'Authorization': f'Bearer {self.api_key}'
|
||||||
}
|
}
|
||||||
json_data = {'url': url}
|
# Prepare the base scrape parameters with the URL
|
||||||
|
scrape_params = {'url': url}
|
||||||
|
|
||||||
|
# If there are additional params, process them
|
||||||
if params:
|
if params:
|
||||||
json_data.update(params)
|
# Initialize extractorOptions if present
|
||||||
|
extractor_options = params.get('extractorOptions', {})
|
||||||
|
# Check and convert the extractionSchema if it's a Pydantic model
|
||||||
|
if 'extractionSchema' in extractor_options:
|
||||||
|
if hasattr(extractor_options['extractionSchema'], 'schema'):
|
||||||
|
extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
|
||||||
|
# Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided
|
||||||
|
extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
|
||||||
|
# Update the scrape_params with the processed extractorOptions
|
||||||
|
scrape_params['extractorOptions'] = extractor_options
|
||||||
|
|
||||||
|
# Include any other params directly at the top level of scrape_params
|
||||||
|
for key, value in params.items():
|
||||||
|
if key != 'extractorOptions':
|
||||||
|
scrape_params[key] = value
|
||||||
|
# Make the POST request with the prepared headers and JSON data
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
'https://api.firecrawl.dev/v0/scrape',
|
'https://api.firecrawl.dev/v0/scrape',
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=json_data
|
json=scrape_params
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
response = response.json()
|
response = response.json()
|
||||||
if response['success'] == True:
|
if response['success']:
|
||||||
return response['data']
|
return response['data']
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
||||||
|
|
||||||
elif response.status_code in [402, 409, 500]:
|
elif response.status_code in [402, 409, 500]:
|
||||||
error_message = response.json().get('error', 'Unknown error occurred')
|
error_message = response.json().get('error', 'Unknown error occurred')
|
||||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
||||||
@ -88,11 +109,23 @@ class FirecrawlApp:
|
|||||||
'Authorization': f'Bearer {self.api_key}'
|
'Authorization': f'Bearer {self.api_key}'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _post_request(self, url, data, headers):
|
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
|
||||||
return requests.post(url, headers=headers, json=data)
|
for attempt in range(retries):
|
||||||
|
response = requests.post(url, headers=headers, json=data)
|
||||||
|
if response.status_code == 502:
|
||||||
|
time.sleep(backoff_factor * (2 ** attempt))
|
||||||
|
else:
|
||||||
|
return response
|
||||||
|
return response
|
||||||
|
|
||||||
def _get_request(self, url, headers):
|
def _get_request(self, url, headers, retries=3, backoff_factor=0.5):
|
||||||
return requests.get(url, headers=headers)
|
for attempt in range(retries):
|
||||||
|
response = requests.get(url, headers=headers)
|
||||||
|
if response.status_code == 502:
|
||||||
|
time.sleep(backoff_factor * (2 ** attempt))
|
||||||
|
else:
|
||||||
|
return response
|
||||||
|
return response
|
||||||
|
|
||||||
def _monitor_job_status(self, job_id, headers, timeout):
|
def _monitor_job_status(self, job_id, headers, timeout):
|
||||||
import time
|
import time
|
||||||
|
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
BIN
apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz
vendored
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl
vendored
Normal file
BIN
apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl
vendored
Normal file
Binary file not shown.
@ -9,12 +9,7 @@ class FirecrawlApp:
|
|||||||
if self.api_key is None:
|
if self.api_key is None:
|
||||||
raise ValueError('No API key provided')
|
raise ValueError('No API key provided')
|
||||||
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from typing import Optional, Dict, Any
|
|
||||||
|
|
||||||
class ScrapeParams(BaseModel):
|
|
||||||
url: str
|
|
||||||
extractorOptions: Optional[Dict[str, Any]] = None
|
|
||||||
|
|
||||||
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
||||||
headers = {
|
headers = {
|
||||||
@ -41,7 +36,6 @@ class FirecrawlApp:
|
|||||||
for key, value in params.items():
|
for key, value in params.items():
|
||||||
if key != 'extractorOptions':
|
if key != 'extractorOptions':
|
||||||
scrape_params[key] = value
|
scrape_params[key] = value
|
||||||
print(scrape_params)
|
|
||||||
# Make the POST request with the prepared headers and JSON data
|
# Make the POST request with the prepared headers and JSON data
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
'https://api.firecrawl.dev/v0/scrape',
|
'https://api.firecrawl.dev/v0/scrape',
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
Metadata-Version: 2.1
|
Metadata-Version: 2.1
|
||||||
Name: firecrawl-py
|
Name: firecrawl-py
|
||||||
Version: 0.0.6
|
Version: 0.0.8
|
||||||
Summary: Python SDK for Firecrawl API
|
Summary: Python SDK for Firecrawl API
|
||||||
Home-page: https://github.com/mendableai/firecrawl
|
Home-page: https://github.com/mendableai/firecrawl
|
||||||
Author: Mendable.ai
|
Author: Mendable.ai
|
||||||
|
@ -2,12 +2,12 @@ from setuptools import setup, find_packages
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='firecrawl-py',
|
name='firecrawl-py',
|
||||||
version='0.0.7',
|
version='0.0.8',
|
||||||
url='https://github.com/mendableai/firecrawl',
|
url='https://github.com/mendableai/firecrawl',
|
||||||
author='Mendable.ai',
|
author='Mendable.ai',
|
||||||
author_email='nick@mendable.ai',
|
author_email='nick@mendable.ai',
|
||||||
description='Python SDK for Firecrawl API',
|
description='Python SDK for Firecrawl API',
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'requests',
|
'requests',
|
||||||
],
|
],
|
||||||
|
Loading…
Reference in New Issue
Block a user