0

Added Docstrings for functions

PyLint C0116: Missing function or method docstring (missing-function-docstring)
This commit is contained in:
Matt Joyce 2024-05-23 08:20:32 +10:00
parent 971e1f85c4
commit 8adf2b7132

View File

@ -11,11 +11,20 @@ Classes:
"""
import os
from typing import Any, Dict, Optional
import requests
import time
from typing import Any, Dict, Optional
import requests
class FirecrawlApp:
"""
Initialize the FirecrawlApp instance.
Args:
api_key (Optional[str]): API key for authenticating with the Firecrawl API.
api_url (Optional[str]): Base URL for the Firecrawl API.
"""
def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
if self.api_key is None:
@ -25,6 +34,20 @@ class FirecrawlApp:
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
"""
Scrape the specified URL using the Firecrawl API.
Args:
url (str): The URL to scrape.
params (Optional[Dict[str, Any]]): Additional parameters for the scrape request.
Returns:
Any: The scraped data if the request is successful.
Raises:
Exception: If the scrape request fails.
"""
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}'
@ -68,6 +91,19 @@ class FirecrawlApp:
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
def search(self, query, params=None):
"""
Perform a search using the Firecrawl API.
Args:
query (str): The search query.
params (Optional[Dict[str, Any]]): Additional parameters for the search request.
Returns:
Any: The search results if the request is successful.
Raises:
Exception: If the search request fails.
"""
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}'
@ -94,6 +130,21 @@ class FirecrawlApp:
raise Exception(f'Failed to search. Status code: {response.status_code}')
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
"""
Initiate a crawl job for the specified URL using the Firecrawl API.
Args:
url (str): The URL to crawl.
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
wait_until_done (bool): Whether to wait until the crawl job is completed.
timeout (int): Timeout between status checks when waiting for job completion.
Returns:
Any: The crawl job ID or the crawl results if waiting until completion.
Raises:
Exception: If the crawl job initiation or monitoring fails.
"""
headers = self._prepare_headers()
json_data = {'url': url}
if params:
@ -109,6 +160,18 @@ class FirecrawlApp:
self._handle_error(response, 'start crawl job')
def check_crawl_status(self, job_id):
"""
Check the status of a crawl job using the Firecrawl API.
Args:
job_id (str): The ID of the crawl job.
Returns:
Any: The status of the crawl job.
Raises:
Exception: If the status check request fails.
"""
headers = self._prepare_headers()
response = self._get_request(f'{self.api_url}/v0/crawl/status/{job_id}', headers)
if response.status_code == 200:
@ -117,12 +180,34 @@ class FirecrawlApp:
self._handle_error(response, 'check crawl status')
def _prepare_headers(self):
"""
Prepare the headers for API requests.
Returns:
Dict[str, str]: The headers including content type and authorization.
"""
return {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}'
}
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
"""
Make a POST request with retries.
Args:
url (str): The URL to send the POST request to.
data (Dict[str, Any]): The JSON data to include in the POST request.
headers (Dict[str, str]): The headers to include in the POST request.
retries (int): Number of retries for the request.
backoff_factor (float): Backoff factor for retries.
Returns:
requests.Response: The response from the POST request.
Raises:
requests.RequestException: If the request fails after the specified retries.
"""
for attempt in range(retries):
response = requests.post(url, headers=headers, json=data)
if response.status_code == 502:
@ -132,6 +217,21 @@ class FirecrawlApp:
return response
def _get_request(self, url, headers, retries=3, backoff_factor=0.5):
"""
Make a GET request with retries.
Args:
url (str): The URL to send the GET request to.
headers (Dict[str, str]): The headers to include in the GET request.
retries (int): Number of retries for the request.
backoff_factor (float): Backoff factor for retries.
Returns:
requests.Response: The response from the GET request.
Raises:
requests.RequestException: If the request fails after the specified retries.
"""
for attempt in range(retries):
response = requests.get(url, headers=headers)
if response.status_code == 502:
@ -141,6 +241,20 @@ class FirecrawlApp:
return response
def _monitor_job_status(self, job_id, headers, timeout):
"""
Monitor the status of a crawl job until completion.
Args:
job_id (str): The ID of the crawl job.
headers (Dict[str, str]): The headers to include in the status check requests.
timeout (int): Timeout between status checks.
Returns:
Any: The crawl results if the job is completed successfully.
Raises:
Exception: If the job fails or an error occurs during status checks.
"""
import time
while True:
status_response = self._get_request(f'{self.api_url}/v0/crawl/status/{job_id}', headers)
@ -161,6 +275,16 @@ class FirecrawlApp:
self._handle_error(status_response, 'check crawl status')
def _handle_error(self, response, action):
"""
Handle errors from API responses.
Args:
response (requests.Response): The response object from the API request.
action (str): Description of the action that was being performed.
Raises:
Exception: An exception with a message containing the status code and error details from the response.
"""
if response.status_code in [402, 408, 409, 500]:
error_message = response.json().get('error', 'Unknown error occurred')
raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}')