From c71ea7a795f9096bbde997bbd48539a5ec865ea0 Mon Sep 17 00:00:00 2001 From: Matt Joyce Date: Sat, 8 Jun 2024 11:08:26 +1000 Subject: [PATCH 1/5] Prepare headers consistently --- apps/python-sdk/firecrawl/firecrawl.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index b9a823f..c5207fb 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -45,10 +45,8 @@ class FirecrawlApp: Exception: If the scrape request fails. """ - headers = { - 'Content-Type': 'application/json', - 'Authorization': f'Bearer {self.api_key}' - } + headers = self._prepare_headers() + # Prepare the base scrape parameters with the URL scrape_params = {'url': url} @@ -101,10 +99,7 @@ class FirecrawlApp: Raises: Exception: If the search request fails. """ - headers = { - 'Content-Type': 'application/json', - 'Authorization': f'Bearer {self.api_key}' - } + headers = self._prepare_headers() json_data = {'query': query} if params: json_data.update(params) @@ -297,3 +292,4 @@ class FirecrawlApp: raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}') else: raise Exception(f'Unexpected error occurred while trying to {action}. Status code: {response.status_code}') + From 9f306736afc3cead950da4898f0d8b658aac860e Mon Sep 17 00:00:00 2001 From: Matt Joyce Date: Sat, 8 Jun 2024 11:18:30 +1000 Subject: [PATCH 2/5] More detailed error handling --- apps/python-sdk/firecrawl/firecrawl.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index c5207fb..f13ba72 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -287,9 +287,20 @@ class FirecrawlApp: Raises: Exception: An exception with a message containing the status code and error details from the response. """ - if response.status_code in [402, 408, 409, 500]: - error_message = response.json().get('error', 'Unknown error occurred') - raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}') + error_message = response.json().get('error', 'No additional error details provided.') + + if response.status_code == 402: + message = f"Payment Required: Failed to {action}. {error_message}" + elif response.status_code == 408: + message = f"Request Timeout: Failed to {action} as the request timed out. {error_message}" + elif response.status_code == 409: + message = f"Conflict: Failed to {action} due to a conflict. {error_message}" + elif response.status_code == 500: + message = f"Internal Server Error: Failed to {action}. {error_message}" else: - raise Exception(f'Unexpected error occurred while trying to {action}. Status code: {response.status_code}') + message = f"Unexpected error during {action}: Status code {response.status_code}. {error_message}" + + # Raise an HTTPError with the custom message and attach the response + raise requests.exceptions.HTTPError(message, response=response) + From 7477c5e5bd23b88faa30d7ddf5e34cb335a6b6fb Mon Sep 17 00:00:00 2001 From: Matt Joyce Date: Sat, 8 Jun 2024 11:28:51 +1000 Subject: [PATCH 3/5] Use error handler consistently --- apps/python-sdk/firecrawl/firecrawl.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index f13ba72..a820ef2 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -30,6 +30,7 @@ class FirecrawlApp: if self.api_key is None: raise ValueError('No API key provided') self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev') + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: """ Scrape the specified URL using the Firecrawl API. @@ -79,11 +80,8 @@ class FirecrawlApp: return response['data'] else: raise Exception(f'Failed to scrape URL. Error: {response["error"]}') - elif response.status_code in [402, 408, 409, 500]: - error_message = response.json().get('error', 'Unknown error occurred') - raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}') else: - raise Exception(f'Failed to scrape URL. Status code: {response.status_code}') + self._handle_error(response, 'scrape URL') def search(self, query, params=None): """ @@ -116,11 +114,8 @@ class FirecrawlApp: else: raise Exception(f'Failed to search. Error: {response["error"]}') - elif response.status_code in [402, 409, 500]: - error_message = response.json().get('error', 'Unknown error occurred') - raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}') else: - raise Exception(f'Failed to search. Status code: {response.status_code}') + self._handle_error(response, 'search') def crawl_url(self, url, params=None, wait_until_done=True, poll_interval=2, idempotency_key=None): """ @@ -303,4 +298,3 @@ class FirecrawlApp: # Raise an HTTPError with the custom message and attach the response raise requests.exceptions.HTTPError(message, response=response) - From 6fd9ce1c89ca19ecb737a36778f7dcb0cd2a7c35 Mon Sep 17 00:00:00 2001 From: Matt Joyce Date: Sat, 8 Jun 2024 11:46:52 +1000 Subject: [PATCH 4/5] type hints and linting --- apps/python-sdk/firecrawl/firecrawl.py | 31 +++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index a820ef2..fb12af4 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -83,7 +83,7 @@ class FirecrawlApp: else: self._handle_error(response, 'scrape URL') - def search(self, query, params=None): + def search(self, query: str, params: Optional[Dict[str, Any]] = None) -> Any: """ Perform a search using the Firecrawl API. @@ -117,7 +117,11 @@ class FirecrawlApp: else: self._handle_error(response, 'search') - def crawl_url(self, url, params=None, wait_until_done=True, poll_interval=2, idempotency_key=None): + def crawl_url(self, url: str, + params: Optional[Dict[str, Any]] = None, + wait_until_done: bool = True, + poll_interval: int = 2, + idempotency_key: Optional[str] = None) -> Any: """ Initiate a crawl job for the specified URL using the Firecrawl API. @@ -148,7 +152,7 @@ class FirecrawlApp: else: self._handle_error(response, 'start crawl job') - def check_crawl_status(self, job_id): + def check_crawl_status(self, job_id: str) -> Any: """ Check the status of a crawl job using the Firecrawl API. @@ -168,7 +172,7 @@ class FirecrawlApp: else: self._handle_error(response, 'check crawl status') - def _prepare_headers(self, idempotency_key=None): + def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]: """ Prepare the headers for API requests. @@ -190,7 +194,11 @@ class FirecrawlApp: 'Authorization': f'Bearer {self.api_key}', } - def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5): + def _post_request(self, url: str, + data: Dict[str, Any], + headers: Dict[str, str], + retries: int = 3, + backoff_factor: float = 0.5) -> requests.Response: """ Make a POST request with retries. @@ -215,7 +223,10 @@ class FirecrawlApp: return response return response - def _get_request(self, url, headers, retries=3, backoff_factor=0.5): + def _get_request(self, url: str, + headers: Dict[str, str], + retries: int = 3, + backoff_factor: float = 0.5) -> requests.Response: """ Make a GET request with retries. @@ -239,7 +250,7 @@ class FirecrawlApp: return response return response - def _monitor_job_status(self, job_id, headers, poll_interval): + def _monitor_job_status(self, job_id: str, headers: Dict[str, str], poll_interval: int) -> Any: """ Monitor the status of a crawl job until completion. @@ -271,7 +282,7 @@ class FirecrawlApp: else: self._handle_error(status_response, 'check crawl status') - def _handle_error(self, response, action): + def _handle_error(self, response: requests.Response, action: str) -> None: """ Handle errors from API responses. @@ -283,7 +294,7 @@ class FirecrawlApp: Exception: An exception with a message containing the status code and error details from the response. """ error_message = response.json().get('error', 'No additional error details provided.') - + if response.status_code == 402: message = f"Payment Required: Failed to {action}. {error_message}" elif response.status_code == 408: @@ -297,4 +308,4 @@ class FirecrawlApp: # Raise an HTTPError with the custom message and attach the response raise requests.exceptions.HTTPError(message, response=response) - + \ No newline at end of file From afee5684a3811f6a2be4437f0dad6fac9a285d36 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:05:19 -0300 Subject: [PATCH 5/5] Fixed tests' message and updated version --- apps/python-sdk/firecrawl/__init__.py | 2 +- .../firecrawl/__tests__/e2e_withAuth/test.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index 2fe16ba..fbb2bdb 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ import os from .firecrawl import FirecrawlApp -__version__ = "0.0.15" +__version__ = "0.0.16" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py index 90a6498..452d498 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ -27,14 +27,14 @@ def test_scrape_url_invalid_api_key(): invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") with pytest.raises(Exception) as excinfo: invalid_app.scrape_url('https://firecrawl.dev') - assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) + assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) def test_blocklisted_url(): blocklisted_url = "https://facebook.com/fake-test" app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) with pytest.raises(Exception) as excinfo: app.scrape_url(blocklisted_url) - assert "Failed to scrape URL. Status code: 403" in str(excinfo.value) + assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) def test_successful_response_with_valid_preview_token(): app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token") @@ -86,14 +86,14 @@ def test_crawl_url_invalid_api_key(): invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") with pytest.raises(Exception) as excinfo: invalid_app.crawl_url('https://firecrawl.dev') - assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value) + assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) def test_should_return_error_for_blocklisted_url(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) blocklisted_url = "https://twitter.com/fake-test" with pytest.raises(Exception) as excinfo: app.crawl_url(blocklisted_url) - assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value) + assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) def test_crawl_url_wait_for_completion_e2e(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) @@ -114,7 +114,7 @@ def test_crawl_url_with_idempotency_key_e2e(): with pytest.raises(Exception) as excinfo: app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) - assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value) + assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value) def test_check_crawl_status_e2e(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) @@ -141,7 +141,7 @@ def test_search_invalid_api_key(): invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") with pytest.raises(Exception) as excinfo: invalid_app.search("test query") - assert "Failed to search. Status code: 401" in str(excinfo.value) + assert "Unexpected error during search: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) def test_llm_extraction(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)