0

Align param name with JS SDK

timeout becomes poll_interval
This commit is contained in:
Matt Joyce 2024-06-08 07:37:08 +10:00
parent 98d82c4cec
commit 5f0df596ec

View File

@ -127,7 +127,7 @@ class FirecrawlApp:
else: else:
raise Exception(f'Failed to search. Status code: {response.status_code}') raise Exception(f'Failed to search. Status code: {response.status_code}')
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None): def crawl_url(self, url, params=None, wait_until_done=True, poll_interval=2, idempotency_key=None):
""" """
Initiate a crawl job for the specified URL using the Firecrawl API. Initiate a crawl job for the specified URL using the Firecrawl API.
@ -135,7 +135,7 @@ class FirecrawlApp:
url (str): The URL to crawl. url (str): The URL to crawl.
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request. params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
wait_until_done (bool): Whether to wait until the crawl job is completed. wait_until_done (bool): Whether to wait until the crawl job is completed.
timeout (int): Timeout between status checks when waiting for job completion. poll_interval (int): Time in seconds between status checks when waiting for job completion.
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests. idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
Returns: Returns:
@ -152,7 +152,7 @@ class FirecrawlApp:
if response.status_code == 200: if response.status_code == 200:
job_id = response.json().get('jobId') job_id = response.json().get('jobId')
if wait_until_done: if wait_until_done:
return self._monitor_job_status(job_id, headers, timeout) return self._monitor_job_status(job_id, headers, poll_interval)
else: else:
return {'jobId': job_id} return {'jobId': job_id}
else: else:
@ -249,14 +249,14 @@ class FirecrawlApp:
return response return response
return response return response
def _monitor_job_status(self, job_id, headers, timeout): def _monitor_job_status(self, job_id, headers, poll_interval):
""" """
Monitor the status of a crawl job until completion. Monitor the status of a crawl job until completion.
Args: Args:
job_id (str): The ID of the crawl job. job_id (str): The ID of the crawl job.
headers (Dict[str, str]): The headers to include in the status check requests. headers (Dict[str, str]): The headers to include in the status check requests.
timeout (int): Timeout between status checks. poll_interval (int): Seconds between status checks.
Returns: Returns:
Any: The crawl results if the job is completed successfully. Any: The crawl results if the job is completed successfully.
@ -274,8 +274,8 @@ class FirecrawlApp:
else: else:
raise Exception('Crawl job completed but no data was returned') raise Exception('Crawl job completed but no data was returned')
elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']: elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
timeout=max(timeout,2) poll_interval=max(poll_interval,2)
time.sleep(timeout) # Wait for the specified timeout before checking again time.sleep(poll_interval) # Wait for the specified interval before checking again
else: else:
raise Exception(f'Crawl job failed or was stopped. Status: {status_data["status"]}') raise Exception(f'Crawl job failed or was stopped. Status: {status_data["status"]}')
else: else: