Merge pull request #252 from mattjoyce/fix-208-py-sdk-interval-poll-name
Fix 208 py sdk interval poll name
This commit is contained in:
commit
aafd23fa8a
@ -115,7 +115,7 @@ class FirecrawlApp:
|
|||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
response = response.json()
|
response = response.json()
|
||||||
|
|
||||||
if response['success'] and 'data' in response:
|
if response['success'] and 'data' in response:
|
||||||
return response['data']
|
return response['data']
|
||||||
else:
|
else:
|
||||||
@ -127,7 +127,7 @@ class FirecrawlApp:
|
|||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||||
|
|
||||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
|
def crawl_url(self, url, params=None, wait_until_done=True, poll_interval=2, idempotency_key=None):
|
||||||
"""
|
"""
|
||||||
Initiate a crawl job for the specified URL using the Firecrawl API.
|
Initiate a crawl job for the specified URL using the Firecrawl API.
|
||||||
|
|
||||||
@ -135,7 +135,7 @@ class FirecrawlApp:
|
|||||||
url (str): The URL to crawl.
|
url (str): The URL to crawl.
|
||||||
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
|
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
|
||||||
wait_until_done (bool): Whether to wait until the crawl job is completed.
|
wait_until_done (bool): Whether to wait until the crawl job is completed.
|
||||||
timeout (int): Timeout between status checks when waiting for job completion.
|
poll_interval (int): Time in seconds between status checks when waiting for job completion.
|
||||||
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -152,7 +152,7 @@ class FirecrawlApp:
|
|||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
job_id = response.json().get('jobId')
|
job_id = response.json().get('jobId')
|
||||||
if wait_until_done:
|
if wait_until_done:
|
||||||
return self._monitor_job_status(job_id, headers, timeout)
|
return self._monitor_job_status(job_id, headers, poll_interval)
|
||||||
else:
|
else:
|
||||||
return {'jobId': job_id}
|
return {'jobId': job_id}
|
||||||
else:
|
else:
|
||||||
@ -249,14 +249,14 @@ class FirecrawlApp:
|
|||||||
return response
|
return response
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _monitor_job_status(self, job_id, headers, timeout):
|
def _monitor_job_status(self, job_id, headers, poll_interval):
|
||||||
"""
|
"""
|
||||||
Monitor the status of a crawl job until completion.
|
Monitor the status of a crawl job until completion.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
job_id (str): The ID of the crawl job.
|
job_id (str): The ID of the crawl job.
|
||||||
headers (Dict[str, str]): The headers to include in the status check requests.
|
headers (Dict[str, str]): The headers to include in the status check requests.
|
||||||
timeout (int): Timeout between status checks.
|
poll_interval (int): Seconds between status checks.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Any: The crawl results if the job is completed successfully.
|
Any: The crawl results if the job is completed successfully.
|
||||||
@ -274,8 +274,8 @@ class FirecrawlApp:
|
|||||||
else:
|
else:
|
||||||
raise Exception('Crawl job completed but no data was returned')
|
raise Exception('Crawl job completed but no data was returned')
|
||||||
elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
|
elif status_data['status'] in ['active', 'paused', 'pending', 'queued', 'waiting']:
|
||||||
timeout=max(timeout,2)
|
poll_interval=max(poll_interval,2)
|
||||||
time.sleep(timeout) # Wait for the specified timeout before checking again
|
time.sleep(poll_interval) # Wait for the specified interval before checking again
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Crawl job failed or was stopped. Status: {status_data["status"]}')
|
raise Exception(f'Crawl job failed or was stopped. Status: {status_data["status"]}')
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user