added python sdk e2e tests with pytest
some of them are still missing though
This commit is contained in:
parent
4ce28593b2
commit
397769c7e3
@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id)
|
|||||||
|
|
||||||
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
|
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
|
||||||
|
|
||||||
|
## Running the Tests with Pytest
|
||||||
|
|
||||||
|
To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling.
|
||||||
|
|
||||||
|
### Running the Tests
|
||||||
|
|
||||||
|
To run the tests, execute the following commands:
|
||||||
|
|
||||||
|
Install pytest:
|
||||||
|
```bash
|
||||||
|
pip install pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
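The tests authenticate with the `TEST_API_KEY` constant defined in `firecrawl/__tests__/e2e_withAuth/test.py`, which ships as a placeholder. Replace it with a valid Firecrawl API key, then run: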
|
||||||
|
```bash
|
||||||
|
pytest firecrawl/__tests__/e2e_withAuth/test.py
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
|
Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
|
||||||
|
96
apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
Normal file
96
apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
import os

import pytest
from firecrawl import FirecrawlApp
|
||||||
|
|
||||||
|
# API key used by the authenticated tests. It is read from the TEST_API_KEY
# environment variable so a real key never has to be written into this file;
# the original placeholder is kept as the fallback value.
TEST_API_KEY = os.getenv("TEST_API_KEY", "fc-YOUR_API_KEY")

# Site that the scrape and crawl tests run against.
TEST_URL = "https://firecrawl.dev"
|
||||||
|
|
||||||
|
def test_scrape_url_e2e():
    """Scrape TEST_URL with TEST_API_KEY and check the returned page content.

    The response must be non-None, expose a 'content' entry, and that
    content must contain the "🔥 Firecrawl" marker string.
    """
    app = FirecrawlApp(api_key=TEST_API_KEY)
    response = app.scrape_url(TEST_URL)
    # The leftover debug print(response) was removed: when an assertion
    # fails, pytest already reports the values being compared.
    assert response is not None
    assert 'content' in response
    assert "🔥 Firecrawl" in response['content']
|
||||||
|
|
||||||
|
def test_scrape_url_invalid_api_key():
    """Scraping with a bogus API key must raise an error reporting status 401."""
    expected_message = "Failed to scrape URL. Status code: 401"
    client = FirecrawlApp(api_key="invalid_api_key")

    with pytest.raises(Exception) as excinfo:
        client.scrape_url(TEST_URL)

    # Checked after the with-block: a statement placed after the raising
    # call inside the block would never execute.
    assert expected_message in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_crawl_url_e2e():
    """Crawl TEST_URL while excluding 'blog/*' and check the first result's content."""
    crawl_params = {'crawlerOptions': {'excludes': ['blog/*']}}
    client = FirecrawlApp(api_key=TEST_API_KEY)

    # NOTE(review): the third positional argument is True here and the result
    # is indexed like a list, so it presumably makes crawl_url wait for the
    # finished crawl -- confirm against FirecrawlApp.crawl_url.
    pages = client.crawl_url(TEST_URL, crawl_params, True)

    assert pages is not None
    assert len(pages) > 0
    first_page = pages[0]
    assert 'content' in first_page
    assert "🔥 Firecrawl" in first_page['content']
|
||||||
|
|
||||||
|
def test_crawl_url_invalid_api_key():
    """Starting a crawl with a bogus API key must raise an error reporting status 401."""
    expected_message = "Unexpected error occurred while trying to start crawl job. Status code: 401"
    client = FirecrawlApp(api_key="invalid_api_key")

    with pytest.raises(Exception) as excinfo:
        client.crawl_url(TEST_URL)

    # Checked after the with-block: a statement placed after the raising
    # call inside the block would never execute.
    assert expected_message in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_search_e2e():
    """Search for "test query" and check that several results with content come back.

    The response must be non-None, hold more than two results, and the
    first result must expose a 'content' entry.
    """
    app = FirecrawlApp(api_key=TEST_API_KEY)
    response = app.search("test query")
    assert response is not None
    # Verify the result count before indexing, so an empty result fails with
    # a clear assertion instead of an IndexError on response[0].
    assert len(response) > 2
    assert 'content' in response[0]
|
||||||
|
|
||||||
|
def test_search_invalid_api_key():
    """Searching with a bogus API key must raise an error reporting status 401."""
    expected_message = "Failed to search. Status code: 401"
    client = FirecrawlApp(api_key="invalid_api_key")

    with pytest.raises(Exception) as excinfo:
        client.search("test query")

    # Checked after the with-block: a statement placed after the raising
    # call inside the block would never execute.
    assert expected_message in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_crawl_with_fast_mode():
    """Crawl TEST_URL with crawlerOptions mode 'fast' and check the first result has content."""
    crawl_params = {'crawlerOptions': {'mode': 'fast'}}
    client = FirecrawlApp(api_key=TEST_API_KEY)

    # NOTE(review): third positional argument True -- presumably "wait for the
    # crawl to finish", given the list-style result; confirm against the SDK.
    pages = client.crawl_url(TEST_URL, crawl_params, True)

    assert pages is not None
    assert len(pages) > 0
    assert 'content' in pages[0]
|
||||||
|
|
||||||
|
def test_crawl_with_html_inclusion():
    """Start a crawl of TEST_URL with includeHtml enabled and check a job id comes back."""
    page_options = {'includeHtml': True}
    client = FirecrawlApp(api_key=TEST_API_KEY)

    # NOTE(review): third positional argument False -- presumably "return
    # without waiting", since only a 'jobId' is expected; confirm against the SDK.
    job = client.crawl_url(TEST_URL, {'pageOptions': page_options}, False)

    assert job is not None
    assert 'jobId' in job
|
||||||
|
|
||||||
|
def test_crawl_with_pdf_extraction():
    """Start a limited crawl of an arXiv PDF URL and check a job id comes back."""
    pdf_url = "https://arxiv.org/pdf/astro-ph/9301001"
    excluded_paths = ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']
    crawl_params = {'crawlerOptions': {'limit': 10, 'excludes': excluded_paths}}
    client = FirecrawlApp(api_key=TEST_API_KEY)

    # NOTE(review): third positional argument False -- presumably "return
    # without waiting", since only a 'jobId' is expected; confirm against the SDK.
    job = client.crawl_url(pdf_url, crawl_params, False)

    assert job is not None
    assert 'jobId' in job
|
||||||
|
|
||||||
|
def test_timeout_during_scraping():
    """Scraping TEST_URL with a 'timeout' of 1000 must raise an error reporting status 408."""
    expected_message = 'Failed to scrape URL. Status code: 408'
    scrape_params = {'timeout': 1000}
    client = FirecrawlApp(api_key=TEST_API_KEY)

    with pytest.raises(Exception) as excinfo:
        client.scrape_url(TEST_URL, scrape_params)

    # Checked after the with-block: a statement placed after the raising
    # call inside the block would never execute.
    assert expected_message in str(excinfo.value)
|
||||||
|
|
||||||
|
def test_llm_extraction():
    """Scrape https://mendable.ai in 'llm-extraction' mode and validate the extracted fields.

    The response must contain an 'llm_extraction' entry that includes
    'company_mission' and boolean 'supports_sso' / 'is_open_source' values.
    """
    # JSON-schema-style description of the fields requested from the extractor.
    extraction_schema = {
        'type': 'object',
        'properties': {
            'company_mission': {'type': 'string'},
            'supports_sso': {'type': 'boolean'},
            'is_open_source': {'type': 'boolean'}
        },
        'required': ['company_mission', 'supports_sso', 'is_open_source']
    }
    extractor_options = {
        'mode': 'llm-extraction',
        'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
        'extractionSchema': extraction_schema
    }

    client = FirecrawlApp(api_key=TEST_API_KEY)
    result = client.scrape_url("https://mendable.ai", {'extractorOptions': extractor_options})

    assert result is not None
    assert 'llm_extraction' in result

    extracted = result['llm_extraction']
    assert 'company_mission' in extracted
    assert isinstance(extracted['supports_sso'], bool)
    assert isinstance(extracted['is_open_source'], bool)
|
@ -10,5 +10,6 @@ setup(
|
|||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'requests',
|
'requests',
|
||||||
|
'pytest',
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user