Nicolas 2024-04-17 19:05:28 -07:00
parent 7ce2dd976f
commit 2bed55a3b4
3 changed files with 21 additions and 14 deletions

@@ -33,15 +33,18 @@ Here's an example of how to use the SDK with error handling:
     // Crawl a website
     const crawlUrl = 'https://mendable.ai';
-    const crawlParams = {
+    const params = {
       crawlerOptions: {
         excludes: ['blog/'],
         includes: [], // leave empty for all pages
         limit: 1000,
+      },
+      pageOptions: {
+        onlyMainContent: true
       }
     };
-    const crawlResult = await app.crawlUrl(crawlUrl, crawlParams);
+    const crawlResult = await app.crawlUrl(crawlUrl, params);
     console.log(crawlResult);

   } catch (error) {
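Assembled, the updated README example from this hunk reads roughly as the sketch below. This is a reconstruction for context, not part of the diff: the `FirecrawlApp` import, the placeholder API key, and the wrapping async function are assumed rather than shown in the hunk.

```js
import FirecrawlApp from '@mendable/firecrawl-js';

// Assumed setup; the API key value is a placeholder.
const app = new FirecrawlApp({ apiKey: 'YOUR_API_KEY' });

async function crawlMendable() {
  try {
    // Crawl params as of this commit: crawlerOptions plus the new pageOptions block.
    const params = {
      crawlerOptions: {
        excludes: ['blog/'],
        includes: [], // leave empty for all pages
        limit: 1000,
      },
      pageOptions: {
        onlyMainContent: true, // return only the main page content
      },
    };
    const crawlResult = await app.crawlUrl('https://mendable.ai', params);
    console.log(crawlResult);
  } catch (error) {
    console.error('Crawl failed:', error);
  }
}
```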
@@ -83,18 +86,21 @@ To crawl a website with error handling, use the `crawlUrl` method. It takes the
 async function crawlExample() {
   try {
     const crawlUrl = 'https://example.com';
-    const crawlParams = {
+    const params = {
       crawlerOptions: {
         excludes: ['blog/'],
         includes: [], // leave empty for all pages
         limit: 1000,
+      },
+      pageOptions: {
+        onlyMainContent: true
       }
     };
     const waitUntilDone = true;
     const timeout = 5;
     const crawlResult = await app.crawlUrl(
       crawlUrl,
-      crawlParams,
+      params,
       waitUntilDone,
       timeout
     );
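The same call can also be written with the positional `waitUntilDone` and `timeout` arguments inlined, as in this minimal sketch (it assumes `app` and `params` are defined as in the example above and keeps the hunk's values of `true` and `5`):

```js
// Inside an async function such as crawlExample() from the hunk above.
// Per the Python README text later in this commit, waiting until done returns the
// crawl result on completion and throws if the job fails or is stopped.
const crawlResult = await app.crawlUrl('https://example.com', params, true, 5);
console.log(crawlResult);
```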

@ -1,12 +1,12 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.7", "version": "0.0.9",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.7", "version": "0.0.9",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",

@@ -30,14 +30,12 @@ scraped_data = app.scrape_url(url)
 # Crawl a website
 crawl_url = 'https://mendable.ai'
-crawl_params = {
-    'crawlerOptions': {
-        'excludes': ['blog/*'],
-        'includes': [], # leave empty for all pages
-        'limit': 1000,
+params = {
+    'pageOptions': {
+        'onlyMainContent': True
     }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params)
+crawl_result = app.crawl_url(crawl_url, params=params)
 ```

 ### Scraping a URL
@@ -57,14 +55,17 @@ The `wait_until_done` parameter determines whether the method should wait for th
 ```python
 crawl_url = 'https://example.com'
-crawl_params = {
+params = {
     'crawlerOptions': {
         'excludes': ['blog/*'],
         'includes': [], # leave empty for all pages
         'limit': 1000,
+    },
+    'pageOptions': {
+        'onlyMainContent': True
     }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params, wait_until_done=True, timeout=5)
+crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
 ```

 If `wait_until_done` is set to `True`, the `crawl_url` method will return the crawl result once the job is completed. If the job fails or is stopped, an exception will be raised.