diff --git a/apps/js-sdk/firecrawl/README.md b/apps/js-sdk/firecrawl/README.md
index 0757511..3f92c32 100644
--- a/apps/js-sdk/firecrawl/README.md
+++ b/apps/js-sdk/firecrawl/README.md
@@ -33,15 +33,18 @@ Here's an example of how to use the SDK with error handling:
 
     // Crawl a website
     const crawlUrl = 'https://mendable.ai';
-    const crawlParams = {
+    const params = {
       crawlerOptions: {
         excludes: ['blog/'],
         includes: [], // leave empty for all pages
         limit: 1000,
+      },
+      pageOptions: {
+        onlyMainContent: true
       }
     };
-    const crawlResult = await app.crawlUrl(crawlUrl, crawlParams);
+    const crawlResult = await app.crawlUrl(crawlUrl, params);
     console.log(crawlResult);
 
   } catch (error) {
@@ -83,18 +86,21 @@ To crawl a website with error handling, use the `crawlUrl` method. It takes the
 async function crawlExample() {
   try {
     const crawlUrl = 'https://example.com';
-    const crawlParams = {
+    const params = {
       crawlerOptions: {
         excludes: ['blog/'],
         includes: [], // leave empty for all pages
         limit: 1000,
+      },
+      pageOptions: {
+        onlyMainContent: true
       }
     };
     const waitUntilDone = true;
     const timeout = 5;
     const crawlResult = await app.crawlUrl(
       crawlUrl,
-      crawlParams,
+      params,
       waitUntilDone,
       timeout
     );
diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json
index 98fafc5..0497c6e 100644
--- a/apps/js-sdk/firecrawl/package-lock.json
+++ b/apps/js-sdk/firecrawl/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "0.0.7",
+  "version": "0.0.9",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@mendable/firecrawl-js",
-      "version": "0.0.7",
+      "version": "0.0.9",
       "license": "MIT",
       "dependencies": {
         "axios": "^1.6.8",
diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md
index 3ce405d..0a80202 100644
--- a/apps/python-sdk/README.md
+++ b/apps/python-sdk/README.md
@@ -30,14 +30,12 @@ scraped_data = app.scrape_url(url)
 
 # Crawl a website
 crawl_url = 'https://mendable.ai'
-crawl_params = {
-    'crawlerOptions': {
-        'excludes': ['blog/*'],
-        'includes': [], # leave empty for all pages
-        'limit': 1000,
+params = {
+    'pageOptions': {
+        'onlyMainContent': True
     }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params)
+crawl_result = app.crawl_url(crawl_url, params=params)
 ```
 
 ### Scraping a URL
@@ -57,14 +55,17 @@ The `wait_until_done` parameter determines whether the method should wait for th
 
 ```python
 crawl_url = 'https://example.com'
-crawl_params = {
+params = {
     'crawlerOptions': {
         'excludes': ['blog/*'],
         'includes': [], # leave empty for all pages
         'limit': 1000,
+    },
+    'pageOptions': {
+        'onlyMainContent': True
     }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params, wait_until_done=True, timeout=5)
+crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
 ```
 
 If `wait_until_done` is set to `True`, the `crawl_url` method will return the crawl result once the job is completed. If the job fails or is stopped, an exception will be raised.
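
For reference, a minimal sketch of the `params` shape these README changes document, combining `crawlerOptions` with the added `pageOptions.onlyMainContent` flag. It assumes the `FirecrawlApp` default export from `@mendable/firecrawl-js` (as used elsewhere in the README) and a placeholder API key:

```js
import FirecrawlApp from '@mendable/firecrawl-js';

// Placeholder API key; assumes the standard FirecrawlApp constructor.
const app = new FirecrawlApp({ apiKey: 'YOUR_API_KEY' });

// crawlerOptions controls which pages are crawled;
// pageOptions.onlyMainContent requests only the main content of each page.
const params = {
  crawlerOptions: {
    excludes: ['blog/'],
    includes: [], // leave empty for all pages
    limit: 1000,
  },
  pageOptions: {
    onlyMainContent: true
  }
};

// Same call shape as in the updated README: (url, params, waitUntilDone, timeout).
const crawlResult = await app.crawlUrl('https://mendable.ai', params, true, 5);
console.log(crawlResult);
```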