Nicolas 2024-04-17 19:05:28 -07:00
parent 7ce2dd976f
commit 2bed55a3b4
3 changed files with 21 additions and 14 deletions


@@ -33,15 +33,18 @@ Here's an example of how to use the SDK with error handling:
   // Crawl a website
   const crawlUrl = 'https://mendable.ai';
-  const crawlParams = {
+  const params = {
     crawlerOptions: {
       excludes: ['blog/'],
       includes: [], // leave empty for all pages
       limit: 1000,
     },
+    pageOptions: {
+      onlyMainContent: true
+    }
   };
-  const crawlResult = await app.crawlUrl(crawlUrl, crawlParams);
+  const crawlResult = await app.crawlUrl(crawlUrl, params);
   console.log(crawlResult);
 } catch (error) {
@@ -83,18 +86,21 @@ To crawl a website with error handling, use the `crawlUrl` method. It takes the
 async function crawlExample() {
   try {
     const crawlUrl = 'https://example.com';
-    const crawlParams = {
+    const params = {
       crawlerOptions: {
         excludes: ['blog/'],
         includes: [], // leave empty for all pages
         limit: 1000,
       },
+      pageOptions: {
+        onlyMainContent: true
+      }
     };
     const waitUntilDone = true;
     const timeout = 5;
     const crawlResult = await app.crawlUrl(
       crawlUrl,
-      crawlParams,
+      params,
       waitUntilDone,
       timeout
     );


@@ -1,12 +1,12 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "0.0.7",
+  "version": "0.0.9",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@mendable/firecrawl-js",
-      "version": "0.0.7",
+      "version": "0.0.9",
       "license": "MIT",
       "dependencies": {
         "axios": "^1.6.8",


@@ -30,14 +30,12 @@ scraped_data = app.scrape_url(url)
 # Crawl a website
 crawl_url = 'https://mendable.ai'
-crawl_params = {
-    'crawlerOptions': {
-        'excludes': ['blog/*'],
-        'includes': [], # leave empty for all pages
-        'limit': 1000,
+params = {
+    'pageOptions': {
+        'onlyMainContent': True
     }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params)
+crawl_result = app.crawl_url(crawl_url, params=params)
 ```
 ### Scraping a URL
@@ -57,14 +55,17 @@ The `wait_until_done` parameter determines whether the method should wait for th
 ```python
 crawl_url = 'https://example.com'
-crawl_params = {
+params = {
     'crawlerOptions': {
         'excludes': ['blog/*'],
        'includes': [], # leave empty for all pages
         'limit': 1000,
     },
+    'pageOptions': {
+        'onlyMainContent': True
+    }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params, wait_until_done=True, timeout=5)
+crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
 ```
 If `wait_until_done` is set to `True`, the `crawl_url` method will return the crawl result once the job is completed. If the job fails or is stopped, an exception will be raised.
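
For reference, a minimal usage sketch of that exception behavior, assuming the Python SDK's `FirecrawlApp` client, its import path, and a placeholder API key (none of which appear in this diff):

```python
from firecrawl import FirecrawlApp  # assumed import path for the Python SDK

app = FirecrawlApp(api_key='YOUR_API_KEY')  # placeholder, not a real credential

params = {
    'pageOptions': {
        'onlyMainContent': True
    }
}

try:
    # With wait_until_done=True, crawl_url returns the crawl result once the
    # job is completed (same parameters as the README example above).
    result = app.crawl_url('https://example.com', params=params,
                           wait_until_done=True, timeout=5)
    print(result)
except Exception as error:
    # Per the README text above, an exception is raised if the job fails
    # or is stopped before completion.
    print(f'Crawl failed: {error}')
```

Catching a broad `Exception` here is only for illustration; the SDK's specific exception types are not shown in this diff.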