Nick:
parent 7ce2dd976f · commit 2bed55a3b4
@@ -33,15 +33,18 @@ Here's an example of how to use the SDK with error handling:
 
     // Crawl a website
    const crawlUrl = 'https://mendable.ai';
-    const crawlParams = {
+    const params = {
      crawlerOptions: {
        excludes: ['blog/'],
        includes: [], // leave empty for all pages
        limit: 1000,
+      },
+      pageOptions: {
+        onlyMainContent: true
      }
    };
 
-    const crawlResult = await app.crawlUrl(crawlUrl, crawlParams);
+    const crawlResult = await app.crawlUrl(crawlUrl, params);
    console.log(crawlResult);
 
  } catch (error) {
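Assembled, the new version of this example (presumably from the JS SDK README) reads roughly as follows. The `FirecrawlApp` import and the API-key initialization are assumptions based on the SDK's usual setup; they are not part of this diff:

```js
import FirecrawlApp from '@mendable/firecrawl-js';

// Assumed setup: initialize the SDK with your API key
const app = new FirecrawlApp({ apiKey: 'YOUR_API_KEY' });

try {
  // Crawl a website, excluding blog pages and keeping only main content
  const crawlUrl = 'https://mendable.ai';
  const params = {
    crawlerOptions: {
      excludes: ['blog/'],
      includes: [], // leave empty for all pages
      limit: 1000,
    },
    pageOptions: {
      onlyMainContent: true
    }
  };

  const crawlResult = await app.crawlUrl(crawlUrl, params);
  console.log(crawlResult);
} catch (error) {
  console.error(error.message);
}
```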
@@ -83,18 +86,21 @@ To crawl a website with error handling, use the `crawlUrl` method. It takes the
 async function crawlExample() {
  try {
    const crawlUrl = 'https://example.com';
-    const crawlParams = {
+    const params = {
      crawlerOptions: {
        excludes: ['blog/'],
        includes: [], // leave empty for all pages
        limit: 1000,
+      },
+      pageOptions: {
+        onlyMainContent: true
      }
    };
    const waitUntilDone = true;
    const timeout = 5;
    const crawlResult = await app.crawlUrl(
      crawlUrl,
-      crawlParams,
+      params,
      waitUntilDone,
      timeout
    );
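As a usage note, the positional signature exercised above can be sketched inline like this; the meaning of `timeout` is an assumption, since the diff does not state its unit or semantics:

```js
// Sketch: crawlUrl(url, params, waitUntilDone, timeout)
const crawlResult = await app.crawlUrl(
  'https://example.com',
  { crawlerOptions: { limit: 1000 }, pageOptions: { onlyMainContent: true } },
  true, // block until the crawl job finishes
  5     // assumed: a poll/timeout value in seconds; not confirmed by this diff
);
```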
apps/js-sdk/firecrawl/package-lock.json (generated; 4 changed lines)

@@ -1,12 +1,12 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "0.0.7",
+  "version": "0.0.9",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@mendable/firecrawl-js",
-      "version": "0.0.7",
+      "version": "0.0.9",
       "license": "MIT",
       "dependencies": {
         "axios": "^1.6.8",
@@ -30,14 +30,12 @@ scraped_data = app.scrape_url(url)
 
 # Crawl a website
 crawl_url = 'https://mendable.ai'
-crawl_params = {
-    'crawlerOptions': {
-        'excludes': ['blog/*'],
-        'includes': [], # leave empty for all pages
-        'limit': 1000,
+params = {
+    'pageOptions': {
+        'onlyMainContent': True
     }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params)
+crawl_result = app.crawl_url(crawl_url, params=params)
 ```
 
 ### Scraping a URL
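The updated Python example (presumably from the Python SDK README) assembles to roughly the following. The `firecrawl` import and the API-key setup are assumptions based on the SDK's usual setup, not part of this diff:

```python
from firecrawl import FirecrawlApp  # assumed import path for the Python SDK

# Assumed setup: initialize the SDK with your API key
app = FirecrawlApp(api_key='YOUR_API_KEY')

# Crawl a website, keeping only the main content of each page
crawl_url = 'https://mendable.ai'
params = {
    'pageOptions': {
        'onlyMainContent': True
    }
}
crawl_result = app.crawl_url(crawl_url, params=params)
print(crawl_result)
```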
@@ -57,14 +55,17 @@ The `wait_until_done` parameter determines whether the method should wait for th
 
 ```python
 crawl_url = 'https://example.com'
-crawl_params = {
+params = {
     'crawlerOptions': {
         'excludes': ['blog/*'],
         'includes': [], # leave empty for all pages
         'limit': 1000,
+    },
+    'pageOptions': {
+        'onlyMainContent': True
     }
 }
-crawl_result = app.crawl_url(crawl_url, params=crawl_params, wait_until_done=True, timeout=5)
+crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
 ```
 
 If `wait_until_done` is set to `True`, the `crawl_url` method will return the crawl result once the job is completed. If the job fails or is stopped, an exception will be raised.
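A minimal sketch of guarding that call, assuming `app`, `crawl_url`, and `params` are set up as in the example above; the exact exception type is an assumption, since the text only says an exception will be raised:

```python
try:
    # Blocks until the crawl job completes; raises if the job fails or is stopped
    crawl_result = app.crawl_url(crawl_url, params=params, wait_until_done=True, timeout=5)
    print(crawl_result)
except Exception as e:  # assumed: a plain Exception; the diff does not name the type
    print(f'Crawl failed or was stopped: {e}')
```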