Nick:
This commit is contained in:
parent
39dca60241
commit
ddf9ff9c9a
@ -13,12 +13,15 @@ GET http://localhost:3002/v0/jobs/active HTTP/1.1
|
||||
|
||||
|
||||
### Scrape Website
|
||||
POST https://api.firecrawl.dev/v0/scrape HTTP/1.1
|
||||
POST http://localhost:3002/v0/crawl HTTP/1.1
|
||||
Authorization: Bearer
|
||||
content-type: application/json
|
||||
|
||||
{
|
||||
"url":"https://www.mendable.ai"
|
||||
"url":"https://www.mendable.ai",
|
||||
"crawlerOptions": {
|
||||
"returnOnlyUrls": true
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -34,7 +37,7 @@ content-type: application/json
|
||||
|
||||
|
||||
### Check Job Status
|
||||
GET http://localhost:3002/v0/crawl/status/333ab225-dc3e-418b-9d4b-8fb833cbaf89 HTTP/1.1
|
||||
GET http://localhost:3002/v0/crawl/status/4dbf2b62-487d-45d7-a4f7-8f5e883dfecd HTTP/1.1
|
||||
Authorization: Bearer
|
||||
|
||||
### Get Job Result
|
||||
@ -48,5 +51,5 @@ content-type: application/json
|
||||
}
|
||||
|
||||
### Check Job Status
|
||||
GET https://api.firecrawl.dev/v0/crawl/status/cfcb71ac-23a3-4da5-bd85-d4e58b871d66
|
||||
GET https://api.firecrawl.dev/v0/crawl/status/abd12f69-06b2-4378-8753-118b811df59d
|
||||
Authorization: Bearer
|
@ -66,6 +66,7 @@ export async function runWebScraper({
|
||||
inProgress(progress);
|
||||
})) as CrawlResult[];
|
||||
|
||||
|
||||
if (docs.length === 0) {
|
||||
return {
|
||||
success: true,
|
||||
@ -75,7 +76,7 @@ export async function runWebScraper({
|
||||
}
|
||||
|
||||
// remove docs with empty content
|
||||
const filteredDocs = docs.filter((doc) => doc.content.trim().length > 0);
|
||||
const filteredDocs = crawlerOptions.returnOnlyUrls ? docs : docs.filter((doc) => doc.content.trim().length > 0);
|
||||
onSuccess(filteredDocs);
|
||||
|
||||
const { success, credit_usage } = await billTeam(
|
||||
|
@ -80,11 +80,16 @@ export class WebScraperDataProvider {
|
||||
});
|
||||
let links = await crawler.start(inProgress, 5, this.limit);
|
||||
if (this.returnOnlyUrls) {
|
||||
inProgress({
|
||||
current: links.length,
|
||||
total: links.length,
|
||||
status: "COMPLETED",
|
||||
currentDocumentUrl: this.urls[0],
|
||||
});
|
||||
return links.map((url) => ({
|
||||
content: "",
|
||||
markdown: "",
|
||||
metadata: { sourceURL: url },
|
||||
provider: "web",
|
||||
type: "text",
|
||||
}));
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user