2024-04-18 01:23:10 -04:00
{
2024-04-22 11:41:54 -04:00
"openapi" : "3.0.0" ,
"info" : {
"title" : "Firecrawl API" ,
"version" : "1.0.0" ,
"description" : "API for interacting with Firecrawl services to perform web scraping and crawling tasks." ,
"contact" : {
"name" : "Firecrawl Support" ,
"url" : "https://firecrawl.dev/support" ,
"email" : "support@firecrawl.dev"
}
} ,
"servers" : [
{
"url" : "https://api.firecrawl.dev/v0"
}
] ,
"paths" : {
"/scrape" : {
"post" : {
"summary" : "Scrape a single URL" ,
"operationId" : "scrapeSingleUrl" ,
"tags" : [ "Scraping" ] ,
"security" : [
{
"bearerAuth" : [ ]
}
] ,
"requestBody" : {
"required" : true ,
"content" : {
"application/json" : {
"schema" : {
"type" : "object" ,
"properties" : {
"url" : {
"type" : "string" ,
"format" : "uri" ,
"description" : "The URL to scrape"
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"pageOptions" : {
"type" : "object" ,
"properties" : {
"onlyMainContent" : {
"type" : "boolean" ,
"description" : "Only return the main content of the page excluding headers, navs, footers, etc." ,
"default" : false
}
}
2024-04-18 01:23:10 -04:00
}
2024-04-22 11:41:54 -04:00
} ,
"required" : [ "url" ]
2024-04-18 01:23:10 -04:00
}
}
}
2024-04-22 11:41:54 -04:00
} ,
"responses" : {
"200" : {
"description" : "Successful response" ,
2024-04-18 01:23:10 -04:00
"content" : {
"application/json" : {
"schema" : {
2024-04-22 11:41:54 -04:00
"$ref" : "#/components/schemas/ScrapeResponse"
2024-04-18 01:23:10 -04:00
}
}
}
} ,
2024-04-22 11:41:54 -04:00
"402" : {
"description" : "Payment required"
} ,
"429" : {
"description" : "Too many requests"
} ,
"500" : {
"description" : "Server error"
2024-04-18 01:23:10 -04:00
}
}
2024-04-22 11:41:54 -04:00
}
} ,
"/crawl" : {
"post" : {
"summary" : "Crawl multiple URLs based on options" ,
"operationId" : "crawlUrls" ,
"tags" : [ "Crawling" ] ,
"security" : [
{
"bearerAuth" : [ ]
}
] ,
"requestBody" : {
"required" : true ,
"content" : {
"application/json" : {
2024-04-18 01:23:10 -04:00
"schema" : {
2024-04-22 11:41:54 -04:00
"type" : "object" ,
"properties" : {
"url" : {
"type" : "string" ,
"format" : "uri" ,
"description" : "The base URL to start crawling from"
} ,
"crawlerOptions" : {
2024-04-18 01:23:10 -04:00
"type" : "object" ,
"properties" : {
2024-04-22 11:41:54 -04:00
"includes" : {
"type" : "array" ,
"items" : {
"type" : "string"
} ,
"description" : "URL patterns to include"
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"excludes" : {
"type" : "array" ,
"items" : {
"type" : "string"
} ,
"description" : "URL patterns to exclude"
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"generateImgAltText" : {
"type" : "boolean" ,
"description" : "Generate alt text for images using LLMs (must have a paid plan)" ,
"default" : false
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"returnOnlyUrls" : {
"type" : "boolean" ,
"description" : "If true, returns only the URLs as a list in the crawl status. Note: the response data will then be a list of URLs, not a list of documents." ,
"default" : false
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"limit" : {
2024-04-18 01:23:10 -04:00
"type" : "integer" ,
2024-04-22 11:41:54 -04:00
"description" : "Maximum number of pages to crawl"
}
}
} ,
"pageOptions" : {
"type" : "object" ,
"properties" : {
"onlyMainContent" : {
"type" : "boolean" ,
"description" : "Only return the main content of the page excluding headers, navs, footers, etc." ,
"default" : false
2024-04-18 01:23:10 -04:00
}
}
}
2024-04-22 11:41:54 -04:00
} ,
"required" : [ "url" ]
}
}
}
} ,
"responses" : {
"200" : {
"description" : "Successful response" ,
"content" : {
"application/json" : {
"schema" : {
"$ref" : "#/components/schemas/CrawlResponse"
2024-04-18 01:23:10 -04:00
}
}
}
2024-04-22 11:41:54 -04:00
} ,
"402" : {
"description" : "Payment required"
} ,
"429" : {
"description" : "Too many requests"
} ,
"500" : {
"description" : "Server error"
2024-04-18 01:23:10 -04:00
}
}
}
} ,
2024-04-22 11:41:54 -04:00
"/crawl/status/{jobId}" : {
"get" : {
"tags" : [ "Crawling" ] ,
"summary" : "Get the status of a crawl job" ,
"operationId" : "getCrawlStatus" ,
"security" : [
{
"bearerAuth" : [ ]
}
] ,
"parameters" : [
{
"name" : "jobId" ,
"in" : "path" ,
"description" : "ID of the crawl job" ,
"required" : true ,
"schema" : {
"type" : "string"
}
}
] ,
"responses" : {
"200" : {
"description" : "Successful response" ,
"content" : {
"application/json" : {
"schema" : {
2024-04-18 01:23:10 -04:00
"type" : "object" ,
"properties" : {
2024-04-22 11:41:54 -04:00
"status" : {
"type" : "string" ,
"description" : "Status of the job (completed, active, failed, paused)"
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"current" : {
"type" : "integer" ,
"description" : "Current page number"
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"current_url" : {
2024-04-18 01:23:10 -04:00
"type" : "string" ,
2024-04-22 11:41:54 -04:00
"description" : "Current URL being scraped"
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"current_step" : {
2024-04-18 01:23:10 -04:00
"type" : "string" ,
2024-04-22 11:41:54 -04:00
"description" : "Current step in the process"
} ,
"total" : {
"type" : "integer" ,
"description" : "Total number of pages"
} ,
"data" : {
"type" : "array" ,
"items" : {
"$ref" : "#/components/schemas/ScrapeResponse"
} ,
"description" : "Data returned from the job (null when it is in progress)"
2024-04-18 01:23:10 -04:00
}
}
}
}
}
2024-04-22 11:41:54 -04:00
} ,
"402" : {
"description" : "Payment required"
} ,
"429" : {
"description" : "Too many requests"
} ,
"500" : {
"description" : "Server error"
2024-04-18 01:23:10 -04:00
}
}
}
2024-04-22 11:41:54 -04:00
}
} ,
"components" : {
"securitySchemes" : {
"bearerAuth" : {
"type" : "http" ,
"scheme" : "bearer"
}
2024-04-18 01:23:10 -04:00
} ,
2024-04-22 11:41:54 -04:00
"schemas" : {
"ScrapeResponse" : {
"type" : "object" ,
"properties" : {
"success" : {
"type" : "boolean"
} ,
"data" : {
"type" : "object" ,
"properties" : {
"content" : {
"type" : "string"
} ,
"markdown" : {
"type" : "string"
} ,
"metadata" : {
"type" : "object" ,
"properties" : {
"title" : {
"type" : "string"
} ,
"description" : {
"type" : "string"
} ,
"language" : {
"type" : "string" ,
"nullable" : true
} ,
"sourceURL" : {
"type" : "string" ,
"format" : "uri"
}
}
}
}
}
}
} ,
"CrawlResponse" : {
"type" : "object" ,
"properties" : {
"jobId" : {
"type" : "string"
}
}
2024-04-18 01:23:10 -04:00
}
2024-04-22 11:41:54 -04:00
}
} ,
"security" : [
{
"bearerAuth" : [ ]
}
]
}