diff --git a/apps/api/openapi.json b/apps/api/openapi.json index bb58ae3..3916738 100644 --- a/apps/api/openapi.json +++ b/apps/api/openapi.json @@ -1,295 +1,309 @@ { - "openapi": "3.0.0", - "info": { - "title": "Firecrawl API", - "version": "1.0.0", - "description": "API for interacting with Firecrawl services to convert websites to LLM-ready data.", - "contact": { - "name": "Firecrawl Support", - "url": "https://firecrawl.dev/support", - "email": "help@mendable.ai" - } - }, - "servers": [ - { - "url": "https://api.firecrawl.dev/v0" - } - ], - "paths": { - "/scrape": { - "post": { - "summary": "Scrape a single URL", - "operationId": "scrapeSingleUrl", - "tags": ["Scraping"], - "security": [ - { - "bearerAuth": [] - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "The URL to scrape" - } - }, - "required": ["url"] - } - } - } - }, - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScrapeResponse" - } - } - } - }, - "402": { - "description": "Payment required" - }, - "429": { - "description": "Too many requests" - }, - "500": { - "description": "Server error" - } + "openapi": "3.0.0", + "info": { + "title": "Firecrawl API", + "version": "1.0.0", + "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.", + "contact": { + "name": "Firecrawl Support", + "url": "https://firecrawl.dev/support", + "email": "support@firecrawl.dev" + } + }, + "servers": [ + { + "url": "https://api.firecrawl.dev/v0" + } + ], + "paths": { + "/scrape": { + "post": { + "summary": "Scrape a single URL", + "operationId": "scrapeSingleUrl", + "tags": ["Scraping"], + "security": [ + { + "bearerAuth": [] } - } - }, - "/crawl": { - "post": { - "summary": "Crawl multiple URLs based on options", - "operationId": "crawlUrls", - "tags": ["Crawling"], - "security": [ - { - "bearerAuth": [] - } - ], - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri", - "description": "The base URL to start crawling from" - }, - "crawlerOptions": { - "type": "object", - "properties": { - "includes": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL patterns to include" - }, - "excludes": { - "type": "array", - "items": { - "type": "string" - }, - "description": "URL patterns to exclude" - }, - "generateImgAltText": { - "type": "boolean", - "description": "Generate alt text for images using LLMs (must have a paid plan)", - "default": false - }, - "limit": { - "type": "integer", - "description": "Maximum number of pages to crawl" - } - } - }, - "pageOptions": { - "type": "object", - "properties": { - "onlyMainContent": { - "type": "boolean", - "description": "Only return the main content of the page excluding headers, navs, footers, etc.", - "default": false - } - } - } - }, - "required": ["url"] - } - } - } - }, - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CrawlResponse" - } - } - } - }, - "402": { - "description": "Payment required" - }, - "429": { - "description": "Too many requests" - }, - "500": { - "description": "Server error" - } - } - } - }, - "/crawl/status/{jobId}": { - "get": { - "tags": ["Crawl"], - "summary": "Get the status of a crawl job", - "operationId": "getCrawlStatus", - "security": [ - { - "bearerAuth": [] - } - ], - "parameters": [ - { - "name": "jobId", - "in": "path", - "description": "ID of the crawl job", - "required": true, + ], + "requestBody": { + "required": true, + "content": { + "application/json": { "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "Successful response", - "content": { - "application/json": { - "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL to scrape" + }, + "pageOptions": { "type": "object", "properties": { - "status": { - "type": "string", - "description": "Status of the job (completed, active, failed, paused)" - }, - "current": { - "type": "integer", - "description": "Current page number" - }, - "current_url": { - "type": "string", - "description": "Current URL being scraped" - }, - "current_step": { - "type": "string", - "description": "Current step in the process" - }, - "total": { - "type": "integer", - "description": "Total number of pages" - }, - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScrapeResponse" - }, - "description": "Data returned from the job (null when it is in progress)" + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false } } } - } - } - }, - "402": { - "description": "Payment required" - }, - "429": { - "description": "Too many requests" - }, - "500": { - "description": "Server error" - } - } - } - } - }, - "components": { - "securitySchemes": { - "bearerAuth": { - "type": "http", - "scheme": "bearer" - } - }, - "schemas": { - "ScrapeResponse": { - "type": "object", - "properties": { - "success": { - "type": "boolean" - }, - "data": { - "type": "object", - "properties": { - "content": { - "type": "string" }, - "markdown": { - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "language": { - "type": "string", - "nullable": true - }, - "sourceURL": { - "type": "string", - "format": "uri" - } - } - } + "required": ["url"] } } } }, - "CrawlResponse": { - "type": "object", - "properties": { - "jobId": { - "type": "string" + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ScrapeResponse" + } + } } + }, + "402": { + "description": "Payment required" + }, + "429": { + "description": "Too many requests" + }, + "500": { + "description": "Server error" } } } }, - "security": [ - { - "bearerAuth": [] + "/crawl": { + "post": { + "summary": "Crawl multiple URLs based on options", + "operationId": "crawlUrls", + "tags": ["Crawling"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The base URL to start crawling from" + }, + "crawlerOptions": { + "type": "object", + "properties": { + "includes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to include" + }, + "excludes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to exclude" + }, + "generateImgAltText": { + "type": "boolean", + "description": "Generate alt text for images using LLMs (must have a paid plan)", + "default": false + }, + "returnOnlyUrls": { + "type": "boolean", + "description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.", + "default": false + }, + "limit": { + "type": "integer", + "description": "Maximum number of pages to crawl" + } + } + }, + "pageOptions": { + "type": "object", + "properties": { + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false + } + } + } + }, + "required": ["url"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CrawlResponse" + } + } + } + }, + "402": { + "description": "Payment required" + }, + "429": { + "description": "Too many requests" + }, + "500": { + "description": "Server error" + } + } } - ] - } - \ No newline at end of file + }, + "/crawl/status/{jobId}": { + "get": { + "tags": ["Crawl"], + "summary": "Get the status of a crawl job", + "operationId": "getCrawlStatus", + "security": [ + { + "bearerAuth": [] + } + ], + "parameters": [ + { + "name": "jobId", + "in": "path", + "description": "ID of the crawl job", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "Status of the job (completed, active, failed, paused)" + }, + "current": { + "type": "integer", + "description": "Current page number" + }, + "current_url": { + "type": "string", + "description": "Current URL being scraped" + }, + "current_step": { + "type": "string", + "description": "Current step in the process" + }, + "total": { + "type": "integer", + "description": "Total number of pages" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScrapeResponse" + }, + "description": "Data returned from the job (null when it is in progress)" + } + } + } + } + } + }, + "402": { + "description": "Payment required" + }, + "429": { + "description": "Too many requests" + }, + "500": { + "description": "Server error" + } + } + } + } + }, + "components": { + "securitySchemes": { + "bearerAuth": { + "type": "http", + "scheme": "bearer" + } + }, + "schemas": { + "ScrapeResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "content": { + "type": "string" + }, + "markdown": { + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + }, + "CrawlResponse": { + "type": "object", + "properties": { + "jobId": { + "type": "string" + } + } + } + } + }, + "security": [ + { + "bearerAuth": [] + } + ] +}