0

Merge branch 'test/crawl-options'

This commit is contained in:
Nicolas 2024-05-16 11:03:43 -07:00
commit d407ec76dd

View File

@ -242,7 +242,7 @@
"query": { "query": {
"type": "string", "type": "string",
"format": "uri", "format": "uri",
"description": "The URL to scrape" "description": "The query to search for"
}, },
"pageOptions": { "pageOptions": {
"type": "object", "type": "object",
@ -354,14 +354,14 @@
"data": { "data": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/ScrapeResponse" "$ref": "#/components/schemas/CrawlStatusResponseObj"
}, },
"description": "Data returned from the job (null when it is in progress)" "description": "Data returned from the job (null when it is in progress)"
}, },
"partial_data": { "partial_data": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/ScrapeResponse" "$ref": "#/components/schemas/CrawlStatusResponseObj"
}, },
"description": "Partial documents returned while the site is being crawled (streaming). When a page is ready it is appended to the partial_data array, so there is no need to wait for the whole website to be crawled." "description": "Partial documents returned while the site is being crawled (streaming). When a page is ready it is appended to the partial_data array, so there is no need to wait for the whole website to be crawled."
} }
@ -484,6 +484,41 @@
} }
} }
}, },
"CrawlStatusResponseObj": {
  "type": "object",
  "properties": {
    "markdown": {
      "type": "string"
    },
    "content": {
      "type": "string"
    },
    "html": {
      "type": "string",
      "nullable": true,
      "description": "Raw HTML content of the page if `includeHtml` is true"
    },
    "metadata": {
      "type": "object",
      "properties": {
        "title": {
          "type": "string"
        },
        "description": {
          "type": "string"
        },
        "language": {
          "type": "string",
          "nullable": true
        },
        "sourceURL": {
          "type": "string",
          "format": "uri"
        }
      }
    }
  }
},
"SearchResponse": { "SearchResponse": {
"type": "object", "type": "object",
"properties": { "properties": {