From 9390816c1b7975b3349f402f562d1846e6845e2a Mon Sep 17 00:00:00 2001
From: Nicolas <nicolascamara29@gmail.com>
Date: Mon, 10 Jun 2024 18:26:25 -0700
Subject: [PATCH] Update openapi.json

---
 apps/api/openapi.json | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/apps/api/openapi.json b/apps/api/openapi.json
index ab452ff..55bfe1c 100644
--- a/apps/api/openapi.json
+++ b/apps/api/openapi.json
@@ -51,10 +51,19 @@
                         "description": "Include the raw HTML content of the page. Will output a html key in the response.",
                         "default": false
                       },
+                      "screenshot": {
+                        "type": "boolean",
+                        "description": "Include a screenshot of the top of the page that you are scraping.",
+                        "default": false
+                      },
                       "waitFor": {
                         "type": "integer",
                         "description": "Wait x amount of milliseconds for the page to load to fetch content",
                         "default": 0
+                      },
+                      "headers": {
+                        "type": "object",
+                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                       }
                     }
                   },
@@ -176,6 +185,11 @@
                         "description": "The crawling mode to use. Fast mode crawls 4x faster websites without sitemap, but may not be as accurate and shouldn't be used in heavy js-rendered websites.",
                         "default": "default"
                       },
+                      "ignoreSitemap": {
+                        "type": "boolean",
+                        "description": "Ignore the website sitemap when crawling",
+                        "default": false
+                      },
                       "limit": {
                         "type": "integer",
                         "description": "Maximum number of pages to crawl",
@@ -195,6 +209,15 @@
                         "type": "boolean",
                         "description": "Include the raw HTML content of the page. Will output a html key in the response.",
                         "default": false
+                      },
+                      "screenshot": {
+                        "type": "boolean",
+                        "description": "Include a screenshot of the top of the page that you are scraping.",
+                        "default": false
+                      },
+                      "headers": {
+                        "type": "object",
+                        "description": "Headers to send with the request when scraping. Can be used to send cookies, user-agent, etc."
                       }
                     }
                   }
@@ -368,7 +391,7 @@
                       "items": {
                         "$ref": "#/components/schemas/CrawlStatusResponseObj"
                       },
-                      "description": "Partial documents returned as it is being crawls (streaming). When a page is ready it will append to the parial_data array - so no need to wait for all the website to be crawled."
+                      "description": "Partial documents returned while the website is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. There is a maximum of 50 items in the array response. The oldest item (top of the array) will be removed when a new item is added to the array."
                     }
                   }
                 }
@@ -513,6 +536,10 @@
             "nullable": true,
             "description": "Raw HTML content of the page if `includeHtml`  is true"
           },
+          "index": {
+            "type": "integer",
+            "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
+          },
           "metadata": {
             "type": "object",
             "properties": {