From aa6b84c5fa591900c855a0419d5be9b3ca14f08b Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Wed, 8 May 2024 17:41:15 -0700
Subject: [PATCH] Nick: readme

---
 README.md                 | 29 +++++++++++++++++++++++++++++
 apps/python-sdk/README.md | 28 ++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/README.md b/README.md
index 9ac5636..17ba373 100644
--- a/README.md
+++ b/README.md
@@ -248,6 +248,35 @@ url = 'https://example.com'
 scraped_data = app.scrape_url(url)
 ```
 
+### Extracting structured data from a URL
+
+With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it even easier. Here is how to use it:
+
+```python
+from pydantic import BaseModel, Field
+from typing import List
+
+class ArticleSchema(BaseModel):
+    title: str
+    points: int
+    by: str
+    commentsURL: str
+
+class TopArticlesSchema(BaseModel):
+    top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
+
+data = app.scrape_url('https://news.ycombinator.com', {
+    'extractorOptions': {
+        'extractionSchema': TopArticlesSchema.model_json_schema(),
+        'mode': 'llm-extraction'
+    },
+    'pageOptions': {
+        'onlyMainContent': True
+    }
+})
+print(data["llm_extraction"])
+```
+
 ### Search for a query
 
 Performs a web search, retrieve the top results, extract data from each page, and returns their markdown.
diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md
index 02ad307..38ca843 100644
--- a/apps/python-sdk/README.md
+++ b/apps/python-sdk/README.md
@@ -46,6 +46,34 @@ To scrape a single URL, use the `scrape_url` method. It takes the URL as a param
 url = 'https://example.com'
 scraped_data = app.scrape_url(url)
 ```
+### Extracting structured data from a URL
+
+With LLM extraction, you can easily extract structured data from any URL. We support pydantic schemas to make it even easier. Here is how to use it:
+
+```python
+from pydantic import BaseModel, Field
+from typing import List
+
+class ArticleSchema(BaseModel):
+    title: str
+    points: int
+    by: str
+    commentsURL: str
+
+class TopArticlesSchema(BaseModel):
+    top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
+
+data = app.scrape_url('https://news.ycombinator.com', {
+    'extractorOptions': {
+        'extractionSchema': TopArticlesSchema.model_json_schema(),
+        'mode': 'llm-extraction'
+    },
+    'pageOptions': {
+        'onlyMainContent': True
+    }
+})
+print(data["llm_extraction"])
+```
 ### Search for a query