Nick: readme
This commit is contained in:
parent
d9da4b53f8
commit
aa6b84c5fa
26
README.md
26
README.md
@ -248,6 +248,32 @@ url = 'https://example.com'
|
|||||||
scraped_data = app.scrape_url(url)
|
scraped_data = app.scrape_url(url)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Extracting structured data from a URL
|
||||||
|
|
||||||
|
With LLM extraction, you can easily extract structured data from any URL. We also support Pydantic schemas to make this easier. Here is how to use it:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ArticleSchema(BaseModel):
|
||||||
|
title: str
|
||||||
|
points: int
|
||||||
|
by: str
|
||||||
|
commentsURL: str
|
||||||
|
|
||||||
|
class TopArticlesSchema(BaseModel):
|
||||||
|
top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
|
||||||
|
|
||||||
|
data = app.scrape_url('https://news.ycombinator.com', {
|
||||||
|
'extractorOptions': {
|
||||||
|
'extractionSchema': TopArticlesSchema.model_json_schema(),
|
||||||
|
'mode': 'llm-extraction'
|
||||||
|
},
|
||||||
|
'pageOptions':{
|
||||||
|
'onlyMainContent': True
|
||||||
|
}
|
||||||
|
})
|
||||||
|
print(data["llm_extraction"])
|
||||||
|
```
|
||||||
|
|
||||||
### Search for a query
|
### Search for a query
|
||||||
|
|
||||||
Performs a web search, retrieves the top results, extracts data from each page, and returns their markdown.
|
Performs a web search, retrieves the top results, extracts data from each page, and returns their markdown.
|
||||||
|
@ -46,6 +46,31 @@ To scrape a single URL, use the `scrape_url` method. It takes the URL as a param
|
|||||||
url = 'https://example.com'
|
url = 'https://example.com'
|
||||||
scraped_data = app.scrape_url(url)
|
scraped_data = app.scrape_url(url)
|
||||||
```
|
```
|
||||||
|
### Extracting structured data from a URL
|
||||||
|
|
||||||
|
With LLM extraction, you can easily extract structured data from any URL. We also support Pydantic schemas to make this easier. Here is how to use it:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ArticleSchema(BaseModel):
|
||||||
|
title: str
|
||||||
|
points: int
|
||||||
|
by: str
|
||||||
|
commentsURL: str
|
||||||
|
|
||||||
|
class TopArticlesSchema(BaseModel):
|
||||||
|
top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories")
|
||||||
|
|
||||||
|
data = app.scrape_url('https://news.ycombinator.com', {
|
||||||
|
'extractorOptions': {
|
||||||
|
'extractionSchema': TopArticlesSchema.model_json_schema(),
|
||||||
|
'mode': 'llm-extraction'
|
||||||
|
},
|
||||||
|
'pageOptions':{
|
||||||
|
'onlyMainContent': True
|
||||||
|
}
|
||||||
|
})
|
||||||
|
print(data["llm_extraction"])
|
||||||
|
```
|
||||||
|
|
||||||
### Search for a query
|
### Search for a query
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user