Update crawl.json
This commit is contained in:
parent
499671c87f
commit
98dd672d0a
@ -117,21 +117,11 @@
|
|||||||
],
|
],
|
||||||
"notes": "This one should not go backwards, but it does!"
|
"notes": "This one should not go backwards, but it does!"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://news.ycombinator.com/",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://www.vellum.ai/llm-leaderboard",
|
"website": "https://www.vellum.ai/llm-leaderboard",
|
||||||
"expected_min_num_of_pages": 0,
|
"expected_min_num_of_pages": 0,
|
||||||
"expected_crawled_pages": [""]
|
"expected_crawled_pages": [""]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://www.bigbadtoystore.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://www.instructables.com",
|
"website": "https://www.instructables.com",
|
||||||
"expected_min_num_of_pages": 78,
|
"expected_min_num_of_pages": 78,
|
||||||
@ -150,31 +140,6 @@
|
|||||||
"https://www.instructables.com/circuits/clocks/projects/"
|
"https://www.instructables.com/circuits/clocks/projects/"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://www.powells.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.royalacademy.org.uk",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.eastbaytimes.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.manchestereveningnews.co.uk",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://physicsworld.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://richmondconfidential.org",
|
"website": "https://richmondconfidential.org",
|
||||||
"expected_min_num_of_pages": 50,
|
"expected_min_num_of_pages": 50,
|
||||||
@ -191,12 +156,6 @@
|
|||||||
"https://richmondconfidential.org/2009/10/13/family-calls-for-end-to-violence-at-memorial-for-slain-woman-friend/"
|
"https://richmondconfidential.org/2009/10/13/family-calls-for-end-to-violence-at-memorial-for-slain-woman-friend/"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://www.techinasia.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""],
|
|
||||||
"notes": "The website has a paywall and bot detectors."
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://www.boardgamegeek.com",
|
"website": "https://www.boardgamegeek.com",
|
||||||
"expected_min_num_of_pages": 15,
|
"expected_min_num_of_pages": 15,
|
||||||
@ -217,10 +176,5 @@
|
|||||||
"https://www.boardgamegeek.com/previews",
|
"https://www.boardgamegeek.com/previews",
|
||||||
"https://www.boardgamegeek.com/browse/boardgame"
|
"https://www.boardgamegeek.com/browse/boardgame"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"website": "https://www.mountainproject.com",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
Loading…
Reference in New Issue
Block a user