0

Update crawl.json

This commit is contained in:
Nicolas 2024-05-15 12:55:04 -07:00
parent 499671c87f
commit 98dd672d0a

View File

@@ -117,21 +117,11 @@
],
"notes": "This one should not go backwards, but it does!"
},
{
"website": "https://news.ycombinator.com/",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://www.vellum.ai/llm-leaderboard",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://www.bigbadtoystore.com",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://www.instructables.com",
"expected_min_num_of_pages": 78,
@@ -150,31 +140,6 @@
"https://www.instructables.com/circuits/clocks/projects/"
]
},
{
"website": "https://www.powells.com",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://www.royalacademy.org.uk",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://www.eastbaytimes.com",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://www.manchestereveningnews.co.uk",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://physicsworld.com",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{
"website": "https://richmondconfidential.org",
"expected_min_num_of_pages": 50,
@@ -191,12 +156,6 @@
"https://richmondconfidential.org/2009/10/13/family-calls-for-end-to-violence-at-memorial-for-slain-woman-friend/"
]
},
{
"website": "https://www.techinasia.com",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""],
"notes": "The website has a paywall and bot detectors."
},
{
"website": "https://www.boardgamegeek.com",
"expected_min_num_of_pages": 15,
@@ -217,10 +176,5 @@
"https://www.boardgamegeek.com/previews",
"https://www.boardgamegeek.com/browse/boardgame"
]
},
{
"website": "https://www.mountainproject.com",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
}
]