0
This commit is contained in:
Nicolas 2024-05-15 17:16:20 -07:00
parent ade4e05cff
commit 24be4866c5
2 changed files with 8 additions and 9 deletions

View File

@ -151,7 +151,6 @@ export class WebCrawler {
concurrencyLimit: number, concurrencyLimit: number,
inProgress?: (progress: Progress) => void, inProgress?: (progress: Progress) => void,
): Promise<{ url: string, html: string }[]> { ): Promise<{ url: string, html: string }[]> {
console.log("Crawling URLs: ", urls);
const queue = async.queue(async (task: string, callback) => { const queue = async.queue(async (task: string, callback) => {
if (this.crawledUrls.size >= this.maxCrawledLinks) { if (this.crawledUrls.size >= this.maxCrawledLinks) {
if (callback && typeof callback === "function") { if (callback && typeof callback === "function") {

View File

@ -1,4 +1,10 @@
[{ [
{
"website": "https://www.vellum.ai/llm-leaderboard",
"expected_min_num_of_pages": 1,
"expected_crawled_pages": ["https://www.vellum.ai/llm-leaderboard"]
},
{
"website": "https://openai.com/news", "website": "https://openai.com/news",
"expected_min_num_of_pages": 4, "expected_min_num_of_pages": 4,
"expected_crawled_pages": [ "expected_crawled_pages": [
@ -70,8 +76,6 @@
] ]
}, },
{ {
"website": "https://ycombinator.com/companies", "website": "https://ycombinator.com/companies",
"expected_min_num_of_pages": 20, "expected_min_num_of_pages": 20,
@ -115,11 +119,7 @@
], ],
"notes": "This one should not go backwards, but it does!" "notes": "This one should not go backwards, but it does!"
}, },
{
"website": "https://www.vellum.ai/llm-leaderboard",
"expected_min_num_of_pages": 0,
"expected_crawled_pages": [""]
},
{ {
"website": "https://www.instructables.com/circuits", "website": "https://www.instructables.com/circuits",
"expected_min_num_of_pages": 12, "expected_min_num_of_pages": 12,