Nick:
This commit is contained in:
parent
ade4e05cff
commit
24be4866c5
@ -151,7 +151,6 @@ export class WebCrawler {
|
|||||||
concurrencyLimit: number,
|
concurrencyLimit: number,
|
||||||
inProgress?: (progress: Progress) => void,
|
inProgress?: (progress: Progress) => void,
|
||||||
): Promise<{ url: string, html: string }[]> {
|
): Promise<{ url: string, html: string }[]> {
|
||||||
console.log("Crawling URLs: ", urls);
|
|
||||||
const queue = async.queue(async (task: string, callback) => {
|
const queue = async.queue(async (task: string, callback) => {
|
||||||
if (this.crawledUrls.size >= this.maxCrawledLinks) {
|
if (this.crawledUrls.size >= this.maxCrawledLinks) {
|
||||||
if (callback && typeof callback === "function") {
|
if (callback && typeof callback === "function") {
|
||||||
|
@ -1,4 +1,10 @@
|
|||||||
[{
|
[
|
||||||
|
{
|
||||||
|
"website": "https://www.vellum.ai/llm-leaderboard",
|
||||||
|
"expected_min_num_of_pages": 1,
|
||||||
|
"expected_crawled_pages": ["https://www.vellum.ai/llm-leaderboard"]
|
||||||
|
},
|
||||||
|
{
|
||||||
"website": "https://openai.com/news",
|
"website": "https://openai.com/news",
|
||||||
"expected_min_num_of_pages": 4,
|
"expected_min_num_of_pages": 4,
|
||||||
"expected_crawled_pages": [
|
"expected_crawled_pages": [
|
||||||
@ -70,8 +76,6 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
"website": "https://ycombinator.com/companies",
|
"website": "https://ycombinator.com/companies",
|
||||||
"expected_min_num_of_pages": 20,
|
"expected_min_num_of_pages": 20,
|
||||||
@ -115,11 +119,7 @@
|
|||||||
],
|
],
|
||||||
"notes": "This one should not go backwards, but it does!"
|
"notes": "This one should not go backwards, but it does!"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"website": "https://www.vellum.ai/llm-leaderboard",
|
|
||||||
"expected_min_num_of_pages": 0,
|
|
||||||
"expected_crawled_pages": [""]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"website": "https://www.instructables.com/circuits",
|
"website": "https://www.instructables.com/circuits",
|
||||||
"expected_min_num_of_pages": 12,
|
"expected_min_num_of_pages": 12,
|
||||||
|
Loading…
Reference in New Issue
Block a user