0

Merge branch 'nsc/new-pricing'

This commit is contained in:
Nicolas 2024-05-30 16:08:31 -07:00
commit 260e31c68b

View File

@@ -1,42 +1,42 @@
import { scrapWithFireEngine } from "../../src/scraper/WebScraper/single_url"; // import { scrapWithFireEngine } from "../../src/scraper/WebScraper/single_url";
/**
 * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
 * Awaiting it pauses the calling async function for that long.
 */
const delay = (ms: number) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
/**
 * Scrapes `urls` in consecutive batches of `batchSize`, pausing `delayMs`
 * milliseconds between batches, then logs the final totals.
 *
 * Every URL of a batch is handed to `scrapWithFireEngine` before any result is
 * awaited. A URL counts as a success when its result is non-blank text; a blank
 * result, a non-string result, or a rejected scrape counts as an error.
 *
 * @param urls - URLs to scrape, processed in order.
 * @param batchSize - Number of URLs per batch; must be a positive integer.
 * @param delayMs - Pause between consecutive batches, in milliseconds.
 * @returns The final success and error tallies.
 * @throws RangeError (as a rejection) if `batchSize` is not a positive integer.
 */
const scrapInBatches = async (
  urls: string[],
  batchSize: number,
  delayMs: number
) => {
  // A zero or negative step would never advance the loop, and a fractional one
  // would re-scrape URLs because `slice` truncates its indices.
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new RangeError(
      `batchSize must be a positive integer, received ${batchSize}`
    );
  }

  let successCount = 0;
  let errorCount = 0;

  for (let i = 0; i < urls.length; i += batchSize) {
    const batch = urls
      .slice(i, i + batchSize)
      .map((url) => scrapWithFireEngine(url));

    // allSettled keeps the outcome of every scrape; Promise.all would discard
    // the whole batch (and leave it uncounted) as soon as one scrape rejected.
    const outcomes = await Promise.allSettled(batch);

    outcomes.forEach((outcome, index) => {
      if (outcome.status === "rejected") {
        errorCount++;
        console.error("Error during scraping:", outcome.reason);
        return;
      }

      // Anything that is not text is tallied as an error instead of letting
      // `.trim()` throw and abort the whole run.
      const value: unknown = outcome.value;
      const text = typeof value === "string" ? value.trim() : "";
      if (text === "") {
        errorCount++;
        return;
      }

      successCount++;
      console.log(
        `Scraping result ${i + index + 1}:`,
        text.substring(0, 20) + "..."
      );
    });

    // Only pause when another batch follows; sleeping after the last one just
    // delays the summary.
    if (i + batchSize < urls.length) {
      await delay(delayMs);
    }
  }

  console.log(`Total successful scrapes: ${successCount}`);
  console.log(`Total errored scrapes: ${errorCount}`);

  return { successCount, errorCount };
};
/**
 * Starts one batch-scrape run: 200 copies of the same URL, passed to
 * `scrapInBatches` with a batch size of 10 and a delay argument of 1000.
 *
 * Returns immediately; the scrape continues in the background.
 */
function run() {
  const urls = Array.from({ length: 200 }, () => "https://scrapethissite.com");
  // Fire-and-forget: `void` marks the promise as intentionally not awaited, and
  // the `.catch` keeps an unexpected rejection from becoming an unhandled one.
  void scrapInBatches(urls, 10, 1000).catch((error: unknown) => {
    console.error("Batch scraping failed:", error);
  });
}