0

Update runWebScraper.ts

This commit is contained in:
Nicolas 2024-05-13 13:57:22 -07:00
parent 3f090ffd7c
commit f3ec21d9c4

View File

@@ -17,8 +17,10 @@ export async function startWebScraperPipeline({
crawlerOptions: job.data.crawlerOptions, crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions, pageOptions: job.data.pageOptions,
inProgress: (progress) => { inProgress: (progress) => {
if (progress.currentDocument) {
partialDocs.push(progress.currentDocument); partialDocs.push(progress.currentDocument);
job.progress({ ...progress, partialDocs: partialDocs }); job.progress({ ...progress, partialDocs: partialDocs });
}
}, },
onSuccess: (result) => { onSuccess: (result) => {
job.moveToCompleted(result); job.moveToCompleted(result);
@@ -27,7 +29,7 @@ export async function startWebScraperPipeline({
job.moveToFailed(error); job.moveToFailed(error);
}, },
team_id: job.data.team_id, team_id: job.data.team_id,
bull_job_id: job.id.toString() bull_job_id: job.id.toString(),
})) as { success: boolean; message: string; docs: Document[] }; })) as { success: boolean; message: string; docs: Document[] };
} }
export async function runWebScraper({ export async function runWebScraper({
@@ -63,26 +65,25 @@ export async function runWebScraper({
urls: [url], urls: [url],
crawlerOptions: crawlerOptions, crawlerOptions: crawlerOptions,
pageOptions: pageOptions, pageOptions: pageOptions,
bullJobId: bull_job_id bullJobId: bull_job_id,
}); });
} else { } else {
await provider.setOptions({ await provider.setOptions({
mode: mode, mode: mode,
urls: url.split(","), urls: url.split(","),
crawlerOptions: crawlerOptions, crawlerOptions: crawlerOptions,
pageOptions: pageOptions pageOptions: pageOptions,
}); });
} }
const docs = (await provider.getDocuments(false, (progress: Progress) => { const docs = (await provider.getDocuments(false, (progress: Progress) => {
inProgress(progress); inProgress(progress);
})) as Document[]; })) as Document[];
if (docs.length === 0) { if (docs.length === 0) {
return { return {
success: true, success: true,
message: "No pages found", message: "No pages found",
docs: [] docs: [],
}; };
} }
@@ -95,18 +96,14 @@ export async function runWebScraper({
}) })
: docs.filter((doc) => doc.content.trim().length > 0); : docs.filter((doc) => doc.content.trim().length > 0);
const billingResult = await billTeam(team_id, filteredDocs.length);
const billingResult = await billTeam(
team_id,
filteredDocs.length
);
if (!billingResult.success) { if (!billingResult.success) {
// throw new Error("Failed to bill team, no subscription was found"); // throw new Error("Failed to bill team, no subscription was found");
return { return {
success: false, success: false,
message: "Failed to bill team, no subscription was found", message: "Failed to bill team, no subscription was found",
docs: [] docs: [],
}; };
} }