Update runWebScraper.ts
This commit is contained in:
parent
3f090ffd7c
commit
f3ec21d9c4
@ -17,8 +17,10 @@ export async function startWebScraperPipeline({
|
|||||||
crawlerOptions: job.data.crawlerOptions,
|
crawlerOptions: job.data.crawlerOptions,
|
||||||
pageOptions: job.data.pageOptions,
|
pageOptions: job.data.pageOptions,
|
||||||
inProgress: (progress) => {
|
inProgress: (progress) => {
|
||||||
|
if (progress.currentDocument) {
|
||||||
partialDocs.push(progress.currentDocument);
|
partialDocs.push(progress.currentDocument);
|
||||||
job.progress({ ...progress, partialDocs: partialDocs });
|
job.progress({ ...progress, partialDocs: partialDocs });
|
||||||
|
}
|
||||||
},
|
},
|
||||||
onSuccess: (result) => {
|
onSuccess: (result) => {
|
||||||
job.moveToCompleted(result);
|
job.moveToCompleted(result);
|
||||||
@ -27,7 +29,7 @@ export async function startWebScraperPipeline({
|
|||||||
job.moveToFailed(error);
|
job.moveToFailed(error);
|
||||||
},
|
},
|
||||||
team_id: job.data.team_id,
|
team_id: job.data.team_id,
|
||||||
bull_job_id: job.id.toString()
|
bull_job_id: job.id.toString(),
|
||||||
})) as { success: boolean; message: string; docs: Document[] };
|
})) as { success: boolean; message: string; docs: Document[] };
|
||||||
}
|
}
|
||||||
export async function runWebScraper({
|
export async function runWebScraper({
|
||||||
@ -63,26 +65,25 @@ export async function runWebScraper({
|
|||||||
urls: [url],
|
urls: [url],
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
bullJobId: bull_job_id
|
bullJobId: bull_job_id,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
await provider.setOptions({
|
await provider.setOptions({
|
||||||
mode: mode,
|
mode: mode,
|
||||||
urls: url.split(","),
|
urls: url.split(","),
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
pageOptions: pageOptions
|
pageOptions: pageOptions,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const docs = (await provider.getDocuments(false, (progress: Progress) => {
|
const docs = (await provider.getDocuments(false, (progress: Progress) => {
|
||||||
inProgress(progress);
|
inProgress(progress);
|
||||||
|
|
||||||
})) as Document[];
|
})) as Document[];
|
||||||
|
|
||||||
if (docs.length === 0) {
|
if (docs.length === 0) {
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
message: "No pages found",
|
message: "No pages found",
|
||||||
docs: []
|
docs: [],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,18 +96,14 @@ export async function runWebScraper({
|
|||||||
})
|
})
|
||||||
: docs.filter((doc) => doc.content.trim().length > 0);
|
: docs.filter((doc) => doc.content.trim().length > 0);
|
||||||
|
|
||||||
|
const billingResult = await billTeam(team_id, filteredDocs.length);
|
||||||
const billingResult = await billTeam(
|
|
||||||
team_id,
|
|
||||||
filteredDocs.length
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!billingResult.success) {
|
if (!billingResult.success) {
|
||||||
// throw new Error("Failed to bill team, no subscription was found");
|
// throw new Error("Failed to bill team, no subscription was found");
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
message: "Failed to bill team, no subscription was found",
|
message: "Failed to bill team, no subscription was found",
|
||||||
docs: []
|
docs: [],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user