Nick:
This commit is contained in:
parent
19cba43ee4
commit
0db0874b00
@ -42,6 +42,7 @@ export async function crawlController(req: Request, res: Response) {
|
|||||||
returnOnlyUrls: true,
|
returnOnlyUrls: true,
|
||||||
},
|
},
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
const docs = await a.getDocuments(false, (progress) => {
|
const docs = await a.getDocuments(false, (progress) => {
|
||||||
@ -67,6 +68,7 @@ export async function crawlController(req: Request, res: Response) {
|
|||||||
crawlerOptions: { ...crawlerOptions },
|
crawlerOptions: { ...crawlerOptions },
|
||||||
team_id: team_id,
|
team_id: team_id,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
|
origin: req.body.origin ?? "api",
|
||||||
});
|
});
|
||||||
|
|
||||||
res.json({ jobId: job.id });
|
res.json({ jobId: job.id });
|
||||||
|
@ -21,12 +21,14 @@ export async function crawlPreviewController(req: Request, res: Response) {
|
|||||||
const mode = req.body.mode ?? "crawl";
|
const mode = req.body.mode ?? "crawl";
|
||||||
const crawlerOptions = req.body.crawlerOptions ?? {};
|
const crawlerOptions = req.body.crawlerOptions ?? {};
|
||||||
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };
|
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };
|
||||||
|
|
||||||
const job = await addWebScraperJob({
|
const job = await addWebScraperJob({
|
||||||
url: url,
|
url: url,
|
||||||
mode: mode ?? "crawl", // fix for single urls not working
|
mode: mode ?? "crawl", // fix for single urls not working
|
||||||
crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 },
|
crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 },
|
||||||
team_id: "preview",
|
team_id: "preview",
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
|
origin: "website-preview",
|
||||||
});
|
});
|
||||||
|
|
||||||
res.json({ jobId: job.id });
|
res.json({ jobId: job.id });
|
||||||
|
@ -72,6 +72,7 @@ export async function scrapeController(req: Request, res: Response) {
|
|||||||
}
|
}
|
||||||
const crawlerOptions = req.body.crawlerOptions ?? {};
|
const crawlerOptions = req.body.crawlerOptions ?? {};
|
||||||
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };
|
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };
|
||||||
|
const origin = req.body.origin ?? "api";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
||||||
@ -83,24 +84,27 @@ export async function scrapeController(req: Request, res: Response) {
|
|||||||
console.error(error);
|
console.error(error);
|
||||||
return res.status(500).json({ error: "Internal server error" });
|
return res.status(500).json({ error: "Internal server error" });
|
||||||
}
|
}
|
||||||
|
const startTime = new Date().getTime();
|
||||||
const result = await scrapeHelper(
|
const result = await scrapeHelper(
|
||||||
req,
|
req,
|
||||||
team_id,
|
team_id,
|
||||||
crawlerOptions,
|
crawlerOptions,
|
||||||
pageOptions
|
pageOptions
|
||||||
);
|
);
|
||||||
|
const endTime = new Date().getTime();
|
||||||
|
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
||||||
logJob({
|
logJob({
|
||||||
success: result.success,
|
success: result.success,
|
||||||
message: result.error,
|
message: result.error,
|
||||||
num_docs: 1,
|
num_docs: 1,
|
||||||
docs: [result.data],
|
docs: [result.data],
|
||||||
time_taken: 0,
|
time_taken: timeTakenInSeconds,
|
||||||
team_id: team_id,
|
team_id: team_id,
|
||||||
mode: "scrape",
|
mode: "scrape",
|
||||||
url: req.body.url,
|
url: req.body.url,
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
|
origin: origin,
|
||||||
});
|
});
|
||||||
return res.status(result.returnCode).json(result);
|
return res.status(result.returnCode).json(result);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -44,7 +44,11 @@ export async function runWebScraper({
|
|||||||
onSuccess: (result: any) => void;
|
onSuccess: (result: any) => void;
|
||||||
onError: (error: any) => void;
|
onError: (error: any) => void;
|
||||||
team_id: string;
|
team_id: string;
|
||||||
}): Promise<{ success: boolean; message: string; docs: CrawlResult[] }> {
|
}): Promise<{
|
||||||
|
success: boolean;
|
||||||
|
message: string;
|
||||||
|
docs: CrawlResult[];
|
||||||
|
}> {
|
||||||
try {
|
try {
|
||||||
const provider = new WebScraperDataProvider();
|
const provider = new WebScraperDataProvider();
|
||||||
if (mode === "crawl") {
|
if (mode === "crawl") {
|
||||||
@ -70,7 +74,7 @@ export async function runWebScraper({
|
|||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
message: "No pages found",
|
message: "No pages found",
|
||||||
docs: [],
|
docs: []
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,7 +91,7 @@ export async function runWebScraper({
|
|||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
message: "Failed to bill team, no subscription was found",
|
message: "Failed to bill team, no subscription was found",
|
||||||
docs: [],
|
docs: []
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,11 +17,12 @@ export async function logJob(job: FirecrawlJob) {
|
|||||||
num_docs: job.num_docs,
|
num_docs: job.num_docs,
|
||||||
docs: job.docs,
|
docs: job.docs,
|
||||||
time_taken: job.time_taken,
|
time_taken: job.time_taken,
|
||||||
team_id: job.team_id,
|
team_id: job.team_id === "preview" ? null : job.team_id,
|
||||||
mode: job.mode,
|
mode: job.mode,
|
||||||
url: job.url,
|
url: job.url,
|
||||||
crawler_options: job.crawlerOptions,
|
crawler_options: job.crawlerOptions,
|
||||||
page_options: job.pageOptions,
|
page_options: job.pageOptions,
|
||||||
|
origin: job.origin,
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
if (error) {
|
if (error) {
|
||||||
|
@ -17,6 +17,7 @@ getWebScraperQueue().process(
|
|||||||
current_url: "",
|
current_url: "",
|
||||||
});
|
});
|
||||||
const start = Date.now();
|
const start = Date.now();
|
||||||
|
console.log("Processing job", job.data);
|
||||||
const { success, message, docs } = await startWebScraperPipeline({ job });
|
const { success, message, docs } = await startWebScraperPipeline({ job });
|
||||||
const end = Date.now();
|
const end = Date.now();
|
||||||
const timeTakenInSeconds = (end - start) / 1000;
|
const timeTakenInSeconds = (end - start) / 1000;
|
||||||
@ -45,6 +46,7 @@ getWebScraperQueue().process(
|
|||||||
url: job.data.url,
|
url: job.data.url,
|
||||||
crawlerOptions: job.data.crawlerOptions,
|
crawlerOptions: job.data.crawlerOptions,
|
||||||
pageOptions: job.data.pageOptions,
|
pageOptions: job.data.pageOptions,
|
||||||
|
origin: job.data.origin,
|
||||||
});
|
});
|
||||||
done(null, data);
|
done(null, data);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import { supabase_service } from "./supabase";
|
import { supabase_service } from "./supabase";
|
||||||
|
|
||||||
export const callWebhook = async (teamId: string, data: any) => {
|
export const callWebhook = async (teamId: string, data: any) => {
|
||||||
|
try {
|
||||||
const { data: webhooksData, error } = await supabase_service
|
const { data: webhooksData, error } = await supabase_service
|
||||||
.from('webhooks')
|
.from('webhooks')
|
||||||
.select('url')
|
.select('url')
|
||||||
@ -37,5 +38,9 @@ export const callWebhook = async (teamId: string, data: any) => {
|
|||||||
data: dataToSend,
|
data: dataToSend,
|
||||||
error: data.error || undefined,
|
error: data.error || undefined,
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
}
|
} catch (error) {
|
||||||
|
console.error(`Error sending webhook for team ID: ${teamId}`, error.message);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@ export interface WebScraperOptions {
|
|||||||
crawlerOptions: any;
|
crawlerOptions: any;
|
||||||
pageOptions: any;
|
pageOptions: any;
|
||||||
team_id: string;
|
team_id: string;
|
||||||
|
origin?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -36,6 +37,7 @@ export interface FirecrawlJob {
|
|||||||
url: string;
|
url: string;
|
||||||
crawlerOptions?: any;
|
crawlerOptions?: any;
|
||||||
pageOptions?: any;
|
pageOptions?: any;
|
||||||
|
origin: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user