0
This commit is contained in:
Nicolas 2024-04-23 15:44:11 -07:00
parent 0146157876
commit 5e3e2ec966
2 changed files with 41 additions and 23 deletions

View File

@ -4,14 +4,15 @@ import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../types";
import { logJob } from "../services/logging/log_job";
import { PageOptions } from "../lib/entities";
import { PageOptions, SearchOptions } from "../lib/entities";
import { search } from "../search/googlesearch";
export async function searchHelper(
req: Request,
team_id: string,
crawlerOptions: any,
pageOptions: PageOptions
pageOptions: PageOptions,
searchOptions: SearchOptions
): Promise<{
success: boolean;
error?: string;
@ -19,11 +20,12 @@ export async function searchHelper(
returnCode: number;
}> {
const query = req.body.query;
const advanced = false;
if (!query) {
return { success: false, error: "Query is required", returnCode: 400 };
}
const res = await search(query, true, 7);
const res = await search(query, advanced, searchOptions.limit ?? 7);
let justSearch = pageOptions.fetchPageContent === false;
@ -34,24 +36,28 @@ export async function searchHelper(
if (res.results.length === 0) {
return { success: true, error: "No search results found", returnCode: 200 };
}
console.log(res.results);
const a = new WebScraperDataProvider();
await a.setOptions({
mode: "single_urls",
urls: res.results.map((r) => r.url),
urls: res.results.map((r) => (!advanced ? r : r.url)),
crawlerOptions: {
...crawlerOptions,
},
pageOptions: {...pageOptions, onlyMainContent: pageOptions?.onlyMainContent ?? true, fetchPageContent: pageOptions?.fetchPageContent ?? true, fallback:false},
pageOptions: {
...pageOptions,
onlyMainContent: pageOptions?.onlyMainContent ?? true,
fetchPageContent: pageOptions?.fetchPageContent ?? true,
fallback: false,
},
});
const docs = await a.getDocuments(true);
if (docs.length === 0)
{
if (docs.length === 0) {
return { success: true, error: "No search results found", returnCode: 200 };
}
// make sure doc.content is not empty
const filteredDocs = docs.filter(
(doc: { content?: string }) => doc.content && doc.content.trim().length > 0
@ -93,9 +99,15 @@ export async function searchController(req: Request, res: Response) {
return res.status(status).json({ error });
}
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? { onlyMainContent: true, fetchPageContent: true, fallback: false};
const pageOptions = req.body.pageOptions ?? {
onlyMainContent: true,
fetchPageContent: true,
fallback: false,
};
const origin = req.body.origin ?? "api";
const searchOptions = req.body.searchOptions ?? { limit: 7 };
try {
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1);
@ -111,7 +123,8 @@ export async function searchController(req: Request, res: Response) {
req,
team_id,
crawlerOptions,
pageOptions
pageOptions,
searchOptions
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;

View File

@ -14,6 +14,11 @@ export type PageOptions = {
fallback?: boolean;
fetchPageContent?: boolean;
};
/**
 * Caller-supplied options for the search step of a search request.
 *
 * `limit` is optional. The search helper forwards `limit ?? 7` as the third
 * argument to `search()`, so 7 is used whenever the field is omitted.
 * NOTE(review): presumably the maximum number of search results to fetch —
 * confirm against the `search()` implementation.
 */
export type SearchOptions = { limit?: number };
export type WebScraperOptions = {
urls: string[];
mode: "single_urls" | "sitemap" | "crawl";