Nick:
This commit is contained in:
parent
0146157876
commit
5e3e2ec966
@ -4,14 +4,15 @@ import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
|
|||||||
import { authenticateUser } from "./auth";
|
import { authenticateUser } from "./auth";
|
||||||
import { RateLimiterMode } from "../types";
|
import { RateLimiterMode } from "../types";
|
||||||
import { logJob } from "../services/logging/log_job";
|
import { logJob } from "../services/logging/log_job";
|
||||||
import { PageOptions } from "../lib/entities";
|
import { PageOptions, SearchOptions } from "../lib/entities";
|
||||||
import { search } from "../search/googlesearch";
|
import { search } from "../search/googlesearch";
|
||||||
|
|
||||||
export async function searchHelper(
|
export async function searchHelper(
|
||||||
req: Request,
|
req: Request,
|
||||||
team_id: string,
|
team_id: string,
|
||||||
crawlerOptions: any,
|
crawlerOptions: any,
|
||||||
pageOptions: PageOptions
|
pageOptions: PageOptions,
|
||||||
|
searchOptions: SearchOptions
|
||||||
): Promise<{
|
): Promise<{
|
||||||
success: boolean;
|
success: boolean;
|
||||||
error?: string;
|
error?: string;
|
||||||
@ -19,11 +20,12 @@ export async function searchHelper(
|
|||||||
returnCode: number;
|
returnCode: number;
|
||||||
}> {
|
}> {
|
||||||
const query = req.body.query;
|
const query = req.body.query;
|
||||||
|
const advanced = false;
|
||||||
if (!query) {
|
if (!query) {
|
||||||
return { success: false, error: "Query is required", returnCode: 400 };
|
return { success: false, error: "Query is required", returnCode: 400 };
|
||||||
}
|
}
|
||||||
|
|
||||||
const res = await search(query, true, 7);
|
const res = await search(query, advanced, searchOptions.limit ?? 7);
|
||||||
|
|
||||||
let justSearch = pageOptions.fetchPageContent === false;
|
let justSearch = pageOptions.fetchPageContent === false;
|
||||||
|
|
||||||
@ -34,24 +36,28 @@ export async function searchHelper(
|
|||||||
if (res.results.length === 0) {
|
if (res.results.length === 0) {
|
||||||
return { success: true, error: "No search results found", returnCode: 200 };
|
return { success: true, error: "No search results found", returnCode: 200 };
|
||||||
}
|
}
|
||||||
|
console.log(res.results);
|
||||||
|
|
||||||
const a = new WebScraperDataProvider();
|
const a = new WebScraperDataProvider();
|
||||||
await a.setOptions({
|
await a.setOptions({
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
urls: res.results.map((r) => r.url),
|
urls: res.results.map((r) => (!advanced ? r : r.url)),
|
||||||
crawlerOptions: {
|
crawlerOptions: {
|
||||||
...crawlerOptions,
|
...crawlerOptions,
|
||||||
},
|
},
|
||||||
pageOptions: {...pageOptions, onlyMainContent: pageOptions?.onlyMainContent ?? true, fetchPageContent: pageOptions?.fetchPageContent ?? true, fallback:false},
|
pageOptions: {
|
||||||
|
...pageOptions,
|
||||||
|
onlyMainContent: pageOptions?.onlyMainContent ?? true,
|
||||||
|
fetchPageContent: pageOptions?.fetchPageContent ?? true,
|
||||||
|
fallback: false,
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const docs = await a.getDocuments(true);
|
const docs = await a.getDocuments(true);
|
||||||
if (docs.length === 0)
|
if (docs.length === 0) {
|
||||||
{
|
|
||||||
return { success: true, error: "No search results found", returnCode: 200 };
|
return { success: true, error: "No search results found", returnCode: 200 };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// make sure doc.content is not empty
|
// make sure doc.content is not empty
|
||||||
const filteredDocs = docs.filter(
|
const filteredDocs = docs.filter(
|
||||||
(doc: { content?: string }) => doc.content && doc.content.trim().length > 0
|
(doc: { content?: string }) => doc.content && doc.content.trim().length > 0
|
||||||
@ -93,9 +99,15 @@ export async function searchController(req: Request, res: Response) {
|
|||||||
return res.status(status).json({ error });
|
return res.status(status).json({ error });
|
||||||
}
|
}
|
||||||
const crawlerOptions = req.body.crawlerOptions ?? {};
|
const crawlerOptions = req.body.crawlerOptions ?? {};
|
||||||
const pageOptions = req.body.pageOptions ?? { onlyMainContent: true, fetchPageContent: true, fallback: false};
|
const pageOptions = req.body.pageOptions ?? {
|
||||||
|
onlyMainContent: true,
|
||||||
|
fetchPageContent: true,
|
||||||
|
fallback: false,
|
||||||
|
};
|
||||||
const origin = req.body.origin ?? "api";
|
const origin = req.body.origin ?? "api";
|
||||||
|
|
||||||
|
const searchOptions = req.body.searchOptions ?? { limit: 7 };
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
||||||
await checkTeamCredits(team_id, 1);
|
await checkTeamCredits(team_id, 1);
|
||||||
@ -111,7 +123,8 @@ export async function searchController(req: Request, res: Response) {
|
|||||||
req,
|
req,
|
||||||
team_id,
|
team_id,
|
||||||
crawlerOptions,
|
crawlerOptions,
|
||||||
pageOptions
|
pageOptions,
|
||||||
|
searchOptions
|
||||||
);
|
);
|
||||||
const endTime = new Date().getTime();
|
const endTime = new Date().getTime();
|
||||||
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
||||||
|
@ -14,6 +14,11 @@ export type PageOptions = {
|
|||||||
fallback?: boolean;
|
fallback?: boolean;
|
||||||
fetchPageContent?: boolean;
|
fetchPageContent?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Options for the search step of a search request.
 *
 * The search controller reads this object from `req.body.searchOptions`
 * (substituting `{ limit: 7 }` when the body omits it) and passes it to
 * `searchHelper`.
 */
export type SearchOptions = {
|
||||||
|
// Forwarded by `searchHelper` as the third argument to `search()`, with 7 used
// when it is undefined. Presumably the maximum number of results to request;
// TODO confirm against the `search()` implementation.
limit?: number;
|
||||||
|
};
|
||||||
|
|
||||||
export type WebScraperOptions = {
|
export type WebScraperOptions = {
|
||||||
urls: string[];
|
urls: string[];
|
||||||
mode: "single_urls" | "sitemap" | "crawl";
|
mode: "single_urls" | "sitemap" | "crawl";
|
||||||
|
Loading…
Reference in New Issue
Block a user