0

Merge pull request #62 from mendableai/nsc/serper-params

Serper params
This commit is contained in:
Nicolas 2024-04-24 18:13:30 -07:00 committed by GitHub
commit 75e82869ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 39 additions and 8 deletions

View File

@@ -29,7 +29,16 @@ export async function searchHelper(
const tbs = searchOptions.tbs ?? null; const tbs = searchOptions.tbs ?? null;
const filter = searchOptions.filter ?? null; const filter = searchOptions.filter ?? null;
let res = await search({query: query, advanced: advanced, num_results: searchOptions.limit ?? 7, tbs: tbs, filter: filter}); let res = await search({
query: query,
advanced: advanced,
num_results: searchOptions.limit ?? 7,
tbs: tbs,
filter: filter,
lang: searchOptions.lang ?? "en",
country: searchOptions.country ?? "us",
location: searchOptions.location,
});
let justSearch = pageOptions.fetchPageContent === false; let justSearch = pageOptions.fetchPageContent === false;

View File

@@ -20,6 +20,9 @@ export type SearchOptions = {
limit?: number; limit?: number;
tbs?: string; tbs?: string;
filter?: string; filter?: string;
lang?: string;
country?: string;
location?: string;
}; };
export type WebScraperOptions = { export type WebScraperOptions = {

View File

@@ -17,11 +17,12 @@ function get_useragent(): string {
return _useragent_list[Math.floor(Math.random() * _useragent_list.length)]; return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
} }
async function _req(term: string, results: number, lang: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) { async function _req(term: string, results: number, lang: string, country: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) {
const params = { const params = {
"q": term, "q": term,
"num": results, // Number of results to return "num": results, // Number of results to return
"hl": lang, "hl": lang,
"gl": country,
"start": start, "start": start,
}; };
if (tbs) { if (tbs) {
@@ -50,7 +51,7 @@ async function _req(term: string, results: number, lang: string, start: number,
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> { export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
const escaped_term = querystring.escape(term); const escaped_term = querystring.escape(term);
let proxies = null; let proxies = null;
@@ -70,7 +71,7 @@ export async function google_search(term: string, advanced = false, num_results
const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop
while (start < num_results && attempts < maxAttempts) { while (start < num_results && attempts < maxAttempts) {
try { try {
const resp = await _req(escaped_term, num_results - start, lang, start, proxies, timeout, tbs, filter); const resp = await _req(escaped_term, num_results - start, lang, country, start, proxies, timeout, tbs, filter);
const $ = cheerio.load(resp.data); const $ = cheerio.load(resp.data);
const result_block = $("div.g"); const result_block = $("div.g");
if (result_block.length === 0) { if (result_block.length === 0) {

View File

@@ -9,6 +9,8 @@ export async function search({
tbs = null, tbs = null,
filter = null, filter = null,
lang = "en", lang = "en",
country = "us",
location = undefined,
proxy = null, proxy = null,
sleep_interval = 0, sleep_interval = 0,
timeout = 5000, timeout = 5000,
@@ -19,13 +21,15 @@ export async function search({
tbs?: string; tbs?: string;
filter?: string; filter?: string;
lang?: string; lang?: string;
country?: string;
location?: string;
proxy?: string; proxy?: string;
sleep_interval?: number; sleep_interval?: number;
timeout?: number; timeout?: number;
}) : Promise<SearchResult[]> { }) : Promise<SearchResult[]> {
try { try {
if (process.env.SERPER_API_KEY && !tbs) { if (process.env.SERPER_API_KEY ) {
return await serper_search(query, num_results); return await serper_search(query, {num_results, tbs, filter, lang, country, location});
} }
return await google_search( return await google_search(
query, query,
@@ -34,6 +38,7 @@ export async function search({
tbs, tbs,
filter, filter,
lang, lang,
country,
proxy, proxy,
sleep_interval, sleep_interval,
timeout timeout

View File

@@ -4,10 +4,23 @@ import { SearchResult } from "../../src/lib/entities";
dotenv.config(); dotenv.config();
export async function serper_search(q, num_results): Promise<SearchResult[]> { export async function serper_search(q, options: {
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
num_results: number;
page?: number;
}): Promise<SearchResult[]> {
let data = JSON.stringify({ let data = JSON.stringify({
q: q, q: q,
num: num_results, hl: options.lang,
gl: options.country,
location: options.location,
tbs: options.tbs,
num: options.num_results,
page: options.page ?? 1,
}); });
let config = { let config = {