0

Merge pull request #62 from mendableai/nsc/serper-params

Serper params
This commit is contained in:
Nicolas 2024-04-24 18:13:30 -07:00 committed by GitHub
commit 75e82869ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 39 additions and 8 deletions

View File

@@ -29,7 +29,16 @@ export async function searchHelper(
const tbs = searchOptions.tbs ?? null;
const filter = searchOptions.filter ?? null;
let res = await search({query: query, advanced: advanced, num_results: searchOptions.limit ?? 7, tbs: tbs, filter: filter});
let res = await search({
query: query,
advanced: advanced,
num_results: searchOptions.limit ?? 7,
tbs: tbs,
filter: filter,
lang: searchOptions.lang ?? "en",
country: searchOptions.country ?? "us",
location: searchOptions.location,
});
let justSearch = pageOptions.fetchPageContent === false;

View File

@@ -20,6 +20,9 @@ export type SearchOptions = {
limit?: number;
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
};
export type WebScraperOptions = {

View File

@@ -17,11 +17,12 @@ function get_useragent(): string {
return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
}
async function _req(term: string, results: number, lang: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) {
async function _req(term: string, results: number, lang: string, country: string, start: number, proxies: any, timeout: number, tbs: string = null, filter: string = null) {
const params = {
"q": term,
"num": results, // Number of results to return
"hl": lang,
"gl": country,
"start": start,
};
if (tbs) {
@@ -50,7 +51,7 @@ async function _req(term: string, results: number, lang: string, start: number,
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
const escaped_term = querystring.escape(term);
let proxies = null;
@@ -70,7 +71,7 @@ export async function google_search(term: string, advanced = false, num_results
const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop
while (start < num_results && attempts < maxAttempts) {
try {
const resp = await _req(escaped_term, num_results - start, lang, start, proxies, timeout, tbs, filter);
const resp = await _req(escaped_term, num_results - start, lang, country, start, proxies, timeout, tbs, filter);
const $ = cheerio.load(resp.data);
const result_block = $("div.g");
if (result_block.length === 0) {

View File

@@ -9,6 +9,8 @@ export async function search({
tbs = null,
filter = null,
lang = "en",
country = "us",
location = undefined,
proxy = null,
sleep_interval = 0,
timeout = 5000,
@@ -19,13 +21,15 @@ export async function search({
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
proxy?: string;
sleep_interval?: number;
timeout?: number;
}) : Promise<SearchResult[]> {
try {
if (process.env.SERPER_API_KEY && !tbs) {
return await serper_search(query, num_results);
if (process.env.SERPER_API_KEY ) {
return await serper_search(query, {num_results, tbs, filter, lang, country, location});
}
return await google_search(
query,
@@ -34,6 +38,7 @@ export async function search({
tbs,
filter,
lang,
country,
proxy,
sleep_interval,
timeout

View File

@@ -4,10 +4,23 @@ import { SearchResult } from "../../src/lib/entities";
dotenv.config();
export async function serper_search(q, num_results): Promise<SearchResult[]> {
export async function serper_search(q, options: {
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
num_results: number;
page?: number;
}): Promise<SearchResult[]> {
let data = JSON.stringify({
q: q,
num: num_results,
hl: options.lang,
gl: options.country,
location: options.location,
tbs: options.tbs,
num: options.num_results,
page: options.page ?? 1,
});
let config = {