Nick: improvements to search
This commit is contained in:
parent
f189589da4
commit
307ea6f5ec
@ -37,7 +37,7 @@ export async function searchHelper(
|
||||
return { success: true, data: res, returnCode: 200 };
|
||||
}
|
||||
|
||||
res = res.filter((r) => !isUrlBlocked(r));
|
||||
res = res.filter((r) => !isUrlBlocked(r.url));
|
||||
|
||||
if (res.length === 0) {
|
||||
return { success: true, error: "No search results found", returnCode: 200 };
|
||||
@ -48,7 +48,7 @@ export async function searchHelper(
|
||||
const a = new WebScraperDataProvider();
|
||||
await a.setOptions({
|
||||
mode: "single_urls",
|
||||
urls: res.map((r) => r),
|
||||
urls: res.map((r) => r.url),
|
||||
crawlerOptions: {
|
||||
...crawlerOptions,
|
||||
},
|
||||
|
@ -71,3 +71,20 @@ export class Document {
|
||||
this.provider = data.provider || undefined;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * One search hit: the result's URL plus its title and description text.
 */
export class SearchResult {
  /** URL of the result. */
  url: string;
  /** Title text of the result. */
  title: string;
  /** Description text of the result. */
  description: string;

  /**
   * @param url URL of the result.
   * @param title Title text of the result.
   * @param description Description text of the result.
   */
  constructor(url: string, title: string, description: string) {
    this.url = url;
    this.title = title;
    this.description = description;
  }

  /**
   * @returns The three fields rendered as
   * `SearchResult(url=..., title=..., description=...)`.
   */
  toString(): string {
    return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`;
  }
}
|
@ -1,6 +1,7 @@
|
||||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import * as querystring from 'querystring';
|
||||
import { SearchResult } from '../../src/lib/entities';
|
||||
|
||||
const _useragent_list = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||
@ -47,23 +48,9 @@ async function _req(term: string, results: number, lang: string, start: number,
|
||||
}
|
||||
}
|
||||
|
||||
class SearchResult {
|
||||
url: string;
|
||||
title: string;
|
||||
description: string;
|
||||
|
||||
constructor(url: string, title: string, description: string) {
|
||||
this.url = url;
|
||||
this.title = title;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
toString(): string {
|
||||
return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`;
|
||||
}
|
||||
}
|
||||
|
||||
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<string[]> {
|
||||
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
|
||||
const escaped_term = querystring.escape(term);
|
||||
|
||||
let proxies = null;
|
||||
@ -78,7 +65,7 @@ export async function google_search(term: string, advanced = false, num_results
|
||||
// TODO: knowledge graph, answer box, etc.
|
||||
|
||||
let start = 0;
|
||||
let results : string[] = [];
|
||||
let results : SearchResult[] = [];
|
||||
let attempts = 0;
|
||||
const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop
|
||||
while (start < num_results && attempts < maxAttempts) {
|
||||
@ -103,11 +90,7 @@ export async function google_search(term: string, advanced = false, num_results
|
||||
const description = description_box.text();
|
||||
if (link && title && description) {
|
||||
start += 1;
|
||||
if (advanced) {
|
||||
// results.push(new SearchResult(link, title.text(), description));
|
||||
} else {
|
||||
results.push(link);
|
||||
}
|
||||
results.push(new SearchResult(link, title.text(), description));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -1,3 +1,4 @@
|
||||
import { SearchResult } from "../../src/lib/entities";
|
||||
import { google_search } from "./googlesearch";
|
||||
import { serper_search } from "./serper";
|
||||
|
||||
@ -21,7 +22,7 @@ export async function search({
|
||||
proxy?: string;
|
||||
sleep_interval?: number;
|
||||
timeout?: number;
|
||||
}) {
|
||||
}) : Promise<SearchResult[]> {
|
||||
try {
|
||||
if (process.env.SERPER_API_KEY && !tbs) {
|
||||
return await serper_search(query, num_results);
|
||||
|
@ -1,13 +1,13 @@
|
||||
import axios from "axios";
|
||||
import dotenv from "dotenv";
|
||||
import { SearchResult } from "../../src/lib/entities";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
export async function serper_search(q, num_results) : Promise<string[]> {
|
||||
export async function serper_search(q, num_results): Promise<SearchResult[]> {
|
||||
let data = JSON.stringify({
|
||||
q: q,
|
||||
"num": num_results,
|
||||
|
||||
num: num_results,
|
||||
});
|
||||
|
||||
let config = {
|
||||
@ -21,8 +21,12 @@ export async function serper_search(q, num_results) : Promise<string[]> {
|
||||
};
|
||||
const response = await axios(config);
|
||||
if (response && response.data && Array.isArray(response.data.organic)) {
|
||||
return response.data.organic.map((a) => a.link);
|
||||
} else {
|
||||
return response.data.organic.map((a) => ({
|
||||
url: a.link,
|
||||
title: a.title,
|
||||
description: a.snippet,
|
||||
}));
|
||||
}else{
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user