Nick: improvements to search
This commit is contained in:
parent
f189589da4
commit
307ea6f5ec
@ -37,7 +37,7 @@ export async function searchHelper(
|
|||||||
return { success: true, data: res, returnCode: 200 };
|
return { success: true, data: res, returnCode: 200 };
|
||||||
}
|
}
|
||||||
|
|
||||||
res = res.filter((r) => !isUrlBlocked(r));
|
res = res.filter((r) => !isUrlBlocked(r.url));
|
||||||
|
|
||||||
if (res.length === 0) {
|
if (res.length === 0) {
|
||||||
return { success: true, error: "No search results found", returnCode: 200 };
|
return { success: true, error: "No search results found", returnCode: 200 };
|
||||||
@ -48,7 +48,7 @@ export async function searchHelper(
|
|||||||
const a = new WebScraperDataProvider();
|
const a = new WebScraperDataProvider();
|
||||||
await a.setOptions({
|
await a.setOptions({
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
urls: res.map((r) => r),
|
urls: res.map((r) => r.url),
|
||||||
crawlerOptions: {
|
crawlerOptions: {
|
||||||
...crawlerOptions,
|
...crawlerOptions,
|
||||||
},
|
},
|
||||||
|
@ -71,3 +71,20 @@ export class Document {
|
|||||||
this.provider = data.provider || undefined;
|
this.provider = data.provider || undefined;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
export class SearchResult {
|
||||||
|
url: string;
|
||||||
|
title: string;
|
||||||
|
description: string;
|
||||||
|
|
||||||
|
constructor(url: string, title: string, description: string) {
|
||||||
|
this.url = url;
|
||||||
|
this.title = title;
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
|
||||||
|
toString(): string {
|
||||||
|
return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`;
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
import * as cheerio from 'cheerio';
|
import * as cheerio from 'cheerio';
|
||||||
import * as querystring from 'querystring';
|
import * as querystring from 'querystring';
|
||||||
|
import { SearchResult } from '../../src/lib/entities';
|
||||||
|
|
||||||
const _useragent_list = [
|
const _useragent_list = [
|
||||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
|
||||||
@ -47,23 +48,9 @@ async function _req(term: string, results: number, lang: string, start: number,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class SearchResult {
|
|
||||||
url: string;
|
|
||||||
title: string;
|
|
||||||
description: string;
|
|
||||||
|
|
||||||
constructor(url: string, title: string, description: string) {
|
|
||||||
this.url = url;
|
|
||||||
this.title = title;
|
|
||||||
this.description = description;
|
|
||||||
}
|
|
||||||
|
|
||||||
toString(): string {
|
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
|
||||||
return `SearchResult(url=${this.url}, title=${this.title}, description=${this.description})`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<string[]> {
|
|
||||||
const escaped_term = querystring.escape(term);
|
const escaped_term = querystring.escape(term);
|
||||||
|
|
||||||
let proxies = null;
|
let proxies = null;
|
||||||
@ -78,7 +65,7 @@ export async function google_search(term: string, advanced = false, num_results
|
|||||||
// TODO: knowledge graph, answer box, etc.
|
// TODO: knowledge graph, answer box, etc.
|
||||||
|
|
||||||
let start = 0;
|
let start = 0;
|
||||||
let results : string[] = [];
|
let results : SearchResult[] = [];
|
||||||
let attempts = 0;
|
let attempts = 0;
|
||||||
const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop
|
const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop
|
||||||
while (start < num_results && attempts < maxAttempts) {
|
while (start < num_results && attempts < maxAttempts) {
|
||||||
@ -103,11 +90,7 @@ export async function google_search(term: string, advanced = false, num_results
|
|||||||
const description = description_box.text();
|
const description = description_box.text();
|
||||||
if (link && title && description) {
|
if (link && title && description) {
|
||||||
start += 1;
|
start += 1;
|
||||||
if (advanced) {
|
results.push(new SearchResult(link, title.text(), description));
|
||||||
// results.push(new SearchResult(link, title.text(), description));
|
|
||||||
} else {
|
|
||||||
results.push(link);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import { SearchResult } from "../../src/lib/entities";
|
||||||
import { google_search } from "./googlesearch";
|
import { google_search } from "./googlesearch";
|
||||||
import { serper_search } from "./serper";
|
import { serper_search } from "./serper";
|
||||||
|
|
||||||
@ -21,7 +22,7 @@ export async function search({
|
|||||||
proxy?: string;
|
proxy?: string;
|
||||||
sleep_interval?: number;
|
sleep_interval?: number;
|
||||||
timeout?: number;
|
timeout?: number;
|
||||||
}) {
|
}) : Promise<SearchResult[]> {
|
||||||
try {
|
try {
|
||||||
if (process.env.SERPER_API_KEY && !tbs) {
|
if (process.env.SERPER_API_KEY && !tbs) {
|
||||||
return await serper_search(query, num_results);
|
return await serper_search(query, num_results);
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import dotenv from "dotenv";
|
import dotenv from "dotenv";
|
||||||
|
import { SearchResult } from "../../src/lib/entities";
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
export async function serper_search(q, num_results) : Promise<string[]> {
|
export async function serper_search(q, num_results): Promise<SearchResult[]> {
|
||||||
let data = JSON.stringify({
|
let data = JSON.stringify({
|
||||||
q: q,
|
q: q,
|
||||||
"num": num_results,
|
num: num_results,
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
let config = {
|
let config = {
|
||||||
@ -21,7 +21,11 @@ export async function serper_search(q, num_results) : Promise<string[]> {
|
|||||||
};
|
};
|
||||||
const response = await axios(config);
|
const response = await axios(config);
|
||||||
if (response && response.data && Array.isArray(response.data.organic)) {
|
if (response && response.data && Array.isArray(response.data.organic)) {
|
||||||
return response.data.organic.map((a) => a.link);
|
return response.data.organic.map((a) => ({
|
||||||
|
url: a.link,
|
||||||
|
title: a.title,
|
||||||
|
description: a.snippet,
|
||||||
|
}));
|
||||||
}else{
|
}else{
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user