Merge pull request #239 from mendableai/feat/scroll-xpaths
[Feat] Added scroll XPaths to fire-engine for handling ReadMe docs
This commit is contained in:
commit
6d76037f6d
@ -1,7 +1,7 @@
|
|||||||
export async function handleCustomScraping(
|
export async function handleCustomScraping(
|
||||||
text: string,
|
text: string,
|
||||||
url: string
|
url: string
|
||||||
): Promise<{ scraper: string; url: string; wait_after_load: number } | null> {
|
): Promise<{ scraper: string; url: string; waitAfterLoad: number, pageOptions?: { scrollXPaths?: string[] } } | null> {
|
||||||
// Check for Readme Docs special case
|
// Check for Readme Docs special case
|
||||||
if (text.includes('<meta name="readme-deploy"')) {
|
if (text.includes('<meta name="readme-deploy"')) {
|
||||||
console.log(
|
console.log(
|
||||||
@ -10,7 +10,10 @@ export async function handleCustomScraping(
|
|||||||
return {
|
return {
|
||||||
scraper: "fire-engine",
|
scraper: "fire-engine",
|
||||||
url: url,
|
url: url,
|
||||||
wait_after_load: 1000,
|
waitAfterLoad: 1000,
|
||||||
|
pageOptions: {
|
||||||
|
scrollXPaths: ['//*[@id="ReferencePlayground"]/section[3]/div/pre/div/div/div[5]']
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,7 +25,7 @@ export async function handleCustomScraping(
|
|||||||
return {
|
return {
|
||||||
scraper: "fire-engine",
|
scraper: "fire-engine",
|
||||||
url: url,
|
url: url,
|
||||||
wait_after_load: 3000,
|
waitAfterLoad: 3000,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -37,7 +40,7 @@ export async function handleCustomScraping(
|
|||||||
return {
|
return {
|
||||||
scraper: "fire-engine",
|
scraper: "fire-engine",
|
||||||
url: url,
|
url: url,
|
||||||
wait_after_load: 1000,
|
waitAfterLoad: 1000,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,6 +46,7 @@ export async function scrapWithFireEngine(
|
|||||||
url: string,
|
url: string,
|
||||||
waitFor: number = 0,
|
waitFor: number = 0,
|
||||||
screenshot: boolean = false,
|
screenshot: boolean = false,
|
||||||
|
pageOptions: { scrollXPaths?: string[] } = {},
|
||||||
headers?: Record<string, string>,
|
headers?: Record<string, string>,
|
||||||
options?: any
|
options?: any
|
||||||
): Promise<FireEngineResponse> {
|
): Promise<FireEngineResponse> {
|
||||||
@ -68,6 +69,7 @@ export async function scrapWithFireEngine(
|
|||||||
wait: waitParam,
|
wait: waitParam,
|
||||||
screenshot: screenshotParam,
|
screenshot: screenshotParam,
|
||||||
headers: headers,
|
headers: headers,
|
||||||
|
pageOptions: pageOptions
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -332,7 +334,7 @@ export async function scrapSingleUrl(
|
|||||||
const customScraperResult = await handleCustomScraping(text, url);
|
const customScraperResult = await handleCustomScraping(text, url);
|
||||||
|
|
||||||
if(customScraperResult){
|
if(customScraperResult){
|
||||||
customScrapedContent = await scrapWithFireEngine(customScraperResult.url, customScraperResult.wait_after_load)
|
customScrapedContent = await scrapWithFireEngine(customScraperResult.url, customScraperResult.waitAfterLoad, false, customScraperResult.pageOptions)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (customScrapedContent) {
|
if (customScrapedContent) {
|
||||||
|
Loading…
Reference in New Issue
Block a user