Merge pull request #235 from mendableai/feat/gdrive-pdfs
[Feat] Added custom scraping for google-drive pdf usecase
This commit is contained in:
commit
fc04d5b033
@@ -257,12 +257,22 @@ async function handleCustomScraping(
|
||||
text: string,
|
||||
url: string
|
||||
): Promise<FireEngineResponse | null> {
|
||||
// Check for Readme Docs special case
|
||||
if (text.includes('<meta name="readme-deploy"')) {
|
||||
console.log(
|
||||
`Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
|
||||
);
|
||||
return await scrapWithFireEngine(url, 1000);
|
||||
}
|
||||
|
||||
// Check for Google Drive PDF links in the raw HTML
|
||||
const googleDrivePdfPattern = /https:\/\/drive\.google\.com\/file\/d\/[^\/]+\/view/;
|
||||
const googleDrivePdfLink = text.match(googleDrivePdfPattern);
|
||||
if (googleDrivePdfLink) {
|
||||
console.log(`Google Drive PDF link detected for ${url}: ${googleDrivePdfLink[0]}`);
|
||||
return await scrapWithFireEngine(url, 1000);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user