Update single_url.ts
This commit is contained in:
parent
93f3098672
commit
5ae4d1caf5
@ -257,12 +257,22 @@ async function handleCustomScraping(
|
|||||||
text: string,
|
text: string,
|
||||||
url: string
|
url: string
|
||||||
): Promise<FireEngineResponse | null> {
|
): Promise<FireEngineResponse | null> {
|
||||||
|
// Check for Readme Docs special case
|
||||||
if (text.includes('<meta name="readme-deploy"')) {
|
if (text.includes('<meta name="readme-deploy"')) {
|
||||||
console.log(
|
console.log(
|
||||||
`Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
|
`Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
|
||||||
);
|
);
|
||||||
return await scrapWithFireEngine(url, 1000);
|
return await scrapWithFireEngine(url, 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for Google Drive PDF links in the raw HTML
|
||||||
|
const googleDrivePdfPattern = /https:\/\/drive\.google\.com\/file\/d\/[^\/]+\/view/;
|
||||||
|
const googleDrivePdfLink = text.match(googleDrivePdfPattern);
|
||||||
|
if (googleDrivePdfLink) {
|
||||||
|
console.log(`Google Drive PDF link detected for ${url}: ${googleDrivePdfLink[0]}`);
|
||||||
|
return await scrapWithFireEngine(url, 1000);
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user