2024-04-25 10:31:28 -07:00
|
|
|
from os import environ
from typing import Optional

from fastapi import FastAPI
from fastapi.responses import JSONResponse
from playwright.async_api import async_playwright, Browser
from pydantic import BaseModel
|
|
|
|
|
|
|
|
# Optional upstream proxy settings; each one is None when its variable is unset.
PROXY_SERVER = environ.get('PROXY_SERVER')
PROXY_USERNAME = environ.get('PROXY_USERNAME')
PROXY_PASSWORD = environ.get('PROXY_PASSWORD')

# Feature flag read from the environment. Surrounding whitespace is ignored and
# the usual truthy spellings are accepted, so values such as " true " or "1"
# are not silently treated as "off". Every value that enabled the flag before
# ("true" in any letter case) still enables it.
BLOCK_MEDIA = environ.get('BLOCK_MEDIA', 'False').strip().lower() in (
    'true', '1', 'yes', 'on',
)
|
2024-04-25 10:31:28 -07:00
|
|
|
|
2024-04-15 17:01:47 -04:00
|
|
|
# Application instance; the lifecycle and route handlers in this file register on it.
app = FastAPI()
|
|
|
|
|
2024-05-24 17:41:34 +02:00
|
|
|
|
2024-04-15 17:01:47 -04:00
|
|
|
class UrlModel(BaseModel):
    """Request body accepted by the ``/html`` endpoint.

    ``wait`` and ``timeout`` are distinct: ``timeout`` bounds the navigation
    itself, while ``wait`` is an extra pause applied after the page has
    loaded. Both are optional and are handed to Playwright unchanged
    (Playwright interprets them as milliseconds).
    """

    # Address the browser navigates to.
    url: str
    # Extra pause after the page has loaded; skipped when omitted or 0.
    wait: Optional[int] = None
    # Navigation timeout; the handler substitutes its default when omitted or 0.
    # Previously undeclared, which made the handler's `body.timeout` access
    # raise AttributeError on every request.
    timeout: Optional[int] = None
|
2024-04-15 17:01:47 -04:00
|
|
|
|
|
|
|
|
2024-04-25 10:31:28 -07:00
|
|
|
# Shared browser handle: None at import time, assigned by the startup handler
# and closed by the shutdown handler.
browser: Browser = None
|
|
|
|
|
|
|
|
|
|
|
|
# Playwright driver handle, retained so the shutdown handler can stop it.
_playwright = None


@app.on_event("startup")
async def startup_event():
    """Start the Playwright driver and launch the shared Chromium browser."""
    global browser, _playwright
    _playwright = await async_playwright().start()
    browser = await _playwright.chromium.launch()


@app.on_event("shutdown")
async def shutdown_event():
    """Close the shared browser and stop the Playwright driver.

    Each handle is checked first so that shutdown after a failed or partial
    startup does not raise on a ``None`` handle.
    """
    global browser, _playwright
    if browser is not None:
        await browser.close()
        browser = None
    if _playwright is not None:
        # Stopping the driver releases the Playwright subprocess, which the
        # browser close alone leaves running.
        await _playwright.stop()
        _playwright = None
|
2024-04-15 17:01:47 -04:00
|
|
|
|
|
|
|
|
2024-04-25 10:31:28 -07:00
|
|
|
@app.post("/html")
async def root(body: UrlModel):
    """Load ``body.url`` in a fresh browser context and return its HTML.

    Args:
        body: Request payload. ``url`` is the page to load; ``wait`` is an
            optional pause applied after the page has loaded; ``timeout``,
            when the model provides it, bounds the navigation.

    Returns:
        A ``JSONResponse`` of the form ``{"content": <page HTML>}``.

    Raises:
        Whatever Playwright raises for a failed or timed-out navigation. The
        browser context is closed before the error propagates.
    """
    # The proxy is used only when server, username and password are all set.
    context_options = {}
    if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD:
        context_options["proxy"] = {
            "server": PROXY_SERVER,
            "username": PROXY_USERNAME,
            "password": PROXY_PASSWORD,
        }
    context = await browser.new_context(**context_options)

    try:
        if BLOCK_MEDIA:
            # Abort requests for common image/audio/video file extensions.
            await context.route(
                "**/*.{png,jpg,jpeg,gif,svg,mp3,mp4,avi,flac,ogg,wav,webm}",
                handler=lambda route, request: route.abort(),
            )

        page = await context.new_page()

        # Read defensively: a request model without a `timeout` field would
        # otherwise make this raise AttributeError on every request.
        # A missing or zero value falls back to 15000.
        nav_timeout = getattr(body, "timeout", None) or 15000
        await page.goto(body.url, wait_until="load", timeout=nav_timeout)

        # Wait != timeout. Wait is the time to wait after the page is loaded -
        # useful in some cases where "load" / "networkidle" is not enough.
        if body.wait:
            await page.wait_for_timeout(body.wait)

        page_content = await page.content()
    finally:
        # Always release the context, including when navigation fails or
        # times out; otherwise each failed request leaks a context.
        await context.close()

    return JSONResponse(content={"content": page_content})
|