From b001aded46a620021f0265db683c17ab3fc46793 Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Fri, 24 May 2024 17:41:34 +0200 Subject: [PATCH] Add proxy and media blocking configurations Updated environment variables and application settings to include proxy configurations and media blocking option. The proxy settings allow users to use a proxy service, while the media blocking is an optional feature that can help save bandwidth. Changes have been made in the .env.example, docker-compose.yaml, and main.py files. --- apps/api/.env.example | 9 ++++++++- apps/playwright-service/main.py | 20 +++++++++++++++++++- docker-compose.yaml | 4 ++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/apps/api/.env.example b/apps/api/.env.example index 659d68f..0ba20e8 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -35,4 +35,11 @@ STRIPE_PRICE_ID_SCALE= HYPERDX_API_KEY= HDX_NODE_BETA_MODE=1 -FIRE_ENGINE_BETA_URL= # set if you'd like to use the fire engine closed beta \ No newline at end of file +FIRE_ENGINE_BETA_URL= # set if you'd like to use the fire engine closed beta + +# Proxy Settings (Alternatively, you can use a proxy service like oxylabs, which rotates IPs for you on every request) +PROXY_SERVER= +PROXY_USERNAME= +PROXY_PASSWORD= +# set if you'd like to block media requests to save proxy bandwidth +BLOCK_MEDIA= \ No newline at end of file diff --git a/apps/playwright-service/main.py b/apps/playwright-service/main.py index c28bc63..337d283 100644 --- a/apps/playwright-service/main.py +++ b/apps/playwright-service/main.py @@ -2,9 +2,16 @@ from fastapi import FastAPI from playwright.async_api import async_playwright, Browser from fastapi.responses import JSONResponse from pydantic import BaseModel +from os import environ + +PROXY_SERVER = environ.get('PROXY_SERVER', None) +PROXY_USERNAME = environ.get('PROXY_USERNAME', None) +PROXY_PASSWORD = environ.get('PROXY_PASSWORD', None) +BLOCK_MEDIA = environ.get('BLOCK_MEDIA', 'False').upper() 
== 'TRUE' app = FastAPI() + class UrlModel(BaseModel): url: str wait: int = None @@ -27,7 +34,18 @@ async def shutdown_event(): @app.post("/html") async def root(body: UrlModel): - context = await browser.new_context() + context = None + if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD: + context = await browser.new_context(proxy={"server": PROXY_SERVER, + "username": PROXY_USERNAME, + "password": PROXY_PASSWORD}) + else: + context = await browser.new_context() + + if BLOCK_MEDIA: + await context.route("**/*.{png,jpg,jpeg,gif,svg,mp3,mp4,avi,flac,ogg,wav,webm}", + handler=lambda route, request: route.abort()) + page = await context.new_page() await page.goto(body.url, timeout=15000) # Set max timeout to 15s if body.wait: # Check if wait parameter is provided in the request body diff --git a/docker-compose.yaml b/docker-compose.yaml index 049672d..c95ccc9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -5,6 +5,10 @@ services: build: apps/playwright-service environment: - PORT=3000 + - PROXY_SERVER=${PROXY_SERVER} + - PROXY_USERNAME=${PROXY_USERNAME} + - PROXY_PASSWORD=${PROXY_PASSWORD} + - BLOCK_MEDIA=${BLOCK_MEDIA} networks: - backend