chromedriver-http-proxy/proxy.py
2025-07-23 01:06:26 -05:00

140 lines
3.4 KiB
Python

import time
from dataclasses import dataclass
import argparse
import undetected_chromedriver as uc
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ChromeOptions
from flask import Flask, request
@dataclass
class ScraperConfig:
wait_time: float
headless: bool
user_agent: str
@dataclass
class ServerConfig:
host: str
port: int
class Scraper:
def __init__(self, config: ScraperConfig):
self.config = config
self.driver = None
def __enter__(self):
self._setup_driver()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self._cleanup()
def _cleanup(self):
driver = self.driver
driver.close()
driver.quit()
def _setup_driver(self):
headless = self.config.headless
user_agent = self.config.user_agent
chrome_options = ChromeOptions()
chrome_options.add_argument(f"--user-agent={user_agent}")
self.driver = uc.Chrome(
headless=headless,
options=chrome_options,
use_subprocess=False
)
def render_page(self, url):
wait_time = self.config.wait_time
driver = self.driver
driver.get(url)
WebDriverWait(self.driver, wait_time).until(
lambda driver: driver.execute_script("return document.readyState") == "complete"
)
time.sleep(wait_time)
return self.driver.page_source
if __name__ == "__main__":
parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
usage="")
parser.add_argument(
"--port",
help="Port the proxy runs on.",
required=False,
type=int,
default=32323
)
parser.add_argument(
"--host",
help="Host the proxy to runs on.",
required=False,
type=str,
default="0.0.0.0"
)
parser.add_argument(
"--wait",
help="Seconds to wait before returning content.",
required=False,
type=float,
default=10
)
parser.add_argument(
"--headless",
help="Whether or not to run Chrome headless.",
required=False,
type=bool,
default=True
)
parser.add_argument(
"--user-agent",
help="Chrome user agent. Changing with the current ChromeDriver version recommended.",
required=False,
type=str,
default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
)
args = parser.parse_args()
port = args.port
host = args.host
wait = args.wait
headless = args.headless
user_agent = args.user_agent
server_config = ServerConfig(host=host, port=port)
scraper_config = ScraperConfig(wait_time=wait, headless=headless, user_agent=user_agent)
# run the server
app = Flask(__name__)
@app.route("/")
def proxy_route():
url = request.args.get("url")
with Scraper(scraper_config) as scraper:
try:
html = scraper.render_page(url)
return html
except Exception as e:
print(f"Error: {e}")
app.run(host=server_config.host, port=server_config.port)