From 36a0c0fe60ae481f05e5e88ee39353ab4a889193 Mon Sep 17 00:00:00 2001 From: Sawyer Date: Fri, 25 Jul 2025 01:49:58 -0500 Subject: [PATCH] refactor: move config to a separate file --- config.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ proxy.py | 66 +++------------------------------------------------ 2 files changed, 74 insertions(+), 63 deletions(-) create mode 100644 config.py diff --git a/config.py b/config.py new file mode 100644 index 0000000..ea28e5e --- /dev/null +++ b/config.py @@ -0,0 +1,71 @@ +import argparse +from dataclasses import dataclass + + +@dataclass +class ScraperConfig: + wait_time: float + headless: bool + user_agent: str + + +@dataclass +class ServerConfig: + host: str + port: int + + +def get_configs(): + parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy", + description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML", + usage="") + parser.add_argument( + "--port", + help="Port the proxy runs on.", + required=False, + type=int, + default=32323 + ) + + parser.add_argument( + "--host", + help="Host the proxy to runs on.", + required=False, + type=str, + default="0.0.0.0" + ) + + parser.add_argument( + "--wait", + help="Seconds to wait before returning content.", + required=False, + type=float, + default=10 + ) + + parser.add_argument( + "--headless", + help="Whether or not to run Chrome headless.", + required=False, + type=bool, + default=True + ) + + parser.add_argument( + "--user-agent", + help="Chrome user agent. 
Changing with the current ChromeDriver version recommended.", + required=False, + type=str, + default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36" + ) + + args = parser.parse_args() + + server_config = ServerConfig(host=args.host, + port=args.port) + + scraper_config = ScraperConfig(wait_time=args.wait, + headless=args.headless, + user_agent=args.user_agent) + + return server_config, scraper_config diff --git a/proxy.py b/proxy.py index 0305e5a..f3b14a6 100644 --- a/proxy.py +++ b/proxy.py @@ -1,8 +1,8 @@ import time -from dataclasses import dataclass -import argparse import atexit +from config import get_configs, ScraperConfig + import undetected_chromedriver as uc from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver import ChromeOptions @@ -10,19 +10,6 @@ from selenium.webdriver import ChromeOptions from flask import Flask, request -@dataclass -class ScraperConfig: - wait_time: float - headless: bool - user_agent: str - - -@dataclass -class ServerConfig: - host: str - port: int - - class Scraper: def __init__(self, config: ScraperConfig): self.config = config @@ -61,54 +48,7 @@ class Scraper: if __name__ == "__main__": - parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy", - description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML", - usage="") - parser.add_argument( - "--port", - help="Port the proxy runs on.", - required=False, - type=int, - default=32323 - ) - - parser.add_argument( - "--host", - help="Host the proxy to runs on.", - required=False, - type=str, - default="0.0.0.0" - ) - - parser.add_argument( - "--wait", - help="Seconds to wait before returning content.", - required=False, - type=float, - default=10 - ) - - parser.add_argument( - "--headless", - help="Whether or not to run Chrome headless.", - required=False, - type=bool, - default=True - ) - - parser.add_argument( - "--user-agent", - help="Chrome user 
agent. Changing with the current ChromeDriver version recommended.", - required=False, - type=str, - default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36" - - ) - - args = parser.parse_args() - - server_config = ServerConfig(host=args.host, port=args.port) - scraper_config = ScraperConfig(wait_time=args.wait, headless=args.headless, user_agent=args.user_agent) + server_config, scraper_config = get_configs() scraper = Scraper(scraper_config)