refactor: move config to a separate file

Sawyer 2025-07-25 01:49:58 -05:00
parent 0dfec4d269
commit 36a0c0fe60
2 changed files with 74 additions and 63 deletions

config.py (new file)

@@ -0,0 +1,71 @@
import argparse
from dataclasses import dataclass


@dataclass
class ScraperConfig:
    wait_time: float
    headless: bool
    user_agent: str


@dataclass
class ServerConfig:
    host: str
    port: int


def get_configs():
    parser = argparse.ArgumentParser(
        prog="ChromeDriver HTTP Proxy",
        description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
    )
    parser.add_argument(
        "--port",
        help="Port the proxy runs on.",
        type=int,
        default=32323,
    )
    parser.add_argument(
        "--host",
        help="Host the proxy runs on.",
        type=str,
        default="0.0.0.0",
    )
    parser.add_argument(
        "--wait",
        help="Seconds to wait before returning content.",
        type=float,
        default=10.0,
    )
    parser.add_argument(
        "--headless",
        # type=bool is a trap here: argparse would call bool() on the raw
        # string, so "--headless False" would still parse as True.
        # BooleanOptionalAction (Python 3.9+) adds --headless/--no-headless.
        action=argparse.BooleanOptionalAction,
        help="Whether or not to run Chrome headless.",
        default=True,
    )
    parser.add_argument(
        "--user-agent",
        help="Chrome user agent. Changing this to match the current ChromeDriver version is recommended.",
        type=str,
        default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
    )
    args = parser.parse_args()

    server_config = ServerConfig(host=args.host, port=args.port)
    scraper_config = ScraperConfig(
        wait_time=args.wait,
        headless=args.headless,
        user_agent=args.user_agent,
    )
    return server_config, scraper_config
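
A note on the --headless flag: argparse applies the type callable to the raw argument string, and since bool("False") is True (bool only checks for emptiness), type=bool silently treats every supplied value as truthy. The sketch below is illustrative, not part of the commit; it contrasts that pitfall with the BooleanOptionalAction approach used above (available since Python 3.9):

import argparse

# Pitfall: any non-empty string, including "False", converts to True.
bad = argparse.ArgumentParser()
bad.add_argument("--headless", type=bool, default=True)
print(bad.parse_args(["--headless", "False"]).headless)  # True, not False

# BooleanOptionalAction generates paired --headless / --no-headless flags.
good = argparse.ArgumentParser()
good.add_argument("--headless", action=argparse.BooleanOptionalAction, default=True)
print(good.parse_args(["--no-headless"]).headless)  # False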

@@ -1,8 +1,8 @@
 import time
-from dataclasses import dataclass
-import argparse
 import atexit
 
+from config import get_configs, ScraperConfig
+
 import undetected_chromedriver as uc
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver import ChromeOptions
@@ -10,19 +10,6 @@ from selenium.webdriver import ChromeOptions
 from flask import Flask, request
 
-
-@dataclass
-class ScraperConfig:
-    wait_time: float
-    headless: bool
-    user_agent: str
-
-
-@dataclass
-class ServerConfig:
-    host: str
-    port: int
-
 class Scraper:
     def __init__(self, config: ScraperConfig):
         self.config = config
@@ -61,54 +48,7 @@ class Scraper:
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
-                                     description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
-                                     usage="")
-
-    parser.add_argument(
-        "--port",
-        help="Port the proxy runs on.",
-        required=False,
-        type=int,
-        default=32323
-    )
-
-    parser.add_argument(
-        "--host",
-        help="Host the proxy to runs on.",
-        required=False,
-        type=str,
-        default="0.0.0.0"
-    )
-
-    parser.add_argument(
-        "--wait",
-        help="Seconds to wait before returning content.",
-        required=False,
-        type=float,
-        default=10
-    )
-
-    parser.add_argument(
-        "--headless",
-        help="Whether or not to run Chrome headless.",
-        required=False,
-        type=bool,
-        default=True
-    )
-
-    parser.add_argument(
-        "--user-agent",
-        help="Chrome user agent. Changing with the current ChromeDriver version recommended.",
-        required=False,
-        type=str,
-        default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
-    )
-
-    args = parser.parse_args()
-    server_config = ServerConfig(host=args.host, port=args.port)
-    scraper_config = ScraperConfig(wait_time=args.wait, headless=args.headless, user_agent=args.user_agent)
+    server_config, scraper_config = get_configs()
     scraper = Scraper(scraper_config)
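
Because the dataclasses now live in config.py, entry points other than the CLI (tests, for example) can build configs without touching argparse. A hypothetical snippet, with placeholder values that are not part of the commit:

from config import ScraperConfig, ServerConfig

# Build configs directly instead of parsing CLI flags, e.g. in a test.
scraper_config = ScraperConfig(wait_time=5.0, headless=True,
                               user_agent="placeholder-agent")
server_config = ServerConfig(host="127.0.0.1", port=32323)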