refactor: move config to a separate file
parent 0dfec4d269
commit 36a0c0fe60

2 changed files with 74 additions and 63 deletions
config.py  (new file, +71)

@@ -0,0 +1,71 @@
+import argparse
+from dataclasses import dataclass
+
+
+@dataclass
+class ScraperConfig:
+    wait_time: float
+    headless: bool
+    user_agent: str
+
+
+@dataclass
+class ServerConfig:
+    host: str
+    port: int
+
+
+def get_configs():
+    parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
+                                     description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
+                                     usage="")
+    parser.add_argument(
+        "--port",
+        help="Port the proxy runs on.",
+        required=False,
+        type=int,
+        default=32323
+    )
+
+    parser.add_argument(
+        "--host",
+        help="Host the proxy runs on.",
+        required=False,
+        type=str,
+        default="0.0.0.0"
+    )
+
+    parser.add_argument(
+        "--wait",
+        help="Seconds to wait before returning content.",
+        required=False,
+        type=float,
+        default=10
+    )
+
+    parser.add_argument(
+        "--headless",
+        help="Whether or not to run Chrome headless.",
+        required=False,
+        type=bool,
+        default=True
+    )
+
+    parser.add_argument(
+        "--user-agent",
+        help="Chrome user agent. Updating it to match the current ChromeDriver version is recommended.",
+        required=False,
+        type=str,
+        default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
+    )
+
+    args = parser.parse_args()
+
+    server_config = ServerConfig(host=args.host,
+                                 port=args.port)
+
+    scraper_config = ScraperConfig(wait_time=args.wait,
+                                   headless=args.headless,
+                                   user_agent=args.user_agent)
+
+    return server_config, scraper_config
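A minimal usage sketch of the new module (the consumer script below is hypothetical; only the names imported from config.py come from this commit). One caveat worth keeping in mind: because --headless is declared with type=bool, argparse converts the raw string with bool(), so any non-empty value, including "False", still comes through as True.

# usage_sketch.py -- hypothetical consumer of the new config module (not part of the commit)
from config import get_configs

if __name__ == "__main__":
    # Reads --port/--host/--wait/--headless/--user-agent from sys.argv,
    # falling back to the defaults defined in config.py.
    server_config, scraper_config = get_configs()

    print(f"serving on {server_config.host}:{server_config.port}")   # 0.0.0.0:32323 by default
    print(f"wait={scraper_config.wait_time}s headless={scraper_config.headless}")
    # Caveat: with type=bool, `--headless False` is still truthy, since bool("False") is True.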
proxy.py  (66 lines changed: +3, -63)
@@ -1,8 +1,8 @@
 import time
-from dataclasses import dataclass
-import argparse
 import atexit
 
+from config import get_configs, ScraperConfig
+
 import undetected_chromedriver as uc
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver import ChromeOptions
@@ -10,19 +10,6 @@ from selenium.webdriver import ChromeOptions
 from flask import Flask, request
 
 
-@dataclass
-class ScraperConfig:
-    wait_time: float
-    headless: bool
-    user_agent: str
-
-
-@dataclass
-class ServerConfig:
-    host: str
-    port: int
-
-
 class Scraper:
     def __init__(self, config: ScraperConfig):
         self.config = config
@@ -61,54 +48,7 @@ class Scraper:
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
-                                     description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
-                                     usage="")
-    parser.add_argument(
-        "--port",
-        help="Port the proxy runs on.",
-        required=False,
-        type=int,
-        default=32323
-    )
-
-    parser.add_argument(
-        "--host",
-        help="Host the proxy runs on.",
-        required=False,
-        type=str,
-        default="0.0.0.0"
-    )
-
-    parser.add_argument(
-        "--wait",
-        help="Seconds to wait before returning content.",
-        required=False,
-        type=float,
-        default=10
-    )
-
-    parser.add_argument(
-        "--headless",
-        help="Whether or not to run Chrome headless.",
-        required=False,
-        type=bool,
-        default=True
-    )
-
-    parser.add_argument(
-        "--user-agent",
-        help="Chrome user agent. Updating it to match the current ChromeDriver version is recommended.",
-        required=False,
-        type=str,
-        default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
-
-    )
-
-    args = parser.parse_args()
-
-    server_config = ServerConfig(host=args.host, port=args.port)
-    scraper_config = ScraperConfig(wait_time=args.wait, headless=args.headless, user_agent=args.user_agent)
+    server_config, scraper_config = get_configs()
 
     scraper = Scraper(scraper_config)
 
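Since config.py only depends on argparse and dataclasses, the two dataclasses can now also be built directly, without pulling in selenium, Flask, or undetected-chromedriver through proxy.py. A small illustrative sketch; the values below are made up:

# Construct the configs programmatically instead of via get_configs()/argv.
from config import ScraperConfig, ServerConfig

scraper_config = ScraperConfig(
    wait_time=2.5,              # seconds to wait before returning the rendered HTML
    headless=False,             # run Chrome with a visible window
    user_agent="Mozilla/5.0 (X11; Linux x86_64)",  # placeholder UA string
)
server_config = ServerConfig(host="127.0.0.1", port=8080)

# These would then be handed to Scraper(...) and the Flask server in proxy.py.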