chromedriver-http-proxy/proxy.py

80 lines
2.1 KiB
Python

import time
import atexit
import logging
from config import get_configs, ScraperConfig
import undetected_chromedriver as uc
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ChromeOptions
from flask import Flask, request
# Bound at module level so the class also works when this file is imported
# rather than run as a script; logging.getLogger returns the same logger
# object for the same name, so rebinding it elsewhere is harmless.
logger = logging.getLogger(__name__)


class Scraper:
    """Render web pages through one undetected-chromedriver browser session.

    The browser is started when the instance is created and stays open until
    ``cleanup`` is called.
    """

    def __init__(self, config: "ScraperConfig"):
        """Store *config* and start the browser.

        Args:
            config: Scraper settings; this class reads ``user_agent``,
                ``headless`` and ``wait_time`` from it.
        """
        self.config = config
        self.driver = None
        self._setup_driver()

    def _setup_driver(self) -> None:
        """Start a Chrome instance using the configured user agent."""
        chrome_options = ChromeOptions()
        chrome_options.add_argument(f"--user-agent={self.config.user_agent}")
        self.driver = uc.Chrome(
            headless=self.config.headless,
            options=chrome_options,
            use_subprocess=False,
        )
        logger.info("Driver started.")

    def cleanup(self) -> None:
        """Quit the browser if it is running.

        Safe to call more than once. Errors raised while quitting are logged
        and not propagated, and the driver reference is always cleared.
        """
        if self.driver is None:
            return
        try:
            self.driver.quit()
            logger.info("Driver closed.")
        except Exception as e:
            logger.error("Exception during cleanup: %s", e)
        finally:
            self.driver = None

    def render_page(self, url: str) -> str:
        """Load *url* in the browser and return the rendered page source.

        Waits up to ``config.wait_time`` seconds for ``document.readyState``
        to become ``"complete"``, then sleeps a further ``config.wait_time``
        seconds before reading the page source.

        Args:
            url: Address to load.

        Returns:
            The driver's ``page_source`` after the waits.

        Raises:
            RuntimeError: If the driver is not running (for example after
                ``cleanup``).
        """
        if self.driver is None:
            raise RuntimeError("Driver is not running.")

        logger.info("Fetching %s...", url)
        self.driver.get(url)
        WebDriverWait(self.driver, timeout=self.config.wait_time).until(
            lambda driver: driver.execute_script("return document.readyState")
            == "complete"
        )
        # Extra fixed delay after the document reports "complete", before the
        # source is captured.
        time.sleep(self.config.wait_time)
        logger.info("Fetched %s.", url)
        return self.driver.page_source
if __name__ == "__main__":
    # Uncomment to emit INFO-level messages; with logging left unconfigured
    # only warnings and errors are printed.
    # logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    server_config, scraper_config = get_configs()
    scraper = Scraper(scraper_config)
    # Make sure the browser is shut down when the process exits.
    atexit.register(scraper.cleanup)

    # run the server
    app = Flask(__name__)

    @app.route("/")
    def proxy_route():
        """Render the page named by the ``url`` query parameter.

        Returns the rendered HTML on success, a 400 response when ``url`` is
        missing or empty, and a 500 response when rendering fails.
        """
        url = request.args.get("url")
        if not url:
            return "Missing required 'url' query parameter.", 400

        try:
            html = scraper.render_page(url)
        except Exception as e:
            # Top-level request boundary: log with traceback and answer with
            # an explicit 500 instead of returning None to Flask.
            logger.exception("Error sending %s to client: %s", url, e)
            return "Failed to render the requested URL.", 500

        logger.info("Successfully sent %s to client.", url)
        return html

    # All requests share the single browser session held by `scraper`, so
    # handle them one at a time instead of on concurrent threads.
    app.run(host=server_config.host, port=server_config.port, threaded=False)