perf: keep the same chromedriver instance open rather than opening a new one per request

- remove the context-manager implementation
- register an atexit cleanup handler
- return a 500 error if Selenium throws an exception
Sawyer 2025-07-25 01:38:09 -05:00
parent ca91190291
commit 0dfec4d269


@@ -1,6 +1,7 @@
 import time
 from dataclasses import dataclass
 import argparse
+import atexit
 
 import undetected_chromedriver as uc
 from selenium.webdriver.support.ui import WebDriverWait
@@ -26,18 +27,7 @@ class Scraper:
     def __init__(self, config: ScraperConfig):
         self.config = config
         self.driver = None
-
-    def __enter__(self):
         self._setup_driver()
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self._cleanup()
-
-    def _cleanup(self):
-        driver = self.driver
-        driver.close()
-        driver.quit()
 
     def _setup_driver(self):
         chrome_options = ChromeOptions()
@@ -49,17 +39,23 @@ class Scraper:
             use_subprocess=False
         )
 
+    def cleanup(self):
+        if self.driver:
+            try:
+                self.driver.quit()
+            except Exception as e:
+                print(f"Error during cleanup: {e}")
+            finally:
+                self.driver = None
+
     def render_page(self, url):
-        wait_time = self.config.wait_time
-        driver = self.driver
-        driver.get(url)
-        WebDriverWait(self.driver, wait_time).until(
+        self.driver.get(url)
+        WebDriverWait(self.driver, timeout=self.config.wait_time).until(
             lambda driver: driver.execute_script("return document.readyState") == "complete"
         )
-        time.sleep(wait_time)
+        time.sleep(self.config.wait_time)
         return self.driver.page_source
@@ -111,15 +107,12 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
-    port = args.port
-    host = args.host
-    wait = args.wait
-    headless = args.headless
-    user_agent = args.user_agent
-
-    server_config = ServerConfig(host=host, port=port)
-    scraper_config = ScraperConfig(wait_time=wait, headless=headless, user_agent=user_agent)
+    server_config = ServerConfig(host=args.host, port=args.port)
+    scraper_config = ScraperConfig(wait_time=args.wait, headless=args.headless, user_agent=args.user_agent)
+    scraper = Scraper(scraper_config)
+
+    atexit.register(scraper.cleanup)
 
     # run the server
     app = Flask(__name__)
@@ -127,11 +120,11 @@ if __name__ == "__main__":
     @app.route("/")
     def proxy_route():
         url = request.args.get("url")
-        with Scraper(scraper_config) as scraper:
-            try:
-                html = scraper.render_page(url)
-                return html
-            except Exception as e:
-                print(f"Error: {e}")
+        try:
+            html = scraper.render_page(url)
+            return html
+        except Exception as e:
+            print(f"Error: {e}")
+            return f"Error: {e}", 500
 
     app.run(host=server_config.host, port=server_config.port)
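
For reference, a hypothetical call against the running proxy (host, port, and target URL are assumptions, not values from this commit, and requests is not a dependency of the file itself):

import requests

# Hypothetical client call; adjust host/port to match the server flags.
resp = requests.get("http://127.0.0.1:5000/", params={"url": "https://example.com"})
print(resp.status_code)  # 500 if render_page raised, 200 otherwise
print(resp.text[:200])   # rendered HTML on success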