perf: keep the same instance of chromedriver open rather than opening a new one per request
- remove the context-manager implementation
- register an atexit cleanup handler
- send a 500 error if selenium throws an exception
This commit is contained in:
parent
ca91190291
commit
0dfec4d269
1 changed file with 23 additions and 30 deletions
45
proxy.py
45
proxy.py
|
@ -1,6 +1,7 @@
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import argparse
|
import argparse
|
||||||
|
import atexit
|
||||||
|
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
@ -26,18 +27,7 @@ class Scraper:
|
||||||
def __init__(self, config: ScraperConfig):
    """Store the scraper configuration; the webdriver itself is created lazily."""
    self.config = config
    # Single long-lived driver instance shared across requests;
    # None until _setup_driver() runs.
    self.driver = None
|
||||||
|
|
||||||
def __enter__(self):
    """Start the browser when entering a ``with`` block."""
    self._setup_driver()
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """Tear the browser down when leaving the ``with`` block."""
    self._cleanup()

def _cleanup(self):
    """Shut down the webdriver, if one is running.

    Guarded against ``self.driver`` being None (``__init__`` initializes it
    to None, so exiting before setup ran would otherwise raise
    AttributeError). ``quit()`` closes every window and ends the
    chromedriver process; a separate ``close()`` call beforehand is
    redundant and can itself raise once the session is gone.
    """
    driver = self.driver
    if driver is not None:
        driver.quit()
        self.driver = None
||||||
def _setup_driver(self):
|
def _setup_driver(self):
|
||||||
chrome_options = ChromeOptions()
|
chrome_options = ChromeOptions()
|
||||||
|
@ -49,17 +39,23 @@ class Scraper:
|
||||||
use_subprocess=False
|
use_subprocess=False
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def cleanup(self):
    """Quit the shared webdriver; safe to call repeatedly (registered with atexit)."""
    if not self.driver:
        return
    try:
        self.driver.quit()
    except Exception as e:
        # Best-effort shutdown: never let an atexit handler raise.
        print(f"Error during cleanup: {e}")
    finally:
        # Drop the reference so a second call is a no-op.
        self.driver = None
|
||||||
|
|
||||||
def render_page(self, url):
    """Navigate the shared driver to *url* and return the rendered HTML.

    Blocks until ``document.readyState`` reports "complete" (waiting at
    most ``config.wait_time`` seconds), then sleeps a further
    ``config.wait_time`` seconds so client-side rendering can settle
    before ``page_source`` is read.
    """
    driver = self.driver
    wait_time = self.config.wait_time

    driver.get(url)
    WebDriverWait(driver, timeout=wait_time).until(
        lambda d: d.execute_script("return document.readyState") == "complete"
    )
    # Extra grace period for JS that fires after readyState flips.
    time.sleep(wait_time)
    return driver.page_source
|
||||||
|
|
||||||
|
@ -111,15 +107,12 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
args = parser.parse_args()

# Build both configs straight from the CLI arguments.
server_config = ServerConfig(host=args.host, port=args.port)
scraper_config = ScraperConfig(
    wait_time=args.wait, headless=args.headless, user_agent=args.user_agent
)

# One long-lived scraper shared by every request; make sure the
# chromedriver process is torn down when the interpreter exits.
scraper = Scraper(scraper_config)
atexit.register(scraper.cleanup)

# run the server
app = Flask(__name__)
|
||||||
|
@ -127,11 +120,11 @@ if __name__ == "__main__":
|
||||||
@app.route("/")
def proxy_route():
    """Render the page named by the ``url`` query parameter and return its HTML.

    On any scraping failure, respond with the error text and HTTP 500.
    The previous code did ``print(f"Error: {e}", 500)``, which only logs
    and implicitly returns None — Flask then raises "view function did
    not return a valid response" instead of sending the 500 the commit
    intends.
    """
    url = request.args.get("url")
    try:
        html = scraper.render_page(url)
        return html
    except Exception as e:
        # (body, status) tuple: Flask converts this into a 500 response.
        return f"Error: {e}", 500
|
||||||
|
|
||||||
# Start the Flask development server on the configured interface/port.
app.run(host=server_config.host, port=server_config.port)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue