Compare commits


No commits in common. "8efbbbcf0812c2b9dee61e7b0071687be1646527" and "52f5392ce07236dcd483bbf474c24f2f89e0cd54" have entirely different histories.

9 changed files with 98 additions and 251 deletions

.gitignore

@@ -1,12 +0,0 @@
-# Python-generated files
-__pycache__/
-*.py[oc]
-build/
-dist/
-wheels/
-*.egg-info
-# Virtual environments
-.venv
-.git


@@ -1,57 +0,0 @@
-name: Build and Push Docker Image
-on:
-  push:
-    branches: [ "main", "master" ]
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-      - name: Log in to GHCR
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ secrets.DOCKERHUB_USERNAME }}/${{ github.event.repository.name }}
-            ghcr.io/${{ github.repository }}
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{version}}
-            type=semver,pattern={{major}}.{{minor}}
-            type=raw,value=latest,enable={{is_default_branch}}
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./Dockerfile
-          platforms: linux/amd64
-          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
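This workflow published the image to both Docker Hub and GHCR on every push to the default branch, so consumers never needed to build locally. A pull sketch, using the image names that appear in this diff's README and compose file (s4wyer on GHCR, s44wyer on Docker Hub):
```sh
# Pull the published image (names taken from the README/compose file in this diff)
docker pull ghcr.io/s4wyer/chromedriver-http-proxy:latest
# or the Docker Hub mirror
docker pull s44wyer/chromedriver-http-proxy:latest
```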


@@ -1,9 +0,0 @@
-# Build (Docker)
-Also works with Podman.
-```sh
-docker build -t 'chromedriver-http-proxy' .
-docker run --rm -p "32323:32323" chromedriver-http-proxy
-```
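The deleted build doc says Podman works too; since Podman's CLI is flag-compatible with Docker's for these commands, the equivalent would be:
```sh
# Same build and run, via Podman's Docker-compatible CLI
podman build -t 'chromedriver-http-proxy' .
podman run --rm -p "32323:32323" chromedriver-http-proxy
```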

Dockerfile

@@ -1,30 +0,0 @@
-FROM ghcr.io/astral-sh/uv:debian-slim
-ENV PROXY_PORT=32323
-ENV PROXY_HOST=0.0.0.0
-ENV SCRAPER_WAIT_TIME=10
-ENV SCRAPER_HEADLESS=True
-ENV SCRAPER_USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
-WORKDIR /app
-COPY uv.lock pyproject.toml /app/
-RUN uv sync --locked
-RUN apt-get update && apt-get install -y \
-    wget \
-    gnupg \
-    ca-certificates \
-    && wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | apt-key add - \
-    && echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
-    && apt-get update \
-    && apt-get install -y google-chrome-stable \
-    && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
-COPY . /app
-CMD uv run proxy.py \
-    --port="$PROXY_PORT" \
-    --host="$PROXY_HOST" \
-    --wait="$SCRAPER_WAIT_TIME" \
-    --headless="$SCRAPER_HEADLESS" \
-    --user-agent="$SCRAPER_USER_AGENT"
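Because the CMD wires every flag to an ENV variable, the deleted image was reconfigurable at run time without a rebuild; for example, assuming the image tag from the build doc above:
```sh
# Override scraper settings through the Dockerfile's ENV-backed flags
docker run --rm -p "32323:32323" \
  -e SCRAPER_WAIT_TIME=5 \
  -e PROXY_HOST=0.0.0.0 \
  chromedriver-http-proxy
```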

README.md

@@ -6,21 +6,11 @@ Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML
 - Solves [Anubis](https://anubis.techaro.lol/)
 - Solves [go-away](https://git.gammaspectra.live/git/go-away)
-- Solves similar [POW challenges](https://git.gammaspectra.live/git/go-away#other-similar-projects)
+- Solves similar POW challenges
 - Sometimes bypasses Cloudflare Turnstile
 ## Installation
-### Container
-```sh
-docker run --rm -p "32323:32323" ghcr.io/s4wyer/chromedriver-http-proxy # or s44wyer/chromedriver-http-proxy
-```
-There's also a [Docker compose example](/docker-compose.yml) with better config.
 ### System
 uv:
 ```sh
@@ -62,7 +52,7 @@ This proxy has no authentication, and I don't plan to add any (PRs welcome thoug
 ## TODO
-- [ ] ARM Docker images
+- [ ] Docker image
 - [ ] Send JS/CSS to the client
 - [ ] Custom Chromium binary locations
 - [ ] More CLI arguments to control ChromeDriver behavior
@@ -70,7 +60,6 @@ This proxy has no authentication, and I don't plan to add any (PRs welcome thoug
 - [ ] Screenshot endpoint
 - [ ] Allow custom headers
 - [ ] POST requests
-- [x] Docker image
 ## Similar Projects

config.py

@@ -1,72 +0,0 @@
-import os
-import argparse
-from dataclasses import dataclass
-@dataclass
-class ScraperConfig:
-    wait_time: float
-    headless: bool
-    user_agent: str
-@dataclass
-class ServerConfig:
-    host: str
-    port: int
-def get_configs():
-    parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
-                                     description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
-                                     usage="")
-    parser.add_argument(
-        "--port",
-        help="Port the proxy runs on.",
-        required=False,
-        type=int,
-        default=os.getenv("PROXY_PORT", 32323)
-    )
-    parser.add_argument(
-        "--host",
-        help="Host the proxy runs on.",
-        required=False,
-        type=str,
-        default=os.getenv("PROXY_HOST", "0.0.0.0")
-    )
-    parser.add_argument(
-        "--wait",
-        help="Seconds to wait before returning content.",
-        required=False,
-        type=float,
-        default=os.getenv("SCRAPER_WAIT_TIME", 10)
-    )
-    parser.add_argument(
-        "--headless",
-        help="Whether or not to run Chrome headless.",
-        required=False,
-        type=bool,
-        default=os.getenv("SCRAPER_HEADLESS", True)
-    )
-    parser.add_argument(
-        "--user-agent",
-        help="Chrome user agent. Changing this to match the current ChromeDriver version is recommended.",
-        required=False,
-        type=str,
-        default=os.getenv("SCRAPER_USER_AGENT", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
-    )
-    args = parser.parse_args()
-    server_config = ServerConfig(host=args.host,
-                                 port=args.port)
-    scraper_config = ScraperConfig(wait_time=args.wait,
-                                   headless=args.headless,
-                                   user_agent=args.user_agent)
-    return server_config, scraper_config
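Every flag here falls back to an environment variable, so CLI and container runs share one configuration surface. One caveat: argparse's `type=bool` converts any non-empty string to `True`, so `SCRAPER_HEADLESS=False` (a string) still parses as headless. A sample invocation with illustrative values:
```sh
# Illustrative invocation of the flags get_configs() defines
uv run proxy.py --port=32323 --host=127.0.0.1 --wait=5
```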

docker-compose.yml

@@ -1,20 +0,0 @@
-services:
-  chromedriver-http-proxy:
-    image: ghcr.io/s4wyer/chromedriver-http-proxy:latest
-    container_name: chromedriver-http-proxy
-    ports:
-      - "32323:32323"
-    environment:
-      - PROXY_PORT=32323
-      - PROXY_HOST=0.0.0.0
-      - SCRAPER_WAIT_TIME=10
-      - SCRAPER_HEADLESS=True
-      - SCRAPER_USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
-    # Increase shared memory size to prevent Chromium from crashing.
-    # Recommended by Selenium: https://hub.docker.com/r/selenium/standalone-chrome
-    shm_size: '2gb'
-    restart: unless-stopped
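Bringing this service up is the standard Compose workflow; nothing project-specific is assumed beyond the service name defined above:
```sh
# Start the proxy in the background and tail its logs
docker compose up -d
docker compose logs -f chromedriver-http-proxy
```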

proxy.py

@@ -1,8 +1,6 @@
 import time
-import atexit
-import logging
-from config import get_configs, ScraperConfig
+from dataclasses import dataclass
+import argparse
 import undetected_chromedriver as uc
 from selenium.webdriver.support.ui import WebDriverWait
@@ -11,57 +9,120 @@ from selenium.webdriver import ChromeOptions
 from flask import Flask, request
+@dataclass
+class ScraperConfig:
+    wait_time: float
+    headless: bool
+    user_agent: str
+@dataclass
+class ServerConfig:
+    host: str
+    port: int
 class Scraper:
     def __init__(self, config: ScraperConfig):
         self.config = config
         self.driver = None
+    def __enter__(self):
+        self._setup_driver()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._cleanup()
+    def _cleanup(self):
+        driver = self.driver
+        driver.close()
+        driver.quit()
     def _setup_driver(self):
+        headless = self.config.headless
+        user_agent = self.config.user_agent
         chrome_options = ChromeOptions()
-        chrome_options.add_argument(f"--user-agent={self.config.user_agent}")
+        chrome_options.add_argument(f"--user-agent={user_agent}")
         self.driver = uc.Chrome(
-            headless=self.config.headless,
+            headless=headless,
             options=chrome_options,
             use_subprocess=False
         )
-        logger.info("Driver started.")
-    def cleanup(self):
-        if self.driver:
-            try:
-                self.driver.quit()
-                logger.info("Driver closed.")
-            except Exception as e:
-                logger.error(f"Exception during cleanup: {e}")
-            finally:
-                self.driver = None
     def render_page(self, url):
-        logger.info(f"Fetching {url}...")
-        self.driver.get(url)
-        WebDriverWait(self.driver, timeout=self.config.wait_time).until(
+        wait_time = self.config.wait_time
+        driver = self.driver
+        driver.get(url)
+        WebDriverWait(self.driver, wait_time).until(
             lambda driver: driver.execute_script("return document.readyState") == "complete"
         )
-        time.sleep(self.config.wait_time)
-        logger.info(f"Fetched {url}.")
+        time.sleep(wait_time)
         return self.driver.page_source
 if __name__ == "__main__":
-    # logging.basicConfig(level=logging.INFO)
-    logger = logging.getLogger(__name__)
-    server_config, scraper_config = get_configs()
-    scraper = Scraper(scraper_config)
-    atexit.register(scraper.cleanup)
+    parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
+                                     description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
+                                     usage="")
+    parser.add_argument(
+        "--port",
+        help="Port the proxy runs on.",
+        required=False,
+        type=int,
+        default=32323
+    )
+    parser.add_argument(
+        "--host",
+        help="Host the proxy runs on.",
+        required=False,
+        type=str,
+        default="0.0.0.0"
+    )
+    parser.add_argument(
+        "--wait",
+        help="Seconds to wait before returning content.",
+        required=False,
+        type=float,
+        default=10
+    )
+    parser.add_argument(
+        "--headless",
+        help="Whether or not to run Chrome headless.",
+        required=False,
+        type=bool,
+        default=True
+    )
+    parser.add_argument(
+        "--user-agent",
+        help="Chrome user agent. Changing this to match the current ChromeDriver version is recommended.",
+        required=False,
+        type=str,
+        default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
+    )
+    args = parser.parse_args()
+    port = args.port
+    host = args.host
+    wait = args.wait
+    headless = args.headless
+    user_agent = args.user_agent
+    server_config = ServerConfig(host=host, port=port)
+    scraper_config = ScraperConfig(wait_time=wait, headless=headless, user_agent=user_agent)
     # run the server
     app = Flask(__name__)
@ -69,12 +130,11 @@ if __name__ == "__main__":
@app.route("/")
def proxy_route():
url = request.args.get("url")
with Scraper(scraper_config) as scraper:
try:
html = scraper.render_page(url)
return html
logger.info(f"Successfully sent {url} to client.")
except Exception as e:
logger.error(f"Error sending {url} to client: {e}", 500)
print(f"Error: {e}")
app.run(host=server_config.host, port=server_config.port)
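Both sides of this diff expose the same single route: a GET on `/` with a `url` query parameter returns the rendered HTML. With the default host and port shown above:
```sh
# Ask the running proxy to render a page and return the final HTML
curl "http://localhost:32323/?url=https://example.com"
```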

pyproject.toml

@@ -3,8 +3,6 @@ name = "playwright-http-proxy"
 version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
-license = "AGPL-3.0-only"
-license-files = ["LICEN[CS]E*"]
 requires-python = ">=3.13"
 dependencies = [
     "argparse>=1.4.0",