Compare commits
No commits in common. "8efbbbcf0812c2b9dee61e7b0071687be1646527" and "52f5392ce07236dcd483bbf474c24f2f89e0cd54" have entirely different histories.
8efbbbcf08 … 52f5392ce0
9 changed files with 98 additions and 251 deletions
.dockerignore (deleted, 12 lines)

@@ -1,12 +0,0 @@
-# Python-generated files
-__pycache__/
-*.py[oc]
-build/
-dist/
-wheels/
-*.egg-info
-
-# Virtual environments
-.venv
-
-.git
.github/workflows/build-and-publish.yml (vendored; deleted, 57 lines)
@@ -1,57 +0,0 @@
-name: Build and Push Docker Image
-
-on:
-  push:
-    branches: [ "main", "master" ]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Log in to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-
-      - name: Log in to GHCR
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ secrets.DOCKERHUB_USERNAME }}/${{ github.event.repository.name }}
-            ghcr.io/${{ github.repository }}
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{version}}
-            type=semver,pattern={{major}}.{{minor}}
-            type=raw,value=latest,enable={{is_default_branch}}
-
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./Dockerfile
-          platforms: linux/amd64
-          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
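For reference, a minimal sketch of how the deleted workflow's `tags:` rules would expand on a push to the default branch, assuming `DOCKERHUB_USERNAME` is `s44wyer` (the name the README mentions); the `type=semver` rules only fire on version-tag pushes:

```python
# Sketch of docker/metadata-action tag expansion for a push to "main".
# Assumes DOCKERHUB_USERNAME=s44wyer and repository s4wyer/chromedriver-http-proxy.
images = [
    "s44wyer/chromedriver-http-proxy",         # ${{ secrets.DOCKERHUB_USERNAME }}/${{ github.event.repository.name }}
    "ghcr.io/s4wyer/chromedriver-http-proxy",  # ghcr.io/${{ github.repository }}
]

tags = ["main"]        # type=ref,event=branch -> branch name
tags.append("latest")  # type=raw,value=latest,enable={{is_default_branch}}

for image in images:
    for tag in tags:
        print(f"{image}:{tag}")
# s44wyer/chromedriver-http-proxy:main
# s44wyer/chromedriver-http-proxy:latest
# ghcr.io/s4wyer/chromedriver-http-proxy:main
# ghcr.io/s4wyer/chromedriver-http-proxy:latest
```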
BUILD.md (deleted, 9 lines)
@@ -1,9 +0,0 @@
-# Build (Docker)
-
-Also works with Podman.
-
-```sh
-docker build -t 'chromedriver-http-proxy' .
-docker run --rm -p "32323:32323" chromedriver-http-proxy
-```
-
Dockerfile (deleted, 30 lines)
@@ -1,30 +0,0 @@
-FROM ghcr.io/astral-sh/uv:debian-slim
-
-ENV PROXY_PORT=32323
-ENV PROXY_HOST=0.0.0.0
-ENV SCRAPER_WAIT_TIME=10
-ENV SCRAPER_HEADLESS=True
-ENV SCRAPER_USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
-
-WORKDIR /app
-COPY uv.lock pyproject.toml /app
-RUN uv sync --locked
-
-RUN apt-get update && apt-get install -y \
-    wget \
-    gnupg \
-    ca-certificates \
-    && wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | apt-key add - \
-    && echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
-    && apt-get update \
-    && apt-get install -y google-chrome-stable \
-    && rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
-
-COPY . /app
-
-CMD uv run proxy.py \
-    --port="$PROXY_PORT"\
-    --host="$PROXY_HOST"\
-    --wait="$SCRAPER_WAIT_TIME" \
-    --headless="$SCRAPER_HEADLESS" \
-    --user-agent="$SCRAPER_USER_AGENT"
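Because the deleted CMD is in shell form, the ENV defaults above are expanded at container start, and the deleted config.py (later in this diff) reads the same variables as argparse fallbacks. A rough sketch of that fallback path, assuming the container's environment is intact:

```python
# Sketch: config.py's os.getenv fallbacks pick up the Dockerfile's ENV values,
# so `docker run -e PROXY_PORT=8080 ...` overrides the default without
# touching the CMD line.
import os

port = int(os.getenv("PROXY_PORT", 32323))  # ENV PROXY_PORT=32323 in the image
host = os.getenv("PROXY_HOST", "0.0.0.0")   # ENV PROXY_HOST=0.0.0.0
print(port, host)
```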
README.md (15 changed lines)
@@ -6,21 +6,11 @@ Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML
 
 - Solves [Anubis](https://anubis.techaro.lol/)
 - Solves [go-away](https://git.gammaspectra.live/git/go-away)
-- Solves similiar [POW challenges](https://git.gammaspectra.live/git/go-away#other-similar-projects)
+- Solves similiar POW challenges
 - Sometimes bypasses Cloudflare Turnstile
 
 ## Installation
 
-### Container
-
-```sh
-docker run --rm -p "32323:32323" ghcr.io/s4wyer/chromedriver-http-proxy # or s44wyer/chromedriver-http-proxy
-```
-
-There's also a [Docker compose example](/docker-compose.yml) with better config.
-
-### System
-
 uv:
 
 ```sh
@@ -62,7 +52,7 @@ This proxy has no authentication, and I don't plan to add any (PRs welcome though)
 
 ## TODO
 
-- [ ] ARM Docker images
+- [ ] Docker image
 - [ ] Send JS/CSS to the client
 - [ ] Custom Chromium binary locations
 - [ ] More CLI arguments to control ChromeDriver behavior
@@ -70,7 +60,6 @@ This proxy has no authentication, and I don't plan to add any (PRs welcome though)
 - [ ] Screenshot endpoint
 - [ ] Allow custom headers
 - [ ] POST requests
-- [x] Docker image
 
 ## Similiar Projects
 
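With the Container section removed, the README no longer shows a request example. A minimal client sketch for a running proxy on the default port, based on proxy.py's `/` route and its `url` query parameter (the `requests` library here is an assumption; any HTTP client works):

```python
# Minimal client sketch: GET /?url=<target> returns the rendered HTML.
import requests  # assumed available, not a declared dependency

resp = requests.get(
    "http://localhost:32323/",
    params={"url": "https://example.com"},  # proxy.py reads request.args.get("url")
)
print(resp.text[:200])  # first 200 characters of the rendered page
```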
config.py (deleted, 72 lines)
@@ -1,72 +0,0 @@
-import os
-import argparse
-from dataclasses import dataclass
-
-
-@dataclass
-class ScraperConfig:
-    wait_time: float
-    headless: bool
-    user_agent: str
-
-
-@dataclass
-class ServerConfig:
-    host: str
-    port: int
-
-
-def get_configs():
-    parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
-                                     description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
-                                     usage="")
-    parser.add_argument(
-        "--port",
-        help="Port the proxy runs on.",
-        required=False,
-        type=int,
-        default=os.getenv("PROXY_PORT", 32323)
-    )
-
-    parser.add_argument(
-        "--host",
-        help="Host the proxy to runs on.",
-        required=False,
-        type=str,
-        default=os.getenv("PROXY_HOST", "0.0.0.0")
-    )
-
-    parser.add_argument(
-        "--wait",
-        help="Seconds to wait before returning content.",
-        required=False,
-        type=float,
-        default=os.getenv("SCRAPER_WAIT_TIME", 10)
-    )
-
-    parser.add_argument(
-        "--headless",
-        help="Whether or not to run Chrome headless.",
-        required=False,
-        type=bool,
-        default=os.getenv("SCRAPER_HEADLESS", True)
-    )
-
-    parser.add_argument(
-        "--user-agent",
-        help="Chrome user agent. Changing with the current ChromeDriver version recommended.",
-        required=False,
-        type=str,
-        default=os.getenv("SCRAPER_USER_AGENT", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36")
-    )
-
-    args = parser.parse_args()
-
-    server_config = ServerConfig(host=args.host,
-                                 port=args.port)
-
-    scraper_config = ScraperConfig(wait_time=args.wait,
-                                   headless=args.headless,
-                                   user_agent=args.user_agent)
-
-    return server_config, scraper_config
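One caveat in the deleted config.py (the same pattern survives in the new proxy.py): `type=bool` does not parse booleans. argparse calls `bool()` on the raw string, and any non-empty string is truthy, so `--headless=False` (or `SCRAPER_HEADLESS=False` via the env fallback, since string defaults are also passed through `type`) still yields `True`. A small demonstration, with a hypothetical `str2bool` converter that is not in the original code:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--headless", type=bool, default=True)
print(parser.parse_args(["--headless=False"]).headless)  # True: bool("False") is truthy

# Hypothetical fix: parse the string explicitly.
def str2bool(value: str) -> bool:
    return value.strip().lower() in ("1", "true", "yes", "on")

parser2 = argparse.ArgumentParser()
parser2.add_argument("--headless", type=str2bool, default=True)
print(parser2.parse_args(["--headless=False"]).headless)  # False
```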
docker-compose.yml (deleted, 20 lines)

@@ -1,20 +0,0 @@
-services:
-  chromedriver-http-proxy:
-    image: ghcr.io/s4wyer/chromedriver-http-proxy:latest
-    container_name: chromedriver-http-proxy
-
-    ports:
-      - "32323:32323"
-
-    environment:
-      - PROXY_PORT=32323
-      - PROXY_HOST=0.0.0.0
-      - SCRAPER_WAIT_TIME=10
-      - SCRAPER_HEADLESS=True
-      - SCRAPER_USER_AGENT="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
-
-    # Increase shared memory size to prevent Chromium from crashing.
-    # Recommended by Selenium https://hub.docker.com/r/selenium/standalone-chrome
-    shm_size: '2gb'
-
-    restart: unless-stopped
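A quirk in the deleted compose file: with list-form `environment:` entries, the double quotes around the user agent are part of the YAML scalar, so the variable's value includes literal `"` characters. A quick in-container check (a sketch, assuming Compose passes the scalar through verbatim):

```python
# Inside the container: the SCRAPER_USER_AGENT value starts with a literal
# double quote because of the `- KEY="value"` list syntax above.
import os

ua = os.environ.get("SCRAPER_USER_AGENT", "")
print(ua.startswith('"'))  # True with the entry as written
```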
proxy.py (132 changed lines)
@@ -1,8 +1,6 @@
 import time
-import atexit
-import logging
+from dataclasses import dataclass
+import argparse
 
-from config import get_configs, ScraperConfig
-
 import undetected_chromedriver as uc
 from selenium.webdriver.support.ui import WebDriverWait
@@ -11,57 +9,120 @@ from selenium.webdriver import ChromeOptions
 from flask import Flask, request
 
 
+@dataclass
+class ScraperConfig:
+    wait_time: float
+    headless: bool
+    user_agent: str
+
+
+@dataclass
+class ServerConfig:
+    host: str
+    port: int
+
+
 class Scraper:
     def __init__(self, config: ScraperConfig):
         self.config = config
         self.driver = None
 
+    def __enter__(self):
         self._setup_driver()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._cleanup()
+
+    def _cleanup(self):
+        driver = self.driver
+        driver.close()
+        driver.quit()
 
     def _setup_driver(self):
+        headless = self.config.headless
+        user_agent = self.config.user_agent
+
         chrome_options = ChromeOptions()
-        chrome_options.add_argument(f"--user-agent={self.config.user_agent}")
+        chrome_options.add_argument(f"--user-agent={user_agent}")
 
         self.driver = uc.Chrome(
-            headless=self.config.headless,
+            headless=headless,
             options=chrome_options,
            use_subprocess=False
         )
 
-        logger.info("Driver started.")
-
-    def cleanup(self):
-        if self.driver:
-            try:
-                self.driver.quit()
-                logger.info("Driver closed.")
-            except Exception as e:
-                logger.error(f"Exception during cleanup: {e}")
-            finally:
-                self.driver = None
-
     def render_page(self, url):
-        logger.info(f"Fetching {url}...")
-        self.driver.get(url)
+        wait_time = self.config.wait_time
+        driver = self.driver
 
-        WebDriverWait(self.driver, timeout=self.config.wait_time).until(
+        driver.get(url)
+
+        WebDriverWait(self.driver, wait_time).until(
             lambda driver: driver.execute_script("return document.readyState") == "complete"
         )
 
-        time.sleep(self.config.wait_time)
+        time.sleep(wait_time)
 
-        logger.info(f"Fetched {url}.")
-
         return self.driver.page_source
 
 
 if __name__ == "__main__":
-    # logging.basicConfig(level=logging.INFO)
-    logger = logging.getLogger(__name__)
-    server_config, scraper_config = get_configs()
-
-    scraper = Scraper(scraper_config)
-
-    atexit.register(scraper.cleanup)
+    parser = argparse.ArgumentParser(prog="ChromeDriver HTTP Proxy",
+                                     description="Simple HTTP proxy that renders pages with undetected-chromedriver and returns the HTML",
+                                     usage="")
+    parser.add_argument(
+        "--port",
+        help="Port the proxy runs on.",
+        required=False,
+        type=int,
+        default=32323
+    )
+
+    parser.add_argument(
+        "--host",
+        help="Host the proxy to runs on.",
+        required=False,
+        type=str,
+        default="0.0.0.0"
+    )
+
+    parser.add_argument(
+        "--wait",
+        help="Seconds to wait before returning content.",
+        required=False,
+        type=float,
+        default=10
+    )
+
+    parser.add_argument(
+        "--headless",
+        help="Whether or not to run Chrome headless.",
+        required=False,
+        type=bool,
+        default=True
+    )
+
+    parser.add_argument(
+        "--user-agent",
+        help="Chrome user agent. Changing with the current ChromeDriver version recommended.",
+        required=False,
+        type=str,
+        default="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
+    )
+
+    args = parser.parse_args()
+
+    port = args.port
+    host = args.host
+
+    wait = args.wait
+    headless = args.headless
+    user_agent = args.user_agent
+
+    server_config = ServerConfig(host=host, port=port)
+    scraper_config = ScraperConfig(wait_time=wait, headless=headless, user_agent=user_agent)
 
     # run the server
     app = Flask(__name__)
@@ -69,12 +130,11 @@ if __name__ == "__main__":
     @app.route("/")
     def proxy_route():
         url = request.args.get("url")
-        try:
-            html = scraper.render_page(url)
-            return html
-            logger.info(f"Successfully sent {url} to client.")
-        except Exception as e:
-            logger.error(f"Error sending {url} to client: {e}", 500)
-
+        with Scraper(scraper_config) as scraper:
+            try:
+                html = scraper.render_page(url)
+                return html
+            except Exception as e:
+                print(f"Error: {e}")
 
     app.run(host=server_config.host, port=server_config.port)
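The head side trades the long-lived driver plus `atexit` cleanup for a per-request context manager. A minimal usage sketch of the new lifecycle (user agent shortened here for brevity):

```python
# Each `with` block builds a fresh Chrome via __enter__/_setup_driver and
# tears it down via __exit__/_cleanup when the block exits.
config = ScraperConfig(wait_time=10, headless=True, user_agent="Mozilla/5.0 ...")

with Scraper(config) as scraper:
    html = scraper.render_page("https://example.com")
print(len(html))
```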
pyproject.toml (2 deletions)

@@ -3,8 +3,6 @@ name = "playwright-http-proxy"
 version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
-license = "AGPL-3.0-only"
-license-files = ["LICEN[CS]E*"]
 requires-python = ">=3.13"
 dependencies = [
     "argparse>=1.4.0",
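A side note on the surviving `argparse>=1.4.0` dependency: argparse has shipped with the standard library since Python 3.2, so this pin resolves to the old PyPI backport rather than the module the code actually imports. A quick way to check which module wins:

```python
# The stdlib module normally shadows the PyPI "argparse" distribution on
# sys.path, so the extra dependency is dead weight rather than harmful.
import argparse
print(argparse.__file__)  # points into the Python standard library
```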