"""FlareSolverr client — fetch Cloudflare-protected pages as rendered HTML.

FlareSolverr (https://github.com/FlareSolverr/FlareSolverr) drives an
undetected browser to pass Cloudflare's challenge and returns the fully
rendered HTML. It runs as a sidecar service (see docker-compose.yml) sharing
the Gluetun VPN network namespace, so its browser egresses through the VPN.

Verified working against Zoopla's managed Turnstile on a datacenter VPN IP,
provided a reused session and a generous maxTimeout (~120s) — the first
challenge solve is slow, subsequent requests on the warm session are fast.
"""

import logging

import httpx

from constants import FLARESOLVERR_MAX_TIMEOUT_MS, FLARESOLVERR_URL

log = logging.getLogger("flaresolverr")


class FlareSolverrError(Exception):
    """Raised when FlareSolverr cannot fetch/solve a URL."""


class FlareSolverrSession:
    """A reusable FlareSolverr browser session (context manager).

    Reusing one session keeps the cleared Cloudflare cookies warm across
    requests, so only the first fetch pays the full challenge-solve cost.

    The underlying HTTP client is created in ``__init__`` and closed in
    ``__exit__``, so an instance is meant for a single ``with`` block.
    """

    def __init__(
        self,
        url: str = FLARESOLVERR_URL,
        session: str = "finder",
        max_timeout_ms: int = FLARESOLVERR_MAX_TIMEOUT_MS,
    ) -> None:
        """Store the endpoint settings and build the HTTP client.

        Args:
            url: FlareSolverr endpoint that commands are POSTed to.
            session: Name of the FlareSolverr session to create and reuse.
            max_timeout_ms: ``maxTimeout`` (milliseconds) sent with each
                ``request.get`` command.
        """
        self._url = url
        self._session = session
        self._max_timeout = max_timeout_ms
        # The client timeout must comfortably exceed maxTimeout (FlareSolverr
        # blocks for up to maxTimeout while solving before responding), hence
        # the extra 30 seconds of headroom.
        self._client = httpx.Client(
            timeout=httpx.Timeout(self._max_timeout / 1000 + 30)
        )
        self._active = False

    @staticmethod
    def _server_message(resp: httpx.Response) -> str:
        """Return the ``message`` field of a JSON error body, or ``""``."""
        try:
            body = resp.json()
        except ValueError:
            return ""
        if isinstance(body, dict) and body.get("message"):
            return str(body["message"])
        return ""

    def _post(self, payload: dict) -> dict:
        """POST one command to FlareSolverr and return the decoded reply.

        Args:
            payload: JSON-serialisable command body (``cmd`` plus arguments).

        Returns:
            The decoded JSON object of a reply whose ``status`` is ``"ok"``.

        Raises:
            FlareSolverrError: On transport failure, a non-2xx status, a body
                that is not a JSON object, or a ``status`` other than ``"ok"``.
        """
        try:
            resp = self._client.post(self._url, json=payload)
            resp.raise_for_status()
            data = resp.json()
        except httpx.HTTPStatusError as exc:
            # An error status may still carry a JSON body explaining what went
            # wrong; include that message instead of discarding it.
            detail = self._server_message(exc.response)
            suffix = f" ({detail})" if detail else ""
            raise FlareSolverrError(
                f"FlareSolverr request to {self._url} failed: {exc}{suffix}"
            ) from exc
        except (httpx.HTTPError, ValueError) as exc:
            raise FlareSolverrError(
                f"FlareSolverr request to {self._url} failed: {exc}"
            ) from exc

        # Valid JSON that is not an object (null, list, ...) would otherwise
        # surface as an AttributeError from the .get() calls below.
        if not isinstance(data, dict):
            raise FlareSolverrError(
                f"FlareSolverr {payload.get('cmd')} returned an unexpected "
                f"{type(data).__name__} body"
            )
        if data.get("status") != "ok":
            raise FlareSolverrError(
                f"FlareSolverr {payload.get('cmd')} failed: {data.get('message')}"
            )
        return data

    def __enter__(self) -> "FlareSolverrSession":
        """Create a fresh FlareSolverr session and return ``self``.

        Raises:
            FlareSolverrError: If the session cannot be created.
        """
        # Start from a clean session (ignore destroy errors for a fresh name).
        try:
            self._post({"cmd": "sessions.destroy", "session": self._session})
        except FlareSolverrError:
            pass
        self._post({"cmd": "sessions.create", "session": self._session})
        self._active = True
        log.info("FlareSolverr session %r ready at %s", self._session, self._url)
        return self

    def get(self, url: str) -> str:
        """Fetch a URL through FlareSolverr; return the solved HTML.

        Args:
            url: Page to fetch through the session.

        Returns:
            The ``solution.response`` field of the reply, or ``""`` when the
            reply has no solution or an empty response.

        Raises:
            FlareSolverrError: If the request fails.
        """
        data = self._post(
            {
                "cmd": "request.get",
                "session": self._session,
                "url": url,
                "maxTimeout": self._max_timeout,
            }
        )
        solution = data.get("solution") or {}
        return solution.get("response", "") or ""

    def __exit__(self, *exc_info: object) -> None:
        """Destroy the session (best effort) and always close the client."""
        try:
            if self._active:
                try:
                    self._post(
                        {"cmd": "sessions.destroy", "session": self._session}
                    )
                except FlareSolverrError as exc:
                    # Teardown is best effort; never mask the body's exception.
                    log.debug("FlareSolverr session destroy failed: %s", exc)
        finally:
            # Release the connection pool even if teardown is interrupted.
            self._active = False
            self._client.close()