"""HTTP helpers: JSON fetching with retries and VPN IP rotation via gluetun."""

import logging
import random
import threading
import time

import httpx
from fake_useragent import UserAgent

from constants import MAX_RETRIES, RETRY_BASE_DELAY
from metrics import http_errors_total, http_requests_total, ip_rotations_total

log = logging.getLogger("rightmove")

# User-Agent source restricted to Chrome/Edge on Windows/macOS, version >= 120.
_ua = UserAgent(
    browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0
)

# Status codes that are worth retrying with backoff.
_RETRYABLE_STATUS = (429, 500, 502, 503, 504)

# Transport-level failures that are worth retrying with backoff.
# ConnectTimeout is listed explicitly: it is a timeout, not a ConnectError
# subclass.  ReadError/WriteError/RemoteProtocolError cover pooled connections
# that went stale (e.g. after a VPN restart).
_RETRYABLE_ERRORS = (
    httpx.ConnectError,
    httpx.ConnectTimeout,
    httpx.ReadTimeout,
    httpx.WriteTimeout,
    httpx.PoolTimeout,
    httpx.ReadError,
    httpx.WriteError,
    httpx.RemoteProtocolError,
)


def _endpoint_label(url: str) -> str:
    """Map a URL to a coarse endpoint name used as a metrics label."""
    if "typeahead" in url:
        return "typeahead"
    if "search" in url:
        return "search"
    return "other"


def _status_label(code: int) -> str:
    """Return the metrics label for a status code; all 5xx collapse to "5xx"."""
    if code >= 500:
        return "5xx"
    return str(code)


# Gluetun control API — runs on port 8000 inside the gluetun container.
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
GLUETUN_API = "http://127.0.0.1:8000"

# Serializes rotations so concurrent callers do not restart the VPN at once.
_ip_rotate_lock = threading.Lock()


def rotate_ip() -> bool:
    """Restart the gluetun VPN and wait until the public IP changes.

    The VPN is stopped and started again through the gluetun control API,
    then the public IP endpoint is polled (30 times, 2 s apart) until it
    reports an address different from the one seen before the restart.
    Every outcome is counted in ``ip_rotations_total``.

    Returns:
        True if a new, different public IP was observed; False if the
        control API rejected a request, polling timed out, or any other
        error occurred.
    """
    with _ip_rotate_lock:
        log.info("Rotating VPN IP via gluetun...")
        try:
            with httpx.Client(timeout=10) as ctl:
                # Remember the current IP so a change can be detected later.
                old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
                old_ip = (
                    old_ip_resp.json().get("public_ip", "unknown")
                    if old_ip_resp.status_code == 200
                    else "unknown"
                )
                log.info("Current IP: %s", old_ip)

                # Trigger a reconnect: stop the VPN, pause briefly, start it
                # again.  NOTE(review): presumably gluetun selects a new
                # server on restart — confirm against the gluetun config.
                resp = ctl.put(
                    f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"}
                )
                if resp.status_code != 200:
                    log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
                    ip_rotations_total.labels(result="failure").inc()
                    return False

                time.sleep(2)

                resp = ctl.put(
                    f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"}
                )
                if resp.status_code != 200:
                    log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
                    ip_rotations_total.labels(result="failure").inc()
                    return False

            # Wait for reconnection.
            for _ in range(30):
                time.sleep(2)
                try:
                    with httpx.Client(timeout=10) as ctl:
                        new_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
                        if new_ip_resp.status_code == 200:
                            new_ip = new_ip_resp.json().get("public_ip", "")
                            if new_ip and new_ip != old_ip:
                                log.info("IP rotated: %s → %s", old_ip, new_ip)
                                ip_rotations_total.labels(result="success").inc()
                                return True
                except Exception as exc:
                    # Best effort: errors are expected while the VPN is
                    # still reconnecting, so keep polling.
                    log.debug("Public IP poll failed, still waiting: %s", exc)

            log.warning("IP rotation timed out (may still be same IP)")
            ip_rotations_total.labels(result="failure").inc()
            return False
        except Exception as e:
            log.error("IP rotation failed: %s", e)
            ip_rotations_total.labels(result="failure").inc()
            return False


def make_client() -> httpx.Client:
    """Create an httpx client with a random User-Agent that accepts JSON.

    The User-Agent is chosen once, when the client is created.
    """
    return httpx.Client(
        timeout=30,
        headers={"User-Agent": _ua.random, "Accept": "application/json"},
        follow_redirects=True,
    )


def _wait_before_retry(attempt: int, what: str, url: str) -> None:
    """Log a retryable failure and back off, unless no attempts remain.

    The delay is exponential in ``attempt`` with up to one second of jitter.
    On the final attempt nothing is slept, because no retry would follow.
    """
    if attempt + 1 >= MAX_RETRIES:
        log.warning("%s from %s, no retries left", what, url)
        return
    delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
    log.warning(
        "%s from %s, retry %d/%d in %.1fs",
        what,
        url,
        attempt + 1,
        MAX_RETRIES,
        delay,
    )
    time.sleep(delay)


def fetch_with_retry(
    client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
) -> dict | None:
    """GET ``url`` and return the decoded JSON body, retrying transient failures.

    Up to ``MAX_RETRIES`` attempts are made.  429/500/502/503/504 responses
    and transport errors (connect/read/write failures, timeouts, protocol
    errors) are retried with exponential backoff.  Every response is counted
    in ``http_requests_total`` and every transport error in
    ``http_errors_total``.

    Args:
        client: The httpx client used for the request.
        url: Target URL.
        params: Optional query parameters.
        on_403: When True, a 403 triggers ``rotate_ip()`` and, if the rotation
            succeeds, one fresh round of attempts with 403 handling disabled.

    Returns:
        The parsed JSON body of a 200 response, or None on a non-retryable
        status, a failed IP rotation, or exhausted retries.

    Raises:
        Whatever ``resp.json()`` raises if a 200 response body is not valid
        JSON; httpx errors outside the retryable set also propagate.
    """
    endpoint = _endpoint_label(url)

    for attempt in range(MAX_RETRIES):
        try:
            resp = client.get(url, params=params)
        except _RETRYABLE_ERRORS as e:
            http_errors_total.labels(type=type(e).__name__).inc()
            _wait_before_retry(attempt, type(e).__name__, url)
            continue

        status = resp.status_code
        http_requests_total.labels(
            status=_status_label(status), endpoint=endpoint
        ).inc()

        if status == 200:
            return resp.json()

        if status == 403 and on_403:
            log.warning("HTTP 403 — IP likely blocked, rotating...")
            if rotate_ip():
                # Retry once with new IP (but don't rotate on 403 again).
                return fetch_with_retry(client, url, params, on_403=False)
            log.error("IP rotation failed, giving up on %s", url)
            return None

        if status in _RETRYABLE_STATUS:
            _wait_before_retry(attempt, f"HTTP {status}", url)
            continue

        log.error("HTTP %d from %s (non-retryable)", status, url)
        return None

    http_errors_total.labels(type="retry_exhausted").inc()
    log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
    return None