This commit is contained in:
Andras Schmelczer 2026-03-15 21:22:28 +00:00
parent 479ef92236
commit c38d654ac7
44 changed files with 2526 additions and 701 deletions

View file

@ -11,7 +11,9 @@ from metrics import http_errors_total, http_requests_total, ip_rotations_total
log = logging.getLogger("rightmove")
_ua = UserAgent(browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0)
_ua = UserAgent(
browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0
)
def _endpoint_label(url: str) -> str:
@ -27,6 +29,7 @@ def _status_label(code: int) -> str:
return "5xx"
return str(code)
# Gluetun control API — runs on port 8000 inside the gluetun container.
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
GLUETUN_API = "http://127.0.0.1:8000"
@ -42,17 +45,25 @@ def rotate_ip() -> bool:
# Get current IP
with httpx.Client(timeout=10) as ctl:
old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown"
old_ip = (
old_ip_resp.json().get("public_ip", "unknown")
if old_ip_resp.status_code == 200
else "unknown"
)
log.info("Current IP: %s", old_ip)
# Trigger server change — stop the VPN, then start it again via the status endpoint so gluetun reconnects (presumably to a different server; the IP comparison confirms)
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"})
resp = ctl.put(
f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"}
)
if resp.status_code != 200:
log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
return False
time.sleep(2)
resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"})
resp = ctl.put(
f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"}
)
if resp.status_code != 200:
log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
return False
@ -99,7 +110,9 @@ def fetch_with_retry(
for attempt in range(MAX_RETRIES):
try:
resp = client.get(url, params=params)
http_requests_total.labels(status=_status_label(resp.status_code), endpoint=endpoint).inc()
http_requests_total.labels(
status=_status_label(resp.status_code), endpoint=endpoint
).inc()
if resp.status_code == 200:
return resp.json()
if resp.status_code == 403 and on_403:
@ -111,15 +124,34 @@ def fetch_with_retry(
return None
if resp.status_code in (429, 500, 502, 503, 504):
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning("HTTP %d from %s, retry %d/%d in %.1fs", resp.status_code, url, attempt + 1, MAX_RETRIES, delay)
log.warning(
"HTTP %d from %s, retry %d/%d in %.1fs",
resp.status_code,
url,
attempt + 1,
MAX_RETRIES,
delay,
)
time.sleep(delay)
continue
log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
return None
except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
except (
httpx.ConnectError,
httpx.ReadTimeout,
httpx.WriteTimeout,
httpx.PoolTimeout,
) as e:
http_errors_total.labels(type=type(e).__name__).inc()
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning("%s from %s, retry %d/%d in %.1fs", type(e).__name__, url, attempt + 1, MAX_RETRIES, delay)
log.warning(
"%s from %s, retry %d/%d in %.1fs",
type(e).__name__,
url,
attempt + 1,
MAX_RETRIES,
delay,
)
time.sleep(delay)
http_errors_total.labels(type="retry_exhausted").inc()
log.error("All %d retries exhausted for %s", MAX_RETRIES, url)