perfect-postcode/finder/http_client.py
2026-05-17 10:16:30 +01:00

72 lines
2.2 KiB
Python

import logging
import random
import time
import httpx
from fake_useragent import UserAgent
from constants import MAX_RETRIES, RETRY_BASE_DELAY
# Logger used by the helpers in this module; the name is a fixed string
# rather than __name__.
log = logging.getLogger("rightmove")

# Module-level User-Agent source, created once at import time. The filter
# arguments below are passed straight to fake_useragent; presumably they limit
# the pool to Chrome/Edge on Windows/macOS at version 120 or newer -- confirm
# against the fake_useragent documentation.
_ua = UserAgent(
    browsers=["Chrome", "Edge"],
    os=["Windows", "Mac OS X"],
    min_version=120.0,
)
def make_client() -> httpx.Client:
    """Create a new ``httpx.Client`` preconfigured for JSON requests.

    The ``User-Agent`` header is read from ``_ua.random`` once, when the
    client is built, so every request made through the returned client
    sends the same value. The client is created with ``timeout=30``, an
    ``Accept: application/json`` header, and redirect following enabled.

    Returns:
        A fresh ``httpx.Client``; this function does not close it.
    """
    default_headers = {
        "User-Agent": _ua.random,
        "Accept": "application/json",
    }
    client = httpx.Client(
        headers=default_headers,
        timeout=30,
        follow_redirects=True,
    )
    return client
def fetch_with_retry(
    client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
) -> dict | None:
    """GET ``url`` and return its decoded JSON body, retrying transient failures.

    At most ``MAX_RETRIES`` attempts are made. An attempt is retried when the
    response status is 429, 500, 502, 503 or 504, or when the request raises
    ``httpx.ConnectError`` or any ``httpx.TimeoutException`` (connect, read,
    write or pool timeout). Between attempts the function sleeps for
    ``RETRY_BASE_DELAY * 2**attempt`` seconds plus up to one second of random
    jitter. No sleep happens after the final attempt.

    Args:
        client: The ``httpx.Client`` used to issue the request.
        url: The URL to fetch.
        params: Optional query parameters passed to ``client.get``.
        on_403: Kept for compatibility with older callers. A 403 response is
            never retried; this flag only selects which error message is
            logged for it ("forbidden" when true, the generic
            "non-retryable" message otherwise).

    Returns:
        The value of ``resp.json()`` for a 200 response, or ``None`` when the
        response is non-retryable, the 200 body is not valid JSON, or every
        attempt failed.
    """
    for attempt in range(MAX_RETRIES):
        try:
            resp = client.get(url, params=params)
        except (httpx.ConnectError, httpx.TimeoutException) as exc:
            # TimeoutException also covers ConnectTimeout, which is not a
            # subclass of ConnectError and would otherwise escape the loop.
            failure = type(exc).__name__
        else:
            status = resp.status_code
            if status == 200:
                try:
                    return resp.json()
                except ValueError:
                    # JSON decode errors are ValueError subclasses; a 200
                    # with an unparsable body is reported as a failure
                    # instead of propagating to the caller.
                    log.error("HTTP 200 from %s but body is not valid JSON", url)
                    return None
            if status == 403 and on_403:
                log.error("HTTP 403 from %s (forbidden)", url)
                return None
            if status not in (429, 500, 502, 503, 504):
                log.error("HTTP %d from %s (non-retryable)", status, url)
                return None
            failure = f"HTTP {status}"

        # Shared tail for both retryable statuses and transport errors.
        if attempt + 1 >= MAX_RETRIES:
            # Nothing left to retry: skip the pointless final backoff sleep.
            log.warning(
                "%s from %s on final attempt %d/%d",
                failure,
                url,
                attempt + 1,
                MAX_RETRIES,
            )
            break

        # Exponential backoff with jitter so parallel callers do not retry
        # in lockstep.
        delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
        log.warning(
            "%s from %s, retry %d/%d in %.1fs",
            failure,
            url,
            attempt + 1,
            MAX_RETRIES,
            delay,
        )
        time.sleep(delay)

    log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
    return None