72 lines
2.2 KiB
Python
72 lines
2.2 KiB
Python
import logging
|
|
import random
|
|
import time
|
|
|
|
import httpx
|
|
from fake_useragent import UserAgent
|
|
|
|
from constants import MAX_RETRIES, RETRY_BASE_DELAY
|
|
|
|
# Module-wide logger; every message emitted by the helpers below goes through it.
log = logging.getLogger("rightmove")

# Shared User-Agent generator, limited to the browsers, operating systems and
# minimum browser version listed here.
_ua = UserAgent(
    browsers=["Chrome", "Edge"],
    os=["Windows", "Mac OS X"],
    min_version=120.0,
)
|
|
|
|
|
|
def make_client() -> httpx.Client:
    """Build a fresh ``httpx.Client`` configured for JSON requests.

    The client uses a timeout of 30, follows redirects, asks for
    ``application/json`` and carries a User-Agent drawn from the shared
    ``_ua`` generator. The User-Agent is picked once, when the client is
    created, and then reused for every request made through that client.

    Returns:
        A new ``httpx.Client``; the caller is responsible for closing it.
    """
    default_headers = {
        "User-Agent": _ua.random,
        "Accept": "application/json",
    }
    return httpx.Client(timeout=30, headers=default_headers, follow_redirects=True)
|
|
|
|
|
|
def fetch_with_retry(
    client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
) -> dict | None:
    """GET ``url`` and return the decoded JSON body, retrying transient failures.

    Up to ``MAX_RETRIES`` attempts are made. An attempt is retried when the
    server answers 429/500/502/503/504, or when the request fails with
    ``httpx.ConnectError`` or any ``httpx.TimeoutException`` (connect, read,
    write or pool timeout). Between attempts the function sleeps for
    ``RETRY_BASE_DELAY * 2**attempt`` plus up to one second of random jitter.
    No sleep happens after the final attempt.

    Args:
        client: The ``httpx.Client`` used to send the request.
        url: Address to fetch.
        params: Optional query parameters forwarded to ``client.get``.
        on_403: Kept for compatibility with older callers. A 403 is never
            retried; this flag only selects whether it is logged with the
            dedicated "forbidden" message (True) or the generic
            non-retryable one (False).

    Returns:
        The parsed JSON payload on HTTP 200, or ``None`` when the response is
        non-retryable, the 200 body is not valid JSON, or all attempts fail.
    """
    retryable_statuses = (429, 500, 502, 503, 504)

    for attempt in range(MAX_RETRIES):
        try:
            resp = client.get(url, params=params)
        except (httpx.ConnectError, httpx.TimeoutException) as e:
            # TimeoutException is the common base of Connect/Read/Write/Pool
            # timeouts, so connect timeouts are retried like the others.
            failure = type(e).__name__
        else:
            status = resp.status_code
            if status == 200:
                try:
                    return resp.json()
                except ValueError:
                    # JSON decode errors are ValueError subclasses; a 200 with
                    # a non-JSON body is treated as a permanent failure.
                    log.error("HTTP 200 from %s but body is not valid JSON", url)
                    return None
            if status == 403 and on_403:
                log.error("HTTP 403 from %s (forbidden)", url)
                return None
            if status not in retryable_statuses:
                log.error("HTTP %d from %s (non-retryable)", status, url)
                return None
            failure = f"HTTP {status}"

        if attempt + 1 >= MAX_RETRIES:
            # Nothing left to retry: report the last failure and skip the
            # pointless backoff sleep before giving up.
            log.warning(
                "%s from %s, no retries left (%d/%d)",
                failure,
                url,
                attempt + 1,
                MAX_RETRIES,
            )
            break

        # Exponential backoff with jitter so parallel callers do not retry
        # in lockstep.
        delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
        log.warning(
            "%s from %s, retry %d/%d in %.1fs",
            failure,
            url,
            attempt + 1,
            MAX_RETRIES,
            delay,
        )
        time.sleep(delay)

    log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
    return None
|