home.co.uk scraping

This commit is contained in:
Andras Schmelczer 2026-03-08 21:29:27 +00:00
parent 74d6dd7bf8
commit f3e3c1ee49
6 changed files with 538 additions and 28 deletions

View file

@ -23,7 +23,7 @@ scrape_outcodes_total = Gauge(
scrape_properties_total = Gauge(
"scrape_properties_total",
"Properties found so far",
["channel"],
["channel", "source"],
)
scrape_elapsed_seconds = Gauge(
@ -32,18 +32,18 @@ scrape_elapsed_seconds = Gauge(
)
# ---------------------------------------------------------------------------
# Counters — monotonically increasing
# Counters — Rightmove (monotonically increasing)
# ---------------------------------------------------------------------------
http_requests_total = Counter(
"http_requests_total",
"HTTP requests made by the scraper",
"HTTP requests made to Rightmove",
["status", "endpoint"],
)
http_errors_total = Counter(
"http_errors_total",
"HTTP connection/timeout errors",
"Rightmove HTTP connection/timeout errors",
["type"],
)
@ -56,4 +56,58 @@ ip_rotations_total = Counter(
# Counter for scrape errors; per its help text these are counted per outcode.
# It carries a single `source` label.
# NOTE(review): `source` presumably names the site being scraped
# (Rightmove vs home.co.uk) — confirm against the call sites.
scrape_errors_total = Counter(
"scrape_errors_total",
"Per-outcode scrape errors",
["source"],
)
# ---------------------------------------------------------------------------
# Counters — home.co.uk
# ---------------------------------------------------------------------------
# Counter for HTTP requests sent to the home.co.uk API, with one `status` label.
# NOTE(review): `status` is presumably the HTTP response status — confirm
# against the code that increments this counter.
homecouk_requests_total = Counter(
"homecouk_requests_total",
"HTTP requests made to home.co.uk API",
["status"],
)
# Counter for connection/timeout failures when talking to home.co.uk, with one
# `type` label.
# NOTE(review): `type` presumably distinguishes the error kind — confirm the
# label values used by callers.
homecouk_errors_total = Counter(
"homecouk_errors_total",
"home.co.uk HTTP connection/timeout errors",
["type"],
)
# Counter for properties scraped from home.co.uk, counted before deduplication
# (per the help text), with one `channel` label.
# NOTE(review): unlike the neighbouring counters, this name has no `_total`
# suffix — confirm whether that is intentional.
homecouk_properties_scraped = Counter(
"homecouk_properties_scraped",
"Properties scraped from home.co.uk (before dedup)",
["channel"],
)
# Counter for home.co.uk properties that were skipped because the same property
# had already been found on Rightmove (per the help text), with one `channel`
# label.
cross_source_dedup_total = Counter(
"cross_source_dedup_total",
"home.co.uk properties skipped because same property already found on Rightmove",
["channel"],
)
# ---------------------------------------------------------------------------
# Counters — FlareSolverr / cookie management
# ---------------------------------------------------------------------------
# Counter for attempts to solve a Cloudflare challenge through FlareSolverr,
# with one `result` label.
# NOTE(review): `result` presumably records the attempt outcome (e.g.
# success/failure) — confirm the label values used by callers.
flaresolverr_attempts_total = Counter(
"flaresolverr_attempts_total",
"FlareSolverr Cloudflare challenge-solving attempts",
["result"],
)
# Counter for home.co.uk cookie refresh attempts; per the help text a refresh
# is triggered by an HTTP 403. It carries one `result` label.
# NOTE(review): confirm the `result` label values against the refresh code.
cookie_refreshes_total = Counter(
"cookie_refreshes_total",
"home.co.uk cookie refresh attempts (triggered by 403)",
["result"],
)
# ---------------------------------------------------------------------------
# Gauges — home.co.uk state
# ---------------------------------------------------------------------------
# Unlabelled gauge acting as an on/off flag: per the help text, 1 means
# home.co.uk scraping is currently active and 0 means it is not.
# NOTE(review): the code that sets this value is not visible here — confirm
# where and when it is toggled.
homecouk_enabled = Gauge(
"homecouk_enabled",
"Whether home.co.uk scraping is currently active (1=yes, 0=no)",
)