perfect-postcode/finder/metrics.py

167 lines
4.3 KiB
Python

from prometheus_client import Counter, Gauge
# ---------------------------------------------------------------------------
# Gauges — current scrape state, updated after each outcode
# ---------------------------------------------------------------------------
# Scrape state indicator: one time series per `state` label value; per the
# help text, a value of 1 marks the active state.
scrape_state = Gauge(
    name="scrape_state",
    documentation="Current scrape state as a labeled gauge (1 = active)",
    labelnames=["state"],
)

# Progress within the current channel. Both are unlabelled gauges; note that
# `scrape_outcodes_total` is a Gauge despite its `_total` suffix.
scrape_outcodes_done = Gauge(
    name="scrape_outcodes_done",
    documentation="Outcodes processed in current channel",
)
scrape_outcodes_total = Gauge(
    name="scrape_outcodes_total",
    documentation="Total outcodes in current channel",
)

# Running property count, split by `channel` and `source` labels. Also a
# Gauge (settable), not a Counter, despite the `_total` suffix.
scrape_properties_total = Gauge(
    name="scrape_properties_total",
    documentation="Properties found so far",
    labelnames=["channel", "source"],
)

# Elapsed wall time of the scrape, in seconds.
scrape_elapsed_seconds = Gauge(
    name="scrape_elapsed_seconds",
    documentation="Seconds since scrape started",
)
# ---------------------------------------------------------------------------
# Counters — Rightmove HTTP, VPN IP rotation, per-source scrape errors (monotonically increasing)
# ---------------------------------------------------------------------------
# Rightmove HTTP traffic, labelled by `status` and `endpoint`.
http_requests_total = Counter(
    name="http_requests_total",
    documentation="HTTP requests made to Rightmove",
    labelnames=["status", "endpoint"],
)

# Rightmove connection/timeout failures, labelled by error `type`.
http_errors_total = Counter(
    name="http_errors_total",
    documentation="Rightmove HTTP connection/timeout errors",
    labelnames=["type"],
)

# VPN IP rotation attempts, labelled by `result`.
ip_rotations_total = Counter(
    name="ip_rotations_total",
    documentation="VPN IP rotation attempts",
    labelnames=["result"],
)

# Per-outcode scrape errors, labelled by the `source` they came from.
scrape_errors_total = Counter(
    name="scrape_errors_total",
    documentation="Per-outcode scrape errors",
    labelnames=["source"],
)
# ---------------------------------------------------------------------------
# Counters — home.co.uk and cross-source deduplication
# ---------------------------------------------------------------------------
# home.co.uk API requests, labelled by `status`.
homecouk_requests_total = Counter(
    name="homecouk_requests_total",
    documentation="HTTP requests made to home.co.uk API",
    labelnames=["status"],
)

# home.co.uk connection/timeout failures, labelled by error `type`.
homecouk_errors_total = Counter(
    name="homecouk_errors_total",
    documentation="home.co.uk HTTP connection/timeout errors",
    labelnames=["type"],
)

# Properties scraped from home.co.uk before deduplication, per `channel`.
# NOTE: the name has no `_total` suffix; prometheus_client documents that it
# appends `_total` to counter samples on exposition.
homecouk_properties_scraped = Counter(
    name="homecouk_properties_scraped",
    documentation="Properties scraped from home.co.uk (before dedup)",
    labelnames=["channel"],
)

# Properties skipped because another source already supplied them, per `channel`.
cross_source_dedup_total = Counter(
    name="cross_source_dedup_total",
    documentation="Properties skipped because same property already found on another source",
    labelnames=["channel"],
)
# ---------------------------------------------------------------------------
# Counters — OpenRent
# ---------------------------------------------------------------------------
# OpenRent HTTP requests, labelled by `status`.
openrent_requests_total = Counter(
    name="openrent_requests_total",
    documentation="HTTP requests made to OpenRent",
    labelnames=["status"],
)

# OpenRent connection/timeout failures, labelled by error `type`.
openrent_errors_total = Counter(
    name="openrent_errors_total",
    documentation="OpenRent HTTP connection/timeout errors",
    labelnames=["type"],
)

# Properties scraped from OpenRent before deduplication, per `channel`.
# NOTE: the name has no `_total` suffix; prometheus_client documents that it
# appends `_total` to counter samples on exposition.
openrent_properties_scraped = Counter(
    name="openrent_properties_scraped",
    documentation="Properties scraped from OpenRent (before dedup)",
    labelnames=["channel"],
)
# ---------------------------------------------------------------------------
# Counters — Zoopla
# ---------------------------------------------------------------------------
# Zoopla search-result pages scraped, per `channel`.
# NOTE: the name has no `_total` suffix; prometheus_client documents that it
# appends `_total` to counter samples on exposition.
zoopla_pages_scraped = Counter(
    name="zoopla_pages_scraped",
    documentation="Search result pages scraped from Zoopla",
    labelnames=["channel"],
)

# Zoopla scraping errors, labelled by error `type`.
zoopla_errors_total = Counter(
    name="zoopla_errors_total",
    documentation="Zoopla scraping errors",
    labelnames=["type"],
)

# Properties scraped from Zoopla before deduplication, per `channel`.
# The `_total` exposition note on zoopla_pages_scraped applies here as well.
zoopla_properties_scraped = Counter(
    name="zoopla_properties_scraped",
    documentation="Properties scraped from Zoopla (before dedup)",
    labelnames=["channel"],
)
# ---------------------------------------------------------------------------
# Counters — FlareSolverr / cookie management
# ---------------------------------------------------------------------------
# FlareSolverr Cloudflare challenge attempts, labelled by `result`.
flaresolverr_attempts_total = Counter(
    name="flaresolverr_attempts_total",
    documentation="FlareSolverr Cloudflare challenge-solving attempts",
    labelnames=["result"],
)

# home.co.uk cookie refresh attempts (per the help text, triggered by a 403),
# labelled by `result`.
cookie_refreshes_total = Counter(
    name="cookie_refreshes_total",
    documentation="home.co.uk cookie refresh attempts (triggered by 403)",
    labelnames=["result"],
)
# ---------------------------------------------------------------------------
# Gauges — per-source enabled flags (home.co.uk, OpenRent, Zoopla)
# ---------------------------------------------------------------------------
# Unlabelled on/off flags, one per secondary source; per the help text,
# 1 means scraping of that source is currently active and 0 means it is not.
homecouk_enabled = Gauge(
    name="homecouk_enabled",
    documentation="Whether home.co.uk scraping is currently active (1=yes, 0=no)",
)
openrent_enabled = Gauge(
    name="openrent_enabled",
    documentation="Whether OpenRent scraping is currently active (1=yes, 0=no)",
)
zoopla_enabled = Gauge(
    name="zoopla_enabled",
    documentation="Whether Zoopla scraping is currently active (1=yes, 0=no)",
)