all good
This commit is contained in:
parent
47d89f6fad
commit
017902b8e6
82 changed files with 331466 additions and 54841 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -21,3 +21,4 @@ video/auth.*
|
|||
*.jpeg
|
||||
*.mp4
|
||||
|
||||
r5-java/tmp
|
||||
|
|
|
|||
11
Dockerfile
11
Dockerfile
|
|
@ -1,6 +1,17 @@
|
|||
# Stage 1: Build frontend
|
||||
FROM node:22-bookworm-slim AS frontend
|
||||
WORKDIR /app/frontend
|
||||
|
||||
ARG FRONTEND_BUGSINK_DSN=
|
||||
ARG BUGSINK_ENVIRONMENT=production
|
||||
ARG BUGSINK_RELEASE=
|
||||
ARG BUGSINK_SEND_DEFAULT_PII=true
|
||||
|
||||
ENV FRONTEND_BUGSINK_DSN=$FRONTEND_BUGSINK_DSN
|
||||
ENV BUGSINK_ENVIRONMENT=$BUGSINK_ENVIRONMENT
|
||||
ENV BUGSINK_RELEASE=$BUGSINK_RELEASE
|
||||
ENV BUGSINK_SEND_DEFAULT_PII=$BUGSINK_SEND_DEFAULT_PII
|
||||
|
||||
COPY frontend/package.json frontend/package-lock.json ./
|
||||
RUN npm ci
|
||||
RUN apt-get update \
|
||||
|
|
|
|||
29
README.md
29
README.md
|
|
@ -145,6 +145,13 @@ export STRIPE_WEBHOOK_SECRET=...
|
|||
export STRIPE_REFERRAL_COUPON_ID=...
|
||||
export GOOGLE_OAUTH_CLIENT_ID=...
|
||||
export GOOGLE_OAUTH_CLIENT_SECRET=...
|
||||
|
||||
# Optional Bugsink/Sentry-compatible error reporting
|
||||
export BUGSINK_DSN=...
|
||||
export FRONTEND_BUGSINK_DSN=...
|
||||
export BUGSINK_ENVIRONMENT=development
|
||||
export BUGSINK_RELEASE=...
|
||||
export BUGSINK_SEND_DEFAULT_PII=false
|
||||
```
|
||||
|
||||
```bash
|
||||
|
|
@ -199,3 +206,25 @@ docker build -t property-map .
|
|||
|
||||
The container entrypoint runs `property-map-server` with the expected data paths
|
||||
under `/app/data` and serves `frontend/dist` when `--dist` is present.
|
||||
|
||||
## Bugsink
|
||||
|
||||
Bugsink is wired through the Sentry-compatible SDKs. Set `BUGSINK_DSN` for the
|
||||
Rust API and `FRONTEND_BUGSINK_DSN` for the browser app. If the frontend DSN is
|
||||
omitted, the server falls back to `BUGSINK_DSN` when injecting runtime config
|
||||
into served HTML.
|
||||
|
||||
The frontend build also accepts `FRONTEND_BUGSINK_DSN`, `BUGSINK_ENVIRONMENT`,
|
||||
`BUGSINK_RELEASE`, and `BUGSINK_SEND_DEFAULT_PII` as build-time values. Runtime
|
||||
HTML injection is preferred for Docker deployments because the DSN can be set
|
||||
with environment variables when the container starts.
|
||||
|
||||
Production Webpack builds emit hidden source maps. Upload them to Bugsink after
|
||||
building if you want browser stack traces to resolve to source:
|
||||
|
||||
```bash
|
||||
cd frontend
|
||||
npx sentry-cli sourcemaps inject dist
|
||||
SENTRY_AUTH_TOKEN=... npx sentry-cli --url https://your-bugsink-instance \
|
||||
sourcemaps --org bugsinkhasnoorgs --project ignoredfornow upload dist
|
||||
```
|
||||
|
|
|
|||
328657
analyses/online_listings_buy.ipynb
Normal file
328657
analyses/online_listings_buy.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -45,6 +45,11 @@ services:
|
|||
STRIPE_REFERRAL_COUPON_ID: L5uQqagl
|
||||
GOOGLE_OAUTH_CLIENT_ID: 536485512604-740bbn3tf027ogrdcr5sqor4ntorkaqv.apps.googleusercontent.com
|
||||
GOOGLE_OAUTH_CLIENT_SECRET: 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
|
||||
BUGSINK_DSN: ${BUGSINK_DSN:-}
|
||||
FRONTEND_BUGSINK_DSN: ${FRONTEND_BUGSINK_DSN:-}
|
||||
BUGSINK_ENVIRONMENT: ${BUGSINK_ENVIRONMENT:-development}
|
||||
BUGSINK_RELEASE: ${BUGSINK_RELEASE:-}
|
||||
BUGSINK_SEND_DEFAULT_PII: ${BUGSINK_SEND_DEFAULT_PII:-false}
|
||||
depends_on:
|
||||
screenshot:
|
||||
condition: service_healthy
|
||||
|
|
@ -92,6 +97,10 @@ services:
|
|||
environment:
|
||||
API_PROXY_TARGET: http://server:8001
|
||||
PB_PROXY_TARGET: http://pocketbase:8090
|
||||
FRONTEND_BUGSINK_DSN: ${FRONTEND_BUGSINK_DSN:-}
|
||||
BUGSINK_ENVIRONMENT: ${BUGSINK_ENVIRONMENT:-development}
|
||||
BUGSINK_RELEASE: ${BUGSINK_RELEASE:-}
|
||||
BUGSINK_SEND_DEFAULT_PII: ${BUGSINK_SEND_DEFAULT_PII:-false}
|
||||
|
||||
pocketbase:
|
||||
init: true
|
||||
|
|
|
|||
1
finder/.gitignore
vendored
Normal file
1
finder/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
data/
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
FROM python:3.12-slim
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
|
||||
WORKDIR /app
|
||||
COPY pyproject.toml ./
|
||||
RUN uv pip install --system -r pyproject.toml
|
||||
RUN playwright install-deps firefox
|
||||
RUN camoufox fetch \
|
||||
&& python -c "from camoufox.pkgman import camoufox_path; p = camoufox_path(download_if_missing=False); print('Camoufox verified at', p)"
|
||||
|
||||
COPY *.py ./
|
||||
COPY property-data/arcgis_data.parquet /data/arcgis_data.parquet
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
|
||||
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:1234/health')"
|
||||
|
||||
CMD ["python3", "main.py"]
|
||||
|
|
@ -1,8 +1,13 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
|
||||
DATA_DIR = Path("/app/data")
|
||||
FINDER_DIR = Path(__file__).resolve().parent
|
||||
REPO_DIR = FINDER_DIR.parent
|
||||
|
||||
DATA_DIR = Path(os.environ.get("DATA_DIR", str(FINDER_DIR / "data")))
|
||||
ARCGIS_PATH = Path(
|
||||
os.environ.get("ARCGIS_PATH", str(REPO_DIR / "property-data" / "arcgis_data.parquet"))
|
||||
)
|
||||
PAGE_SIZE = 24
|
||||
DELAY_BETWEEN_PAGES = 0.3
|
||||
DELAY_BETWEEN_OUTCODES = 0.5
|
||||
|
|
@ -10,42 +15,6 @@ MAX_RETRIES = 3
|
|||
RETRY_BASE_DELAY = 2.0
|
||||
GRID_CELL_SIZE = 0.01 # degrees for postcode spatial index
|
||||
MAX_BEDROOMS = 20 # sanity cap — values above this are almost certainly parsing errors
|
||||
# Rent sanity bounds (monthly). Rents outside this range are nulled out — they are
|
||||
# almost always total-stay pricing (e.g. "Golf Open 2026" short lets), annual rents
|
||||
# mislabelled as monthly, or data errors.
|
||||
MIN_RENT_MONTHLY = 50 # below £50/month is implausible for any UK property
|
||||
MAX_RENT_MONTHLY = 25_000 # above £25k/month covers ultra-prime London; higher is suspect
|
||||
SEED = 42
|
||||
CHECKPOINT_INTERVAL = int(os.environ.get("CHECKPOINT_INTERVAL", "900")) # seconds
|
||||
|
||||
# Schedule: hour of day (UTC) to auto-run scrape. Set to -1 to disable.
|
||||
SCHEDULE_HOUR = int(os.environ.get("SCHEDULE_HOUR", "3"))
|
||||
# Whether to run a scrape immediately on startup
|
||||
RUN_ON_STARTUP = os.environ.get("RUN_ON_STARTUP", "").lower() in ("1", "true", "yes")
|
||||
# Enable/disable individual sources
|
||||
SCRAPE_RIGHTMOVE = os.environ.get("SCRAPE_RIGHTMOVE", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
SCRAPE_HOMECOUK = os.environ.get("SCRAPE_HOMECOUK", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
SCRAPE_ZOOPLA = os.environ.get("SCRAPE_ZOOPLA", "true").lower() in (
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
)
|
||||
|
||||
# URL to trigger server data reload after scrape (e.g. http://server:8001/api/reload)
|
||||
RELOAD_URL = os.environ.get("RELOAD_URL", "")
|
||||
|
||||
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
|
||||
SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
|
||||
|
|
@ -55,14 +24,36 @@ RIGHTMOVE_BASE = "https://www.rightmove.co.uk"
|
|||
HOMECOUK_BASE = "https://home.co.uk"
|
||||
HOMECOUK_API_BASE = f"{HOMECOUK_BASE}/api"
|
||||
HOMECOUK_PER_PAGE = 30 # max supported by the API
|
||||
HOMECOUK_CONCURRENCY = int(os.environ.get("HOMECOUK_CONCURRENCY", "4"))
|
||||
|
||||
# OpenRent
|
||||
OPENRENT_BASE = "https://www.openrent.co.uk"
|
||||
|
||||
# Zoopla
|
||||
ZOOPLA_BASE = "https://www.zoopla.co.uk"
|
||||
|
||||
# Greater London-ish postcode areas. This intentionally uses broad area
|
||||
# prefixes so a manual scrape can include central/inner London plus common
|
||||
# outer-London and near-London outcodes without maintaining a long borough list.
|
||||
LONDON_OUTCODE_PREFIXES = {
|
||||
"E",
|
||||
"EC",
|
||||
"N",
|
||||
"NW",
|
||||
"SE",
|
||||
"SW",
|
||||
"W",
|
||||
"WC",
|
||||
"BR",
|
||||
"CR",
|
||||
"DA",
|
||||
"EN",
|
||||
"HA",
|
||||
"IG",
|
||||
"KT",
|
||||
"RM",
|
||||
"SM",
|
||||
"TW",
|
||||
"UB",
|
||||
"WD",
|
||||
}
|
||||
|
||||
PROPERTY_TYPE_MAP = {
|
||||
"Detached": "Detached",
|
||||
"Semi-Detached": "Semi-Detached",
|
||||
|
|
@ -150,5 +141,4 @@ PROPERTY_TYPE_MAP = {
|
|||
|
||||
CHANNELS = [
|
||||
{"channel": "BUY", "transactionType": "BUY", "sortType": "2"},
|
||||
{"channel": "RENT", "transactionType": "LETTING", "sortType": "6"},
|
||||
]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Binary file not shown.
Binary file not shown.
|
|
@ -6,7 +6,6 @@ import re
|
|||
import time
|
||||
from urllib.parse import unquote
|
||||
|
||||
import httpx
|
||||
from curl_cffi.requests import Session
|
||||
from curl_cffi.requests.errors import RequestsError
|
||||
|
||||
|
|
@ -19,12 +18,6 @@ from constants import (
|
|||
PROPERTY_TYPE_MAP,
|
||||
RETRY_BASE_DELAY,
|
||||
)
|
||||
from metrics import (
|
||||
flaresolverr_attempts_total,
|
||||
homecouk_errors_total,
|
||||
homecouk_properties_scraped,
|
||||
homecouk_requests_total,
|
||||
)
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import normalize_postcode, normalize_sub_type, validate_floor_area
|
||||
|
||||
|
|
@ -36,101 +29,73 @@ class CookiesExpiredError(Exception):
|
|||
|
||||
|
||||
# Channel mapping: internal name → URL path segment
|
||||
HOMECOUK_CHANNELS = {
|
||||
"BUY": "for-sale",
|
||||
"RENT": "to-rent",
|
||||
}
|
||||
|
||||
|
||||
FLARESOLVERR_URL = os.environ.get("FLARESOLVERR_URL", "http://flaresolverr:8191")
|
||||
|
||||
|
||||
def solve_cloudflare() -> tuple[dict[str, str], str] | None:
|
||||
"""Use FlareSolverr to solve the Cloudflare challenge.
|
||||
Returns (cookies_dict, user_agent) or None on failure."""
|
||||
log.info("Solving Cloudflare challenge via FlareSolverr at %s", FLARESOLVERR_URL)
|
||||
try:
|
||||
with httpx.Client(timeout=120) as client:
|
||||
resp = client.post(
|
||||
f"{FLARESOLVERR_URL}/v1",
|
||||
json={
|
||||
"cmd": "request.get",
|
||||
"url": f"{HOMECOUK_BASE}/for-sale/e1/",
|
||||
"maxTimeout": 60000,
|
||||
},
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
log.error("FlareSolverr returned HTTP %d", resp.status_code)
|
||||
return None
|
||||
|
||||
data = resp.json()
|
||||
if data.get("status") != "ok":
|
||||
log.error("FlareSolverr error: %s", data.get("message", "unknown"))
|
||||
return None
|
||||
|
||||
solution = data["solution"]
|
||||
raw_cookies = solution.get("cookies", [])
|
||||
user_agent = solution.get("userAgent", "")
|
||||
|
||||
# Pass through ALL cookies from FlareSolverr — different Cloudflare
|
||||
# configurations set different cookies (cf_clearance only appears when
|
||||
# a challenge is triggered; it's not needed if no challenge was detected)
|
||||
cookies = {}
|
||||
for c in raw_cookies:
|
||||
name = c.get("name", "")
|
||||
if name:
|
||||
cookies[name] = c["value"]
|
||||
|
||||
if not cookies:
|
||||
log.error("FlareSolverr solved but returned no cookies at all")
|
||||
flaresolverr_attempts_total.labels(result="no_cookies").inc()
|
||||
return None
|
||||
|
||||
log.info(
|
||||
"Cloudflare solved — got %d cookies, UA: %s",
|
||||
len(cookies),
|
||||
user_agent[:60],
|
||||
)
|
||||
flaresolverr_attempts_total.labels(result="success").inc()
|
||||
return cookies, user_agent
|
||||
|
||||
except (httpx.ConnectError, httpx.ReadTimeout) as e:
|
||||
log.warning("FlareSolverr not available: %s", e)
|
||||
flaresolverr_attempts_total.labels(result="unavailable").inc()
|
||||
return None
|
||||
except Exception as e:
|
||||
log.error("FlareSolverr error: %s", e)
|
||||
flaresolverr_attempts_total.labels(result="error").inc()
|
||||
return None
|
||||
HOMECOUK_URL_SEGMENT = "for-sale"
|
||||
|
||||
|
||||
def load_cookies() -> tuple[dict[str, str], str] | None:
|
||||
"""Get home.co.uk cookies + user-agent.
|
||||
Tries FlareSolverr first, then falls back to environment variables.
|
||||
Returns (cookies_dict, user_agent) or None if not configured."""
|
||||
# Try FlareSolverr first
|
||||
result = solve_cloudflare()
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Fall back to env vars
|
||||
cf_clearance = os.environ.get("HOMECOUK_CF_CLEARANCE", "")
|
||||
session = os.environ.get("HOMECOUK_SESSION", "")
|
||||
if not cf_clearance or not session:
|
||||
return None
|
||||
Environment cookies are optional. When they are not present, bootstrap a
|
||||
regular local session by visiting home.co.uk with curl_cffi's Chrome
|
||||
impersonation and reusing the cookies set by the site.
|
||||
"""
|
||||
user_agent = os.environ.get(
|
||||
"HOMECOUK_USER_AGENT",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/145.0.0.0 Safari/537.36",
|
||||
)
|
||||
return {"cf_clearance": cf_clearance, "homecouk_session": session}, user_agent
|
||||
|
||||
env_cookies = {
|
||||
name: value
|
||||
for name, value in {
|
||||
"cf_clearance": os.environ.get("HOMECOUK_CF_CLEARANCE", ""),
|
||||
"homecouk_session": os.environ.get("HOMECOUK_SESSION", ""),
|
||||
"XSRF-TOKEN": os.environ.get("HOMECOUK_XSRF_TOKEN", ""),
|
||||
}.items()
|
||||
if value
|
||||
}
|
||||
if env_cookies.get("homecouk_session"):
|
||||
return env_cookies, user_agent
|
||||
|
||||
session = Session(impersonate="chrome")
|
||||
session.headers.update(
|
||||
{
|
||||
"User-Agent": user_agent,
|
||||
"Accept": (
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,"
|
||||
"*/*;q=0.8"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
for url in (HOMECOUK_BASE, f"{HOMECOUK_BASE}/for-sale/br1/"):
|
||||
try:
|
||||
response = session.get(url, timeout=30)
|
||||
except RequestsError as exc:
|
||||
log.warning("home.co.uk cookie bootstrap failed for %s: %s", url, exc)
|
||||
continue
|
||||
if response.status_code == 403:
|
||||
raise CookiesExpiredError("home.co.uk returned HTTP 403 during bootstrap")
|
||||
if response.status_code >= 400:
|
||||
log.warning(
|
||||
"home.co.uk cookie bootstrap got HTTP %d from %s",
|
||||
response.status_code,
|
||||
url,
|
||||
)
|
||||
|
||||
cookies = session.cookies.get_dict()
|
||||
if cookies.get("homecouk_session") and cookies.get("XSRF-TOKEN"):
|
||||
log.info("home.co.uk local session bootstrapped")
|
||||
return cookies, user_agent
|
||||
|
||||
log.warning("home.co.uk did not provide session cookies during bootstrap")
|
||||
return None
|
||||
|
||||
|
||||
def make_client(cookies: dict[str, str], user_agent: str) -> Session:
|
||||
"""Create a curl_cffi Session configured for home.co.uk API calls.
|
||||
Uses Chrome TLS impersonation so cf_clearance cookies (which are bound
|
||||
to Chrome's JA3 fingerprint from FlareSolverr) remain valid."""
|
||||
Uses Chrome TLS impersonation so browser-derived cookies remain valid."""
|
||||
session = Session(impersonate="chrome")
|
||||
session.headers.update(
|
||||
{
|
||||
|
|
@ -150,12 +115,6 @@ def make_client(cookies: dict[str, str], user_agent: str) -> Session:
|
|||
return session
|
||||
|
||||
|
||||
def _status_label(code: int) -> str:
|
||||
if code >= 500:
|
||||
return "5xx"
|
||||
return str(code)
|
||||
|
||||
|
||||
def fetch_page(
|
||||
client: Session, url: str, params: dict, max_retries: int = 3
|
||||
) -> dict | None:
|
||||
|
|
@ -164,12 +123,10 @@ def fetch_page(
|
|||
for attempt in range(max_retries):
|
||||
try:
|
||||
resp = client.get(url, params=params, timeout=30)
|
||||
homecouk_requests_total.labels(status=_status_label(resp.status_code)).inc()
|
||||
if resp.status_code == 200:
|
||||
try:
|
||||
return resp.json()
|
||||
except json.JSONDecodeError:
|
||||
homecouk_errors_total.labels(type="json_decode").inc()
|
||||
log.error(
|
||||
"Non-JSON response from %s (got %s)",
|
||||
url,
|
||||
|
|
@ -195,7 +152,6 @@ def fetch_page(
|
|||
except CookiesExpiredError:
|
||||
raise
|
||||
except RequestsError as e:
|
||||
homecouk_errors_total.labels(type=type(e).__name__).inc()
|
||||
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
||||
log.warning(
|
||||
"%s from %s, retry %d/%d in %.1fs",
|
||||
|
|
@ -206,7 +162,6 @@ def fetch_page(
|
|||
delay,
|
||||
)
|
||||
time.sleep(delay)
|
||||
homecouk_errors_total.labels(type="retry_exhausted").inc()
|
||||
log.error("All %d retries exhausted for %s", max_retries, url)
|
||||
return None
|
||||
|
||||
|
|
@ -301,7 +256,6 @@ def map_property_type(raw_type: str | None) -> str:
|
|||
|
||||
def transform_property(
|
||||
prop: dict,
|
||||
channel: str,
|
||||
pc_index: PostcodeSpatialIndex,
|
||||
) -> dict | None:
|
||||
"""Transform a raw home.co.uk property dict into our output schema."""
|
||||
|
|
@ -365,7 +319,7 @@ def transform_property(
|
|||
"Property type": map_property_type(listing_type),
|
||||
"Property sub-type": normalize_sub_type(listing_type),
|
||||
"price": int(price),
|
||||
"price_frequency": "" if channel == "BUY" else "monthly",
|
||||
"price_frequency": "",
|
||||
"Price qualifier": price_qualifier,
|
||||
"Total floor area (sqm)": parse_floor_area(prop.get("description")),
|
||||
"Listing URL": f"{HOMECOUK_BASE}/property/{listing_id}",
|
||||
|
|
@ -377,13 +331,11 @@ def transform_property(
|
|||
def search_outcode(
|
||||
client: Session,
|
||||
outcode: str,
|
||||
channel: str,
|
||||
pc_index: PostcodeSpatialIndex,
|
||||
max_properties: int | None = None,
|
||||
) -> list[dict]:
|
||||
"""Paginate through search results for one outcode+channel.
|
||||
channel: "BUY" or "RENT".
|
||||
Returns transformed properties."""
|
||||
url_segment = HOMECOUK_CHANNELS[channel]
|
||||
"""Paginate through sale search results for one outcode."""
|
||||
url_segment = HOMECOUK_URL_SEGMENT
|
||||
url = f"{HOMECOUK_API_BASE}/{url_segment}/{outcode.lower()}/"
|
||||
properties = []
|
||||
page = 1
|
||||
|
|
@ -410,12 +362,11 @@ def search_outcode(
|
|||
break
|
||||
|
||||
for prop in raw_props:
|
||||
transformed = transform_property(prop, channel, pc_index)
|
||||
transformed = transform_property(prop, pc_index)
|
||||
if transformed:
|
||||
properties.append(transformed)
|
||||
homecouk_properties_scraped.labels(
|
||||
channel="buy" if channel == "BUY" else "rent",
|
||||
).inc()
|
||||
if max_properties is not None and len(properties) >= max_properties:
|
||||
return properties
|
||||
|
||||
# Check pagination
|
||||
pagination = data.get("pagination", {})
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
import logging
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
||||
import httpx
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
from constants import MAX_RETRIES, RETRY_BASE_DELAY
|
||||
from metrics import http_errors_total, http_requests_total, ip_rotations_total
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
|
@ -16,83 +14,6 @@ _ua = UserAgent(
|
|||
)
|
||||
|
||||
|
||||
def _endpoint_label(url: str) -> str:
|
||||
if "typeahead" in url:
|
||||
return "typeahead"
|
||||
if "search" in url:
|
||||
return "search"
|
||||
return "other"
|
||||
|
||||
|
||||
def _status_label(code: int) -> str:
|
||||
if code >= 500:
|
||||
return "5xx"
|
||||
return str(code)
|
||||
|
||||
|
||||
# Gluetun control API — runs on port 8000 inside the gluetun container.
|
||||
# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
|
||||
GLUETUN_API = "http://127.0.0.1:8000"
|
||||
_ip_rotate_lock = threading.Lock()
|
||||
|
||||
|
||||
def rotate_ip() -> bool:
|
||||
"""Ask gluetun to reconnect to a different VPN server, getting a new IP.
|
||||
Returns True if the IP changed successfully."""
|
||||
with _ip_rotate_lock:
|
||||
log.info("Rotating VPN IP via gluetun...")
|
||||
try:
|
||||
# Get current IP
|
||||
with httpx.Client(timeout=10) as ctl:
|
||||
old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
|
||||
old_ip = (
|
||||
old_ip_resp.json().get("public_ip", "unknown")
|
||||
if old_ip_resp.status_code == 200
|
||||
else "unknown"
|
||||
)
|
||||
log.info("Current IP: %s", old_ip)
|
||||
|
||||
# Trigger server change — PUT with empty JSON body picks a random server
|
||||
resp = ctl.put(
|
||||
f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"}
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
|
||||
return False
|
||||
time.sleep(2)
|
||||
|
||||
resp = ctl.put(
|
||||
f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"}
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
|
||||
return False
|
||||
|
||||
# Wait for reconnection
|
||||
for _ in range(30):
|
||||
time.sleep(2)
|
||||
try:
|
||||
with httpx.Client(timeout=10) as ctl:
|
||||
new_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
|
||||
if new_ip_resp.status_code == 200:
|
||||
new_ip = new_ip_resp.json().get("public_ip", "")
|
||||
if new_ip and new_ip != old_ip:
|
||||
log.info("IP rotated: %s → %s", old_ip, new_ip)
|
||||
ip_rotations_total.labels(result="success").inc()
|
||||
return True
|
||||
except Exception:
|
||||
pass # VPN still reconnecting
|
||||
|
||||
log.warning("IP rotation timed out (may still be same IP)")
|
||||
ip_rotations_total.labels(result="failure").inc()
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
log.error("IP rotation failed: %s", e)
|
||||
ip_rotations_total.labels(result="failure").inc()
|
||||
return False
|
||||
|
||||
|
||||
def make_client() -> httpx.Client:
|
||||
return httpx.Client(
|
||||
timeout=30,
|
||||
|
|
@ -104,23 +25,18 @@ def make_client() -> httpx.Client:
|
|||
def fetch_with_retry(
|
||||
client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
|
||||
) -> dict | None:
|
||||
"""GET JSON with retries on 429/5xx/connection errors. Returns None on permanent failure.
|
||||
On 403, triggers IP rotation and retries once."""
|
||||
endpoint = _endpoint_label(url)
|
||||
"""GET JSON with retries on 429/5xx/connection errors.
|
||||
|
||||
Returns None on permanent failure. The on_403 argument is kept for
|
||||
compatibility with older callers; 403 is now treated as non-retryable.
|
||||
"""
|
||||
for attempt in range(MAX_RETRIES):
|
||||
try:
|
||||
resp = client.get(url, params=params)
|
||||
http_requests_total.labels(
|
||||
status=_status_label(resp.status_code), endpoint=endpoint
|
||||
).inc()
|
||||
if resp.status_code == 200:
|
||||
return resp.json()
|
||||
if resp.status_code == 403 and on_403:
|
||||
log.warning("HTTP 403 — IP likely blocked, rotating...")
|
||||
if rotate_ip():
|
||||
# Retry once with new IP (but don't recurse on 403 again)
|
||||
return fetch_with_retry(client, url, params, on_403=False)
|
||||
log.error("IP rotation failed, giving up on %s", url)
|
||||
log.error("HTTP 403 from %s (forbidden)", url)
|
||||
return None
|
||||
if resp.status_code in (429, 500, 502, 503, 504):
|
||||
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
||||
|
|
@ -142,7 +58,6 @@ def fetch_with_retry(
|
|||
httpx.WriteTimeout,
|
||||
httpx.PoolTimeout,
|
||||
) as e:
|
||||
http_errors_total.labels(type=type(e).__name__).inc()
|
||||
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
|
||||
log.warning(
|
||||
"%s from %s, retry %d/%d in %.1fs",
|
||||
|
|
@ -153,6 +68,5 @@ def fetch_with_retry(
|
|||
delay,
|
||||
)
|
||||
time.sleep(delay)
|
||||
http_errors_total.labels(type="retry_exhausted").inc()
|
||||
log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
|
||||
return None
|
||||
|
|
|
|||
331
finder/main.py
331
finder/main.py
|
|
@ -1,211 +1,166 @@
|
|||
import argparse
|
||||
import logging
|
||||
import threading
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from flask import Flask, Response, jsonify, send_from_directory
|
||||
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
||||
from constants import DATA_DIR
|
||||
|
||||
from constants import (
|
||||
DATA_DIR,
|
||||
RUN_ON_STARTUP,
|
||||
SCHEDULE_HOUR,
|
||||
SCRAPE_HOMECOUK,
|
||||
SCRAPE_OPENRENT,
|
||||
SCRAPE_RIGHTMOVE,
|
||||
SCRAPE_ZOOPLA,
|
||||
|
||||
SOURCE_CHOICES = ("rightmove", "homecouk", "zoopla", "all")
|
||||
TEST_MAX_PROPERTIES_PER_SOURCE = 100
|
||||
TEST_OUTCODES = (
|
||||
"E1",
|
||||
"N1",
|
||||
"NW1",
|
||||
"SE1",
|
||||
"SW1",
|
||||
"W1",
|
||||
"WC1",
|
||||
"BR1",
|
||||
"CR0",
|
||||
"TW1",
|
||||
)
|
||||
from homecouk import load_cookies as load_homecouk_cookies
|
||||
from openrent import load_cookies as load_openrent_cookies
|
||||
from rightmove import outcode_cache
|
||||
from scraper import (
|
||||
_sync_gauges,
|
||||
|
||||
log = logging.getLogger("finder")
|
||||
|
||||
|
||||
def configure_standalone_runtime() -> None:
|
||||
"""Keep browser/cache/temp files on the project volume for local runs."""
|
||||
runtime_dir = DATA_DIR / ".runtime"
|
||||
cache_dir = runtime_dir / "cache"
|
||||
temp_dir = runtime_dir / "tmp"
|
||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
temp_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
os.environ.setdefault("XDG_CACHE_HOME", str(cache_dir))
|
||||
os.environ.setdefault("TMPDIR", str(temp_dir))
|
||||
tempfile.tempdir = str(temp_dir)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Run a manual Greater London-ish property scrape."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--source",
|
||||
choices=SOURCE_CHOICES,
|
||||
default="all",
|
||||
help="Portal to scrape. 'all' runs Rightmove, home.co.uk, and Zoopla.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
type=Path,
|
||||
default=DATA_DIR,
|
||||
help=f"Directory for parquet output. Defaults to {DATA_DIR}.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--limit-outcodes",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Limit outcodes for a quick manual smoke test.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-properties-per-source",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Stop each source after this many transformed listings.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Run a small standalone smoke test: use likely London outcodes and "
|
||||
f"fetch at most {TEST_MAX_PROPERTIES_PER_SOURCE} listings per source."
|
||||
),
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def configure_logging() -> None:
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
)
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def selected_sources(source: str) -> list[str]:
|
||||
if source == "all":
|
||||
return ["rightmove", "homecouk", "zoopla"]
|
||||
return [source]
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
configure_standalone_runtime()
|
||||
configure_logging()
|
||||
|
||||
if args.limit_outcodes is not None and args.limit_outcodes < 1:
|
||||
raise SystemExit("--limit-outcodes must be greater than zero")
|
||||
if (
|
||||
args.max_properties_per_source is not None
|
||||
and args.max_properties_per_source < 1
|
||||
):
|
||||
raise SystemExit("--max-properties-per-source must be greater than zero")
|
||||
|
||||
output_dir = args.output_dir.expanduser().resolve()
|
||||
if args.test and args.output_dir == DATA_DIR:
|
||||
output_dir = (DATA_DIR / "test").expanduser().resolve()
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
from scraper import (
|
||||
build_postcode_coords,
|
||||
build_postcode_index,
|
||||
load_outcodes,
|
||||
run_scrape,
|
||||
status,
|
||||
status_lock,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Logging
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
LOG_DIR = Path("/app/data")
|
||||
LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
handlers=[
|
||||
logging.StreamHandler(),
|
||||
logging.FileHandler(LOG_DIR / "rightmove.log"),
|
||||
],
|
||||
)
|
||||
log = logging.getLogger("rightmove")
|
||||
log.setLevel(logging.DEBUG)
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
# Suppress noisy /metrics and /health request logs from werkzeug
|
||||
class _NoiseFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
msg = record.getMessage()
|
||||
return "GET /metrics" not in msg and "GET /health" not in msg
|
||||
|
||||
|
||||
logging.getLogger("werkzeug").addFilter(_NoiseFilter())
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Startup: load data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
log.info("Loading arcgis data...")
|
||||
OUTCODES = load_outcodes()
|
||||
PC_INDEX = build_postcode_index()
|
||||
PC_COORDS = build_postcode_coords() if (SCRAPE_OPENRENT or SCRAPE_ZOOPLA) else None
|
||||
log.info(
|
||||
"Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s, zoopla=%s)",
|
||||
len(OUTCODES),
|
||||
SCRAPE_RIGHTMOVE,
|
||||
SCRAPE_HOMECOUK,
|
||||
SCRAPE_OPENRENT,
|
||||
SCRAPE_ZOOPLA,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _start_scrape() -> bool:
|
||||
"""Try to start a scrape. Returns True if started, False if already running."""
|
||||
with status_lock:
|
||||
if status.state == "running":
|
||||
return False
|
||||
status.state = "running"
|
||||
thread = threading.Thread(
|
||||
target=run_scrape, args=(OUTCODES, PC_INDEX, PC_COORDS), daemon=True
|
||||
)
|
||||
thread.start()
|
||||
return True
|
||||
|
||||
outcodes = load_outcodes()
|
||||
if args.test and args.limit_outcodes is None:
|
||||
preferred = [outcode for outcode in TEST_OUTCODES if outcode in set(outcodes)]
|
||||
if preferred:
|
||||
outcodes = preferred
|
||||
if args.limit_outcodes is not None:
|
||||
outcodes = outcodes[: args.limit_outcodes]
|
||||
|
||||
def _seconds_until(hour: int) -> float:
|
||||
"""Seconds from now until the next occurrence of `hour`:00 UTC."""
|
||||
now = datetime.now(timezone.utc)
|
||||
target = now.replace(hour=hour, minute=0, second=0, microsecond=0)
|
||||
if target <= now:
|
||||
target += timedelta(days=1)
|
||||
return (target - now).total_seconds()
|
||||
if not outcodes:
|
||||
raise SystemExit("No Greater London-ish outcodes loaded; nothing to scrape.")
|
||||
|
||||
sources = selected_sources(args.source)
|
||||
max_properties_per_source = args.max_properties_per_source
|
||||
if args.test and max_properties_per_source is None:
|
||||
max_properties_per_source = TEST_MAX_PROPERTIES_PER_SOURCE
|
||||
|
||||
def _scheduler_loop() -> None:
|
||||
"""Background thread that triggers a daily scrape at SCHEDULE_HOUR UTC."""
|
||||
log.info("Scheduler active — will run daily at %02d:00 UTC", SCHEDULE_HOUR)
|
||||
while True:
|
||||
wait = _seconds_until(SCHEDULE_HOUR)
|
||||
log.info(
|
||||
"Next scheduled scrape in %.0f seconds (%.1f hours)", wait, wait / 3600
|
||||
"Starting sale scrape: source=%s outcodes=%d output_dir=%s test=%s",
|
||||
args.source,
|
||||
len(outcodes),
|
||||
output_dir,
|
||||
args.test,
|
||||
)
|
||||
time.sleep(wait)
|
||||
log.info("Scheduled scrape triggered")
|
||||
if not _start_scrape():
|
||||
log.warning("Scheduled scrape skipped — already running")
|
||||
started = time.monotonic()
|
||||
|
||||
|
||||
if RUN_ON_STARTUP:
|
||||
log.info("RUN_ON_STARTUP=true — starting initial scrape")
|
||||
_start_scrape()
|
||||
|
||||
if SCHEDULE_HOUR >= 0:
|
||||
scheduler = threading.Thread(target=_scheduler_loop, daemon=True)
|
||||
scheduler.start()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Flask app
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/health")
def health():
    """Liveness probe: always answers HTTP 200 with the body "ok"."""
    body, code = "ok", 200
    return body, code
|
||||
|
||||
|
||||
@app.route("/run", methods=["POST"])
def trigger_run():
    """Start a scrape on demand.

    Responds 200 when the scrape was started and 409 when one is already
    running.
    """
    if not _start_scrape():
        return jsonify({"error": "Scrape already running"}), 409
    return jsonify({"message": "Scrape started"}), 200
|
||||
|
||||
|
||||
@app.route("/status")
def get_status():
    """Return a JSON snapshot of the current scrape progress.

    The snapshot is assembled while holding ``status_lock``. When a daily
    schedule is configured (``SCHEDULE_HOUR >= 0``) the response also carries
    the number of seconds until the next scheduled scrape.
    """
    with status_lock:
        if status.started_at:
            # A finished scrape reports its final duration; a running one
            # reports time elapsed so far.
            stop = status.finished_at or time.time()
            elapsed = stop - status.started_at
        else:
            elapsed = 0.0

        per_source = {
            "rightmove": status.rm_properties,
            "homecouk": status.hk_properties,
            "openrent": status.or_properties,
            "zoopla": status.zp_properties,
        }
        resp = {
            "state": status.state,
            "channel": status.channel,
            "outcode": status.outcode,
            "outcodes_done": status.outcodes_done,
            "outcodes_total": status.outcodes_total,
            "properties_buy": status.properties_buy,
            "properties_rent": status.properties_rent,
            "properties_by_source": per_source,
            "errors": status.errors[-20:],  # last 20 errors
            "elapsed_seconds": round(elapsed, 1),
        }

    if SCHEDULE_HOUR >= 0:
        resp["next_scrape_in_seconds"] = round(_seconds_until(SCHEDULE_HOUR))
    return jsonify(resp)
|
||||
|
||||
|
||||
@app.route("/debug")
|
||||
def get_debug():
|
||||
hk_cookies = load_homecouk_cookies() if SCRAPE_HOMECOUK else None
|
||||
or_cookies = load_openrent_cookies() if SCRAPE_OPENRENT else None
|
||||
return jsonify(
|
||||
{
|
||||
"outcode_cache_size": len(outcode_cache),
|
||||
"outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
|
||||
"scrape_rightmove": SCRAPE_RIGHTMOVE,
|
||||
"scrape_homecouk": SCRAPE_HOMECOUK,
|
||||
"scrape_openrent": SCRAPE_OPENRENT,
|
||||
"scrape_zoopla": SCRAPE_ZOOPLA,
|
||||
"homecouk_cookies_available": hk_cookies is not None,
|
||||
"openrent_cookies_available": or_cookies is not None,
|
||||
"zoopla_note": "browser-based (Camoufox), no cookies needed",
|
||||
}
|
||||
pc_index = build_postcode_index()
|
||||
pc_coords = build_postcode_coords() if "zoopla" in sources else None
|
||||
result = run_scrape(
|
||||
outcodes,
|
||||
pc_index,
|
||||
pc_coords=pc_coords,
|
||||
sources=sources,
|
||||
output_dir=output_dir,
|
||||
max_properties_per_source=max_properties_per_source,
|
||||
)
|
||||
|
||||
|
||||
@app.route("/metrics")
def metrics():
    """Serve Prometheus metrics after syncing the gauges under the status lock."""
    with status_lock:
        _sync_gauges()
    payload = generate_latest()
    return Response(payload, mimetype=CONTENT_TYPE_LATEST)
|
||||
|
||||
|
||||
@app.route("/data/<filename>")
def serve_data(filename):
    """Serve a parquet file from DATA_DIR; any other extension gets a 400."""
    if filename.endswith(".parquet"):
        return send_from_directory(DATA_DIR, filename)
    return jsonify({"error": "Only parquet files served"}), 400
|
||||
elapsed = time.monotonic() - started
|
||||
log.info("Scrape finished in %.1fs", elapsed)
|
||||
log.info("Result: %s", result)
|
||||
if args.test and result.get("errors"):
|
||||
raise SystemExit("Test scrape failed; see errors in the result above.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="0.0.0.0", port=1234, debug=False)
|
||||
raise SystemExit(main())
|
||||
|
|
|
|||
|
|
@ -1,167 +0,0 @@
|
|||
from prometheus_client import Counter, Gauge
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gauges — current scrape state, updated after each outcode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
scrape_state = Gauge(
|
||||
"scrape_state",
|
||||
"Current scrape state as a labeled gauge (1 = active)",
|
||||
["state"],
|
||||
)
|
||||
|
||||
scrape_outcodes_done = Gauge(
|
||||
"scrape_outcodes_done",
|
||||
"Outcodes processed in current channel",
|
||||
)
|
||||
|
||||
scrape_outcodes_total = Gauge(
|
||||
"scrape_outcodes_total",
|
||||
"Total outcodes in current channel",
|
||||
)
|
||||
|
||||
scrape_properties_total = Gauge(
|
||||
"scrape_properties_total",
|
||||
"Properties found so far",
|
||||
["channel", "source"],
|
||||
)
|
||||
|
||||
scrape_elapsed_seconds = Gauge(
|
||||
"scrape_elapsed_seconds",
|
||||
"Seconds since scrape started",
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Counters — Rightmove HTTP, VPN IP rotation, and per-source scrape errors (monotonically increasing)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
http_requests_total = Counter(
|
||||
"http_requests_total",
|
||||
"HTTP requests made to Rightmove",
|
||||
["status", "endpoint"],
|
||||
)
|
||||
|
||||
http_errors_total = Counter(
|
||||
"http_errors_total",
|
||||
"Rightmove HTTP connection/timeout errors",
|
||||
["type"],
|
||||
)
|
||||
|
||||
ip_rotations_total = Counter(
|
||||
"ip_rotations_total",
|
||||
"VPN IP rotation attempts",
|
||||
["result"],
|
||||
)
|
||||
|
||||
scrape_errors_total = Counter(
|
||||
"scrape_errors_total",
|
||||
"Per-outcode scrape errors",
|
||||
["source"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Counters — home.co.uk
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
homecouk_requests_total = Counter(
|
||||
"homecouk_requests_total",
|
||||
"HTTP requests made to home.co.uk API",
|
||||
["status"],
|
||||
)
|
||||
|
||||
homecouk_errors_total = Counter(
|
||||
"homecouk_errors_total",
|
||||
"home.co.uk HTTP connection/timeout errors",
|
||||
["type"],
|
||||
)
|
||||
|
||||
homecouk_properties_scraped = Counter(
|
||||
"homecouk_properties_scraped",
|
||||
"Properties scraped from home.co.uk (before dedup)",
|
||||
["channel"],
|
||||
)
|
||||
|
||||
cross_source_dedup_total = Counter(
|
||||
"cross_source_dedup_total",
|
||||
"Properties skipped because same property already found on another source",
|
||||
["channel"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Counters — OpenRent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
openrent_requests_total = Counter(
|
||||
"openrent_requests_total",
|
||||
"HTTP requests made to OpenRent",
|
||||
["status"],
|
||||
)
|
||||
|
||||
openrent_errors_total = Counter(
|
||||
"openrent_errors_total",
|
||||
"OpenRent HTTP connection/timeout errors",
|
||||
["type"],
|
||||
)
|
||||
|
||||
openrent_properties_scraped = Counter(
|
||||
"openrent_properties_scraped",
|
||||
"Properties scraped from OpenRent (before dedup)",
|
||||
["channel"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Counters — Zoopla
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
zoopla_pages_scraped = Counter(
|
||||
"zoopla_pages_scraped",
|
||||
"Search result pages scraped from Zoopla",
|
||||
["channel"],
|
||||
)
|
||||
|
||||
zoopla_errors_total = Counter(
|
||||
"zoopla_errors_total",
|
||||
"Zoopla scraping errors",
|
||||
["type"],
|
||||
)
|
||||
|
||||
zoopla_properties_scraped = Counter(
|
||||
"zoopla_properties_scraped",
|
||||
"Properties scraped from Zoopla (before dedup)",
|
||||
["channel"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Counters — FlareSolverr / cookie management
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
flaresolverr_attempts_total = Counter(
|
||||
"flaresolverr_attempts_total",
|
||||
"FlareSolverr Cloudflare challenge-solving attempts",
|
||||
["result"],
|
||||
)
|
||||
|
||||
cookie_refreshes_total = Counter(
|
||||
"cookie_refreshes_total",
|
||||
"home.co.uk cookie refresh attempts (triggered by 403)",
|
||||
["result"],
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gauges — per-source enabled state (home.co.uk, OpenRent, Zoopla)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
homecouk_enabled = Gauge(
|
||||
"homecouk_enabled",
|
||||
"Whether home.co.uk scraping is currently active (1=yes, 0=no)",
|
||||
)
|
||||
|
||||
openrent_enabled = Gauge(
|
||||
"openrent_enabled",
|
||||
"Whether OpenRent scraping is currently active (1=yes, 0=no)",
|
||||
)
|
||||
|
||||
zoopla_enabled = Gauge(
|
||||
"zoopla_enabled",
|
||||
"Whether Zoopla scraping is currently active (1=yes, 0=no)",
|
||||
)
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
|
||||
Request the following URL, passing the outcode as `location-id` and the page number as `page`. For example, for E13, page 2:
|
||||
|
||||
https://www.onthemarket.com/async/search/properties-v2/?search-type=for-sale&location-id=e13&page=2&view=map-list
|
||||
|
||||
A sample response is saved in [[response.json]].
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,869 +0,0 @@
|
|||
"""OpenRent (openrent.co.uk) scraper — rental properties only.
|
||||
|
||||
OpenRent is behind AWS WAF, so we use Playwright (headless Chromium) to solve
|
||||
the challenge and get valid cookies. Then we use curl_cffi with Chrome TLS
|
||||
impersonation to make requests with those cookies.
|
||||
|
||||
OpenRent is a rental-only platform, so this scraper only handles RENT channel.
|
||||
|
||||
HTML structure (as of 2026-03):
|
||||
Search results page renders property cards as <a class="pli search-property-card">.
|
||||
Each card contains:
|
||||
- Monthly price in <div class="pim"> with <span class="text-primary">£X,XXX</span>
|
||||
- Weekly price in <div class="piw"> (hidden by Alpine.js)
|
||||
- Title in <div class="fw-medium text-primary fs-3">N Bed Type, Location, OUTCODE</div>
|
||||
- Features in <ul> with <li> items like "1 Bed", "1 Bath", "Furnished"
|
||||
- Listing ID in data-listing-id on the .or-swiper div
|
||||
- Description snippet in <div class="line-clamp-2">
|
||||
|
||||
Detail page has:
|
||||
- <h1> with property title including outcode
|
||||
- <div id="map" data-lat="..." data-lng="..."> for coordinates
|
||||
- Tables with deposit, rent, furnishing, tenant preferences
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from curl_cffi.requests import Session
|
||||
from curl_cffi.requests.errors import RequestsError
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
from constants import (
|
||||
DELAY_BETWEEN_PAGES,
|
||||
MAX_BEDROOMS,
|
||||
OPENRENT_BASE,
|
||||
PROPERTY_TYPE_MAP,
|
||||
RETRY_BASE_DELAY,
|
||||
)
|
||||
from metrics import (
|
||||
flaresolverr_attempts_total,
|
||||
openrent_errors_total,
|
||||
openrent_properties_scraped,
|
||||
openrent_requests_total,
|
||||
)
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import normalize_postcode, normalize_sub_type, validate_floor_area
|
||||
|
||||
log = logging.getLogger("openrent")
|
||||
|
||||
|
||||
class WafChallengeError(Exception):
    """Signals that OpenRent answered with a WAF challenge, so the cookies must be refreshed."""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cookie / session management via Playwright
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def solve_waf() -> tuple[dict[str, str], str] | None:
    """Solve the AWS WAF challenge with headless Chromium (Playwright).

    Loads an OpenRent search page, waits for property cards if the challenge
    page is served, then harvests the browser cookies and user agent.

    Returns:
        ``(cookies, user_agent)`` on success, or None when the
        ``aws-waf-token`` cookie is missing or any step raises.
    """
    log.info("Solving AWS WAF challenge via Playwright")
    try:
        with sync_playwright() as pw:
            chromium = pw.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-blink-features=AutomationControlled"],
            )
            ctx = chromium.new_context()
            tab = ctx.new_page()

            target = f"{OPENRENT_BASE}/properties-to-rent/?term=london&isLive=true"
            log.info("Navigating to %s", target)
            tab.goto(target, wait_until="domcontentloaded", timeout=60000)

            if "AwsWafIntegration" in tab.content():
                # The challenge script reloads the page once solved; property
                # cards appearing means the real page has rendered.
                log.info("Got WAF challenge page, waiting for resolution...")
                tab.wait_for_selector(
                    "a.pli, .pli, .search-property-card",
                    timeout=30000,
                )

            jar = ctx.cookies()
            agent = tab.evaluate("navigator.userAgent")
            chromium.close()

        cookies = {entry["name"]: entry["value"] for entry in jar}
        if "aws-waf-token" in cookies:
            log.info(
                "AWS WAF solved — got %d cookies, UA: %s",
                len(cookies),
                agent[:60],
            )
            flaresolverr_attempts_total.labels(result="success").inc()
            return cookies, agent

        log.error("Playwright solved page but no aws-waf-token cookie found")
        flaresolverr_attempts_total.labels(result="no_cookies").inc()
        return None

    except Exception as exc:
        log.error("Playwright WAF solve failed: %s", exc)
        flaresolverr_attempts_total.labels(result="error").inc()
        return None
|
||||
|
||||
|
||||
def load_cookies() -> tuple[dict[str, str], str] | None:
    """Obtain OpenRent cookies and a matching user agent.

    Playwright is tried first. If that fails, the ``OPENRENT_WAF_TOKEN``
    environment variable is used, with ``OPENRENT_USER_AGENT`` or a built-in
    Chrome user agent. Returns None when neither source yields a token.
    """
    solved = solve_waf()
    if solved:
        return solved

    # Fall back to env vars
    token = os.environ.get("OPENRENT_WAF_TOKEN", "")
    if not token:
        return None

    default_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/145.0.0.0 Safari/537.36"
    )
    agent = os.environ.get("OPENRENT_USER_AGENT", default_agent)
    return {"aws-waf-token": token}, agent
|
||||
|
||||
|
||||
def make_client(cookies: dict[str, str], user_agent: str) -> Session:
    """Build a curl_cffi Session for OpenRent requests.

    The session impersonates Chrome's TLS fingerprint so that the AWS WAF
    cookies remain valid. It sends the given user agent and carries every
    supplied cookie on the ``openrent.co.uk`` domain.
    """
    browser_headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-GB,en;q=0.9",
    }
    client = Session(impersonate="chrome")
    client.headers.update(browser_headers)
    for cookie_name in cookies:
        client.cookies.set(cookie_name, cookies[cookie_name], domain="openrent.co.uk")
    return client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP fetch with retry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _status_label(code: int) -> str:
|
||||
if code >= 500:
|
||||
return "5xx"
|
||||
return str(code)
|
||||
|
||||
|
||||
def fetch_page(
    client: Session,
    url: str,
    max_retries: int = 3,
) -> str | None:
    """GET a page's HTML, retrying on 429/5xx and transport errors.

    Args:
        client: Session used to issue the request.
        url: Absolute URL to fetch.
        max_retries: Maximum number of attempts.

    Returns:
        The response body on HTTP 200, or None on a non-retryable status or
        once every attempt has failed.

    Raises:
        WafChallengeError: On HTTP 202/403, or on a 200 whose body is the AWS
            WAF challenge page (the cookies need refreshing).
    """
    for attempt in range(max_retries):
        delay = RETRY_BASE_DELAY * (2**attempt)
        # Backing off after the final attempt would only postpone the failure
        # report, so the sleep is skipped once no attempt is left.
        will_retry = attempt + 1 < max_retries
        try:
            resp = client.get(url, timeout=30)
            openrent_requests_total.labels(status=_status_label(resp.status_code)).inc()

            if resp.status_code == 200:
                html = resp.text
                # Detect WAF challenge page masquerading as 200
                if "AwsWafIntegration" in html and "challenge.js" in html:
                    raise WafChallengeError(
                        "Got AWS WAF challenge page — cookies expired"
                    )
                return html

            if resp.status_code in (202, 403):
                raise WafChallengeError(
                    f"HTTP {resp.status_code} — cookies likely expired"
                )

            if resp.status_code not in (429, 500, 502, 503, 504):
                log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
                return None

            log.warning(
                "HTTP %d from %s, retry %d/%d in %.1fs",
                resp.status_code,
                url,
                attempt + 1,
                max_retries,
                delay,
            )
        except RequestsError as e:
            # WafChallengeError is not a RequestsError, so it propagates to
            # the caller without needing an explicit re-raise clause.
            openrent_errors_total.labels(type=type(e).__name__).inc()
            log.warning(
                "%s from %s, retry %d/%d in %.1fs",
                type(e).__name__,
                url,
                attempt + 1,
                max_retries,
                delay,
            )

        if will_retry:
            time.sleep(delay)

    openrent_errors_total.labels(type="retry_exhausted").inc()
    log.error("All %d retries exhausted for %s", max_retries, url)
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTML parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _extract_price_from_element(el) -> tuple[int, str] | None:
|
||||
"""Extract price integer from a price element's text like '£2,100'."""
|
||||
if not el:
|
||||
return None
|
||||
text = el.get_text(strip=True)
|
||||
match = re.search(r"£([\d,]+)", text)
|
||||
if not match:
|
||||
return None
|
||||
return int(match.group(1).replace(",", ""))
|
||||
|
||||
|
||||
def _extract_price(text: str) -> tuple[int, str] | None:
|
||||
"""Extract price and frequency from text like '£1,500 pcm' or '£350 pw'.
|
||||
Returns (price_int, frequency) or None.
|
||||
|
||||
OpenRent card text shows both monthly and weekly prices (e.g.
|
||||
'£2,800 per month £646 per week'), so check monthly *before* weekly
|
||||
to match the first (monthly) price that the regex captures."""
|
||||
match = re.search(r"£([\d,]+)", text)
|
||||
if not match:
|
||||
return None
|
||||
price = int(match.group(1).replace(",", ""))
|
||||
lower = text.lower()
|
||||
if "pcm" in lower or "per month" in lower or "/m" in lower:
|
||||
return price, "monthly"
|
||||
if "pw" in lower or "per week" in lower or "/w" in lower:
|
||||
return price, "weekly"
|
||||
if "pa" in lower or "per annum" in lower or "/y" in lower:
|
||||
return price, "yearly"
|
||||
# OpenRent defaults to pcm (per calendar month)
|
||||
return price, "monthly"
|
||||
|
||||
|
||||
def _extract_bedrooms_from_title(title: str) -> int | None:
|
||||
"""Extract bedroom count from title like '2 Bed Flat, Pimlico'."""
|
||||
match = re.search(r"(\d+)\s*bed", title, re.IGNORECASE)
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
if re.search(r"\bstudio\b", title, re.IGNORECASE):
|
||||
return 0
|
||||
return None
|
||||
|
||||
|
||||
def _extract_beds_baths_from_features(
|
||||
feature_items: list,
|
||||
) -> tuple[int | None, int | None]:
|
||||
"""Extract bedrooms and bathrooms from feature list items.
|
||||
|
||||
OpenRent search cards have <ul> with items like:
|
||||
<li>1 Bed</li> <li>1 Bath</li> <li>Furnished</li>
|
||||
"""
|
||||
bedrooms = None
|
||||
bathrooms = None
|
||||
for li in feature_items:
|
||||
text = li.get_text(strip=True).lower()
|
||||
bed_match = re.search(r"(\d+)\s*bed", text)
|
||||
if bed_match:
|
||||
bedrooms = int(bed_match.group(1))
|
||||
bath_match = re.search(r"(\d+)\s*bath", text)
|
||||
if bath_match:
|
||||
bathrooms = int(bath_match.group(1))
|
||||
return bedrooms, bathrooms
|
||||
|
||||
|
||||
def _extract_postcode(text: str) -> str | None:
|
||||
"""Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'.
|
||||
Normalizes to include a space before the 3-char incode."""
|
||||
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
|
||||
if match:
|
||||
raw = match.group(1).upper().strip()
|
||||
# Ensure space before incode (last 3 chars): "IP265AT" → "IP26 5AT"
|
||||
if " " not in raw and len(raw) >= 5:
|
||||
return raw[:-3] + " " + raw[-3:]
|
||||
return raw
|
||||
return None
|
||||
|
||||
|
||||
def _extract_outcode(text: str) -> str | None:
|
||||
"""Extract UK outcode from text like '1 Bed Flat, Bank Chambers, SW1Y'.
|
||||
|
||||
Looks for an outcode pattern (e.g., SW1Y, E1, EC2A) at the end of the text
|
||||
or after the last comma."""
|
||||
# Try after last comma first (most reliable position in OpenRent titles)
|
||||
parts = text.split(",")
|
||||
if len(parts) > 1:
|
||||
last_part = parts[-1].strip()
|
||||
match = re.match(r"^([A-Z]{1,2}\d[A-Z0-9]?)$", last_part, re.IGNORECASE)
|
||||
if match:
|
||||
return match.group(1).upper()
|
||||
|
||||
# Fall back to searching anywhere in text
|
||||
match = re.search(r"\b([A-Z]{1,2}\d[A-Z0-9]?)\b", text, re.IGNORECASE)
|
||||
if match:
|
||||
candidate = match.group(1).upper()
|
||||
# Avoid matching things like "1 Bed" → "1B"
|
||||
if len(candidate) >= 2 and not candidate[0].isdigit():
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def _infer_property_type(title: str) -> str:
|
||||
"""Infer property type from title text.
|
||||
|
||||
Order matters: "Room in a Shared Flat" should be "Room" not "Flat",
|
||||
so check "room" before "flat"."""
|
||||
lower = title.lower()
|
||||
if "room in" in lower or "room " in lower:
|
||||
return "Room"
|
||||
if "studio" in lower:
|
||||
return "Studio"
|
||||
if "flat" in lower or "apartment" in lower:
|
||||
return "Flat"
|
||||
if "maisonette" in lower:
|
||||
return "Maisonette"
|
||||
if "house" in lower:
|
||||
return "House"
|
||||
if "bungalow" in lower:
|
||||
return "Bungalow"
|
||||
return ""
|
||||
|
||||
|
||||
def parse_search_results(html: str) -> list[dict]:
    """Turn an OpenRent search results page into a list of raw property dicts.

    Current OpenRent card structure (2026-03):
        <a class="pli search-property-card" href="/property-to-rent/.../ID">
          <div class="or-swiper" data-listing-id="ID">
          <div class="pim"><span class="text-primary">£2,100</span> per month</div>
          <div class="piw"><span class="text-primary">£485</span> per week</div>
          <div class="fw-medium text-primary fs-3">1 Bed Flat, Location, SW1Y</div>
          <ul>...<li>1 Bed</li><li>1 Bath</li><li>Furnished</li>...</ul>

    Each dict always has "url", "id", "title" and "property_type"; "price",
    "frequency", "bedrooms", "bathrooms", "postcode"/"outcode", "description",
    "lat" and "lng" are added when they can be extracted. Cards without an
    href or without any listing ID are dropped.
    """
    soup = BeautifulSoup(html, "lxml")

    # Property cards: <a class="pli search-property-card">, else any rental link.
    cards = soup.select("a.pli") or soup.find_all(
        "a", href=re.compile(r"/property-to-rent/")
    )
    if not cards:
        log.warning(
            "No property cards found in search HTML (%d bytes). "
            "CSS selectors may need updating.",
            len(html),
        )
        return []

    listings = []
    for card in cards:
        href = card.get("href", "")
        if not href:
            continue

        listing: dict = {
            "url": href if href.startswith("http") else OPENRENT_BASE + href,
        }

        # Listing ID: trailing number of the href, else the swiper attribute.
        id_in_href = re.search(r"/(\d+)(?:\?|$|#)", href)
        if id_in_href:
            listing["id"] = id_in_href.group(1)
        else:
            swiper = card.select_one("[data-listing-id]")
            if not swiper:
                continue  # can't use a property without an ID
            listing["id"] = swiper["data-listing-id"]

        # --- Price: structured elements first, card text as a fallback ---
        monthly = _extract_price_from_element(
            card.select_one(".pim .text-primary, .pim span")
        )
        weekly = _extract_price_from_element(
            card.select_one(".piw .text-primary, .piw span")
        )
        if monthly:
            listing["price"] = monthly
            listing["frequency"] = "monthly"
        elif weekly:
            listing["price"] = weekly
            listing["frequency"] = "weekly"
        else:
            parsed = _extract_price(card.get_text(" ", strip=True))
            if parsed:
                listing["price"], listing["frequency"] = parsed

        # --- Title / address, e.g. "1 Bed Flat, Bank Chambers, SW1Y" ---
        title_el = card.select_one("div.fw-medium.fs-3")
        if title_el:
            title = title_el.get_text(strip=True)
        else:
            # The image alt text carries the same title; otherwise give up.
            img = card.select_one("img.propertyPic")
            title = img["alt"] if img and img.get("alt") else ""
        listing["title"] = title

        # --- Bedrooms / bathrooms: feature list wins, title is the fallback ---
        feature_beds, feature_baths = _extract_beds_baths_from_features(
            card.select("ul li"),
        )
        bedrooms = feature_beds
        if bedrooms is None:
            bedrooms = _extract_bedrooms_from_title(title)
        if bedrooms is not None:
            listing["bedrooms"] = bedrooms
        if feature_baths is not None:
            listing["bathrooms"] = feature_baths

        # --- Property type from title ---
        listing["property_type"] = _infer_property_type(title)

        # --- Postcode, or failing that the outcode, from the title ---
        postcode = _extract_postcode(title)
        if postcode:
            listing["postcode"] = postcode
        else:
            outcode = _extract_outcode(title)
            if outcode:
                listing["outcode"] = outcode

        # --- Description snippet ---
        snippet = card.select_one(".line-clamp-2")
        if snippet:
            listing["description"] = snippet.get_text(strip=True)

        # --- Coordinates from data attributes (may not be present on cards) ---
        for node in [card] + card.select("[data-lat], [data-latitude]"):
            lat = node.get("data-lat") or node.get("data-latitude")
            lng = (
                node.get("data-lng")
                or node.get("data-longitude")
                or node.get("data-lon")
            )
            if not (lat and lng):
                continue
            try:
                listing["lat"] = float(lat)
                listing["lng"] = float(lng)
            except ValueError:
                pass
            break

        listings.append(listing)

    log.debug("Parsed %d property cards from search HTML", len(listings))
    return listings
|
||||
|
||||
|
||||
def parse_property_detail(html: str) -> dict:
    """Extract extra data from a single OpenRent property detail page.

    Current detail page structure (2026-03):
    - <h1> has the full title (e.g., "Room in a Shared House, Lime Tree Court, AL2")
    - <div id="map" data-lat="..." data-lng="..."> has coordinates
    - Tables have "Rent PCM", "Deposit", "Bills Included", etc. (NOT bedrooms)
    - Description in elements with class containing "description"

    Returns a dict holding whichever of "title", "postcode", "lat", "lng",
    "price", "bedrooms", "bathrooms", "property_type", "available_date",
    "furnished" and "description" could be found.
    """
    soup = BeautifulSoup(html, "lxml")
    details: dict = {}

    # --- Title from h1 ---
    heading = soup.select_one("h1")
    if heading:
        heading_text = heading.get_text(strip=True)
        # Short headings and "Log in" belong to nav/modal elements, not the listing.
        looks_like_title = (
            len(heading_text) > 10 and "log in" not in heading_text.lower()
        )
        if looks_like_title:
            details["title"] = heading_text
            postcode = _extract_postcode(heading_text)
            if postcode:
                details["postcode"] = postcode

    # --- Coordinates: prefer #map, else any element carrying data-lat ---
    map_el = soup.select_one("#map[data-lat]") or soup.select_one("[data-lat]")
    if map_el:
        lat = map_el.get("data-lat")
        lng = map_el.get("data-lng") or map_el.get("data-lon")
        if lat and lng:
            try:
                details["lat"] = float(lat)
                details["lng"] = float(lng)
            except ValueError:
                pass

    # --- Tables of "label | value" rows for rent and property details ---
    for table in soup.select("table"):
        for row in table.select("tr"):
            cells = row.select("td")
            if len(cells) < 2:
                continue
            label = cells[0].get_text(strip=True).lower()
            value = cells[1].get_text(strip=True)

            if "rent" in label and "pcm" in label:
                amount = re.search(r"£([\d,]+)", value)
                if amount:
                    details["price"] = int(amount.group(1).replace(",", ""))
            elif "bedroom" in label:
                number = re.search(r"(\d+)", value)
                if number:
                    details["bedrooms"] = int(number.group(1))
            elif "bathroom" in label:
                number = re.search(r"(\d+)", value)
                if number:
                    details["bathrooms"] = int(number.group(1))
            elif "type" in label and "property" in label:
                details["property_type"] = value
            elif "available" in label or "move" in label:
                details["available_date"] = value
            elif "furnish" in label:
                details["furnished"] = value

    # --- Coordinates from inline JavaScript (last resort) ---
    if "lat" not in details:
        for script in soup.select("script"):
            source = script.string or ""
            lat_match = re.search(r'"latitude"\s*:\s*([\d.-]+)', source)
            lng_match = re.search(r'"longitude"\s*:\s*([\d.-]+)', source)
            if not (lat_match and lng_match):
                continue
            try:
                details["lat"] = float(lat_match.group(1))
                details["lng"] = float(lng_match.group(1))
            except ValueError:
                pass
            break

    # --- Description (later mined for floor area) ---
    desc_el = soup.select_one(".description, [class*='description'], #description")
    if desc_el:
        details["description"] = desc_el.get_text(strip=True)

    return details
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Property type mapping & floor area
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def map_property_type(raw_type: str | None) -> str:
    """Translate an OpenRent property type into the canonical type.

    An exact ``PROPERTY_TYPE_MAP`` entry wins. Otherwise keywords are tried in
    priority order, and anything unrecognised (or empty) becomes "Other".
    """
    if not raw_type:
        return "Other"

    mapped = PROPERTY_TYPE_MAP.get(raw_type)
    if mapped:
        return mapped

    lower = raw_type.lower()

    def mentions(*keywords: str) -> bool:
        return any(keyword in lower for keyword in keywords)

    if mentions("room", "shared"):
        return "Other"
    if mentions("flat", "apartment", "maisonette", "studio"):
        return "Flats/Maisonettes"
    # "semi" is tested first so "semi-detached" is not taken for "detached".
    if mentions("semi"):
        return "Semi-Detached"
    if mentions("detached"):
        return "Detached"
    if mentions("terrace", "mews"):
        return "Terraced"
    if mentions("house"):
        return "Detached"

    log.debug("Unknown property type: %r — mapping to Other", raw_type)
    return "Other"
|
||||
|
||||
|
||||
def parse_floor_area(description: str | None) -> float | None:
|
||||
"""Try to extract floor area from description text."""
|
||||
if not description:
|
||||
return None
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return validate_floor_area(round(sqft * 0.092903, 1))
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
|
||||
if m:
|
||||
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Transform & search
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _resolve_outcode_postcodes(
|
||||
outcode: str,
|
||||
pc_coords: dict[str, tuple[float, float]],
|
||||
) -> list[str]:
|
||||
"""Get all postcodes for an outcode from the postcode coordinates lookup."""
|
||||
# ONSPD 7-char format: 4-char outcodes have no space before incode
|
||||
# (e.g., "BH191AB"), while shorter outcodes do (e.g., "E14 5AB").
|
||||
prefix = outcode + " "
|
||||
results = [pcd for pcd in pc_coords if pcd.startswith(prefix)]
|
||||
if not results and len(outcode) >= 4:
|
||||
results = [pcd for pcd in pc_coords if pcd.startswith(outcode) and len(pcd) > len(outcode)]
|
||||
return results
|
||||
|
||||
|
||||
def _parse_or_date(date_str: str) -> str:
|
||||
"""Parse OpenRent date strings to ISO format (YYYY-MM-DD).
|
||||
Handles 'Today', 'Tomorrow', and 'DD Month, YYYY' formats."""
|
||||
if not date_str:
|
||||
return ""
|
||||
stripped = date_str.strip()
|
||||
lower = stripped.lower()
|
||||
if lower == "today":
|
||||
from datetime import datetime
|
||||
return datetime.now().strftime("%Y-%m-%d")
|
||||
if lower == "tomorrow":
|
||||
from datetime import datetime, timedelta
|
||||
return (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
# Try "DD Month, YYYY" format (e.g., "01 April, 2026")
|
||||
from datetime import datetime
|
||||
for fmt in ("%d %B, %Y", "%d %B %Y"):
|
||||
try:
|
||||
return datetime.strptime(stripped, fmt).strftime("%Y-%m-%d")
|
||||
except ValueError:
|
||||
continue
|
||||
return date_str # Return as-is if unparseable
|
||||
|
||||
|
||||
def transform_property(
    search_data: dict,
    detail_data: dict | None,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
) -> dict | None:
    """Build one output-schema record from an OpenRent listing.

    Combines the search-card data with the (optional) detail-page data; a
    truthy detail-page value takes precedence over the search-card value.
    pc_coords (postcode -> lat/lng) is the fallback source of coordinates
    when the listing has a postcode or outcode but no coordinates.

    Returns None when the listing has no positive price, when its coordinates
    fall outside the accepted bounding box, or when no postcode/coordinates
    can be established.
    """
    detail = detail_data if detail_data else {}

    def pick(key):
        """Detail-page value when truthy, else the search-card value (or None)."""
        return detail.get(key) or search_data.get(key)

    price = pick("price")
    if not price or int(price) <= 0:
        return None

    lat = pick("lat")
    lng = pick("lng")
    postcode = pick("postcode")
    frequency = search_data.get("frequency", "monthly")
    card_outcode = search_data.get("outcode")

    if lat is not None and lng is not None:
        # Validate coordinates are in England
        in_bounds = 49 <= lat <= 56 and -7 <= lng <= 2
        if not in_bounds:
            log.debug("Coords outside England: lat=%.4f lng=%.4f — skipping", lat, lng)
            return None
        if not postcode:
            if pc_index:
                postcode = pc_index.nearest(lat, lng)
            elif card_outcode:
                # No spatial index — fall back to the outcode lookup
                candidates = _resolve_outcode_postcodes(card_outcode, pc_coords)
                if candidates:
                    postcode = candidates[0]
    elif postcode:
        # Postcode but no coordinates — use the postcode's entry in the lookup
        centroid = pc_coords.get(postcode)
        if not centroid:
            log.debug("Postcode %s not in arcgis data — skipping", postcode)
            return None
        lat, lng = centroid
    elif card_outcode:
        # Only an outcode — take the first postcode found for it as a rough
        # approximation of the location
        candidates = _resolve_outcode_postcodes(card_outcode, pc_coords)
        if not candidates:
            log.debug("No postcodes found for outcode %s — skipping", card_outcode)
            return None
        postcode = candidates[0]
        lat, lng = pc_coords[postcode]
    else:
        return None

    if not postcode:
        log.debug("No postcode for property — skipping")
        return None

    raw_beds = pick("bedrooms") or 0
    raw_baths = pick("bathrooms") or 0
    # Counts above MAX_BEDROOMS are treated as implausible and zeroed.
    bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
    bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
    if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
        log.warning(
            "OpenRent %s: implausible beds=%d baths=%d (capped to 0)",
            search_data.get("id", "?"), raw_beds, raw_baths,
        )

    # Title: the detail page (h1 with the full title) is preferred
    title = detail.get("title") or search_data.get("title", "")

    # Address: the middle of the title, without the "N Bed Type" prefix and
    # the outcode suffix. E.g., "1 Bed Flat, Bank Chambers, SW1Y" -> "Bank Chambers"
    address = ""
    if title:
        pieces = [chunk.strip() for chunk in title.split(",")]
        if len(pieces) == 1:
            address = title
        elif len(pieces) == 2:
            # Either "Location, OUTCODE" or "Type, Location": when the last
            # piece looks like an outcode, the first piece is the location.
            head, tail = pieces
            address = head if re.match(r"^[A-Z]{1,2}\d", tail.strip()) else tail
        else:
            # Drop the first (type) and last (outcode) pieces, keep the rest
            address = ", ".join(pieces[1:-1])

    # Property type: detail page, then search card, then inferred from title
    property_type = detail.get("property_type") or search_data.get("property_type", "")
    if not property_type and title:
        property_type = _infer_property_type(title)

    prop_id = search_data.get("id", "")
    if "url" in search_data:
        listing_url = search_data["url"]
    else:
        listing_url = f"{OPENRENT_BASE}/{prop_id}" if prop_id else ""
    description = detail.get("description") or search_data.get("description", "")

    return {
        "id": f"or_{prop_id}",
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        "Number of bedrooms & living rooms": bedrooms,
        "lon": lng,
        "lat": lat,
        "Postcode": normalize_postcode(postcode),
        "Address per Property Register": address,
        # OpenRent is rental-only: tenure (Freehold/Leasehold) is an ownership
        # concept that does not apply to rental listings, and the landlord's
        # tenure is not shown on OpenRent listing pages.
        "Leasehold/Freehold": None,
        "Property type": map_property_type(property_type),
        "Property sub-type": normalize_sub_type(property_type),
        "price": int(price),
        "price_frequency": frequency,
        "Price qualifier": "",
        "Total floor area (sqm)": parse_floor_area(description),
        "Listing URL": listing_url,
        "Listing features": [],
        "first_visible_date": _parse_or_date(detail.get("available_date", "")),
    }
|
||||
|
||||
|
||||
def search_outcode(
    client: Session,
    outcode: str,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
    fetch_details: bool = True,
) -> list[dict]:
    """Collect OpenRent rental listings for a single outcode.

    Steps:
      1. Fetch the search results page for the outcode.
      2. Parse the property cards out of that HTML.
      3. Fetch a card's detail page when the card alone cannot be located.
      4. Transform each listing to the common output schema.

    A detail page is requested only when fetch_details is set, the card has a
    URL, and the card has neither coordinates nor a postcode present in
    pc_coords. If the detail fetch yields nothing, the listing is still
    transformed from the card data alone.

    Returns the transformed listings; an empty list when the search page
    cannot be fetched or contains no cards.
    """
    listing_page = fetch_page(
        client,
        f"{OPENRENT_BASE}/properties-to-rent/?term={outcode}&isLive=true",
    )
    if not listing_page:
        return []

    cards = parse_search_results(listing_page)
    if not cards:
        return []

    collected = []
    for card in cards:
        detail = None

        if fetch_details and card.get("url"):
            # The detail page is skipped when the card already carries
            # coordinates or a postcode that pc_coords can resolve.
            coords_known = (
                card.get("lat") is not None and card.get("lng") is not None
            )
            card_postcode = card.get("postcode")
            postcode_resolvable = (
                bool(card_postcode)
                and bool(pc_coords)
                and card_postcode in pc_coords
            )
            if not coords_known and not postcode_resolvable:
                detail_page = fetch_page(client, card["url"])
                if detail_page:
                    detail = parse_property_detail(detail_page)
                # Shorter delay for detail pages (within same outcode)
                time.sleep(0.15)

        record = transform_property(card, detail, pc_index, pc_coords)
        if record:
            collected.append(record)
            openrent_properties_scraped.labels(channel="rent").inc()

    return collected
|
||||
|
|
@ -3,15 +3,10 @@ name = "finder"
|
|||
version = "0.1.0"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"flask",
|
||||
"httpx",
|
||||
"curl_cffi",
|
||||
"polars",
|
||||
"fake-useragent>=2.2.0",
|
||||
"prometheus-client",
|
||||
"beautifulsoup4",
|
||||
"lxml",
|
||||
"playwright>=1.58.0",
|
||||
"playwright-stealth>=2.0.2",
|
||||
"camoufox>=0.4.11",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ def _paginate(
|
|||
channel_cfg: dict,
|
||||
pc_index: PostcodeSpatialIndex,
|
||||
extra_params: dict | None = None,
|
||||
max_properties: int | None = None,
|
||||
) -> tuple[list[dict], int]:
|
||||
"""Paginate through search results. Returns (properties, result_count)."""
|
||||
properties = []
|
||||
|
|
@ -94,6 +95,8 @@ def _paginate(
|
|||
transformed = transform_property(prop, outcode, pc_index)
|
||||
if transformed:
|
||||
properties.append(transformed)
|
||||
if max_properties is not None and len(properties) >= max_properties:
|
||||
return properties, result_count
|
||||
|
||||
# Check if there are more pages
|
||||
result_count_str = data.get("resultCount", "0")
|
||||
|
|
@ -114,6 +117,7 @@ def search_outcode(
|
|||
outcode: str,
|
||||
channel_cfg: dict,
|
||||
pc_index: PostcodeSpatialIndex,
|
||||
max_properties: int | None = None,
|
||||
) -> list[dict]:
|
||||
"""Paginate through search results for one outcode+channel. Returns transformed properties.
|
||||
|
||||
|
|
@ -121,9 +125,12 @@ def search_outcode(
|
|||
re-queries per property type to recover listings beyond the cap.
|
||||
"""
|
||||
properties, result_count = _paginate(
|
||||
client, outcode_id, outcode, channel_cfg, pc_index
|
||||
client, outcode_id, outcode, channel_cfg, pc_index, max_properties=max_properties
|
||||
)
|
||||
|
||||
if max_properties is not None and len(properties) >= max_properties:
|
||||
return properties[:max_properties]
|
||||
|
||||
if result_count <= _MAX_INDEX:
|
||||
return properties
|
||||
|
||||
|
|
@ -140,17 +147,28 @@ def search_outcode(
|
|||
pt_props, _ = _paginate(
|
||||
client, outcode_id, outcode, channel_cfg, pc_index,
|
||||
extra_params={"propertyTypes": pt},
|
||||
max_properties=max_properties,
|
||||
)
|
||||
new = 0
|
||||
for p in pt_props:
|
||||
if p["id"] not in all_by_id:
|
||||
all_by_id[p["id"]] = p
|
||||
new += 1
|
||||
if (
|
||||
max_properties is not None
|
||||
and len(all_by_id) >= max_properties
|
||||
):
|
||||
break
|
||||
if new:
|
||||
log.debug("%s/%s type=%s: +%d new properties", outcode, ch, pt, new)
|
||||
if max_properties is not None and len(all_by_id) >= max_properties:
|
||||
break
|
||||
|
||||
log.info(
|
||||
"%s/%s: type split recovered %d → %d properties",
|
||||
outcode, ch, len(properties), len(all_by_id),
|
||||
)
|
||||
return list(all_by_id.values())
|
||||
properties = list(all_by_id.values())
|
||||
if max_properties is not None:
|
||||
return properties[:max_properties]
|
||||
return properties
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,52 +0,0 @@
|
|||
The API works as follows. First, look up the outcode you are interested in (for example E11) by requesting https://los.rightmove.co.uk/typeahead?query=E11&limit=10&exclude=STREET, which returns something like:
|
||||
|
||||
{
|
||||
"matches": [
|
||||
{
|
||||
"id": "746",
|
||||
"type": "OUTCODE",
|
||||
"displayName": "E11",
|
||||
"highlighting": "<span class='highlightLetter'>E11</span>",
|
||||
"highlights": [
|
||||
{
|
||||
"text": "E11",
|
||||
"highlighted": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "749",
|
||||
"type": "OUTCODE",
|
||||
"displayName": "E14",
|
||||
"highlighting": "displayName",
|
||||
"highlights": []
|
||||
},
|
||||
{
|
||||
"id": "752",
|
||||
"type": "OUTCODE",
|
||||
"displayName": "E17",
|
||||
"highlighting": "displayName",
|
||||
"highlights": []
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
We need to find the id of the object which has "type": "OUTCODE", and displayName matching the outcode we searched for, in this case E11, which is 746. Then we can hit the search endpoint with that id, and it will return the properties for that outcode:
|
||||
|
||||
https://www.rightmove.co.uk/api/property-search/listing/search?useLocationIdentifier=true&locationIdentifier=OUTCODE%5E746&buy=For+sale&_includeSSTC=on&index=0&sortType=2&channel=BUY&transactionType=BUY&displayLocationIdentifier=E12.html
|
||||
|
||||
You can see the example response to this at [[buy.json]]
|
||||
|
||||
You must set locationIdentifier=OUTCODE%5E{id}, where id is 746 in this case, giving locationIdentifier=OUTCODE%5E746. Paging works by increasing index by the number of results per page, which is 24, so the next page is index=24, then index=48, and so on.
|
||||
|
||||
|
||||
The rental endpoint works similarly:
|
||||
|
||||
https://www.rightmove.co.uk/api/property-search/listing/search?locationIdentifier=OUTCODE%5E745&index=0&sortType=6&channel=RENT&transactionType=LETTING&displayLocationIdentifier=E16.html
|
||||
|
||||
https://www.rightmove.co.uk/api/property-search/listing/search?locationIdentifier=OUTCODE%5E752&index=48&sortType=6&channel=RENT&transactionType=LETTING&displayLocationIdentifier=E17.html
|
||||
|
||||
|
||||
See a response example for the rental endpoint at [[rent.json]]
|
||||
|
||||
File diff suppressed because it is too large
Load diff
1323
finder/scraper.py
1323
finder/scraper.py
File diff suppressed because it is too large
Load diff
|
|
@ -4,17 +4,14 @@ from pathlib import Path
|
|||
|
||||
import polars as pl
|
||||
|
||||
from constants import MAX_BEDROOMS, MAX_RENT_MONTHLY, MIN_RENT_MONTHLY
|
||||
from transform import map_property_type, normalize_postcode, normalize_price
|
||||
from constants import MAX_BEDROOMS
|
||||
from transform import map_property_type, normalize_postcode
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
||||
"""Write properties list to parquet with server-ready column names.
|
||||
|
||||
channel: "buy" or "rent"
|
||||
"""
|
||||
def write_parquet(properties: list[dict], path: Path) -> None:
|
||||
"""Write sale properties list to parquet with server-ready column names."""
|
||||
if not properties:
|
||||
log.warning("No properties to write to %s", path)
|
||||
return
|
||||
|
|
@ -69,7 +66,7 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
|
||||
listing_dates.append(dt)
|
||||
except (ValueError, TypeError):
|
||||
# Try additional date formats (OpenRent: "DD Month, YYYY", "Today")
|
||||
# Try additional date formats used by scraped listing sources.
|
||||
parsed = None
|
||||
stripped = fvd.strip()
|
||||
lower = stripped.lower()
|
||||
|
|
@ -93,35 +90,9 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
else:
|
||||
listing_dates.append(None)
|
||||
|
||||
# Derive asking price / asking rent based on channel
|
||||
# Zero prices indicate parsing failures or POA/auction listings — treat as null
|
||||
if channel == "buy":
|
||||
asking_prices = [p["price"] if p["price"] > 0 else None for p in properties]
|
||||
asking_rents = [None] * len(properties)
|
||||
listing_statuses = ["For sale"] * len(properties)
|
||||
else:
|
||||
asking_prices = [None] * len(properties)
|
||||
# Normalize to monthly, then apply sanity bounds. Rents outside
|
||||
# [MIN_RENT_MONTHLY, MAX_RENT_MONTHLY] are almost always total-stay
|
||||
# pricing (short lets), annual rents mislabelled as monthly, or £0
|
||||
# placeholders — null them out rather than polluting aggregates.
|
||||
rent_outliers = 0
|
||||
asking_rents = []
|
||||
for p in properties:
|
||||
monthly = normalize_price(p["price"], p["price_frequency"])
|
||||
if monthly < MIN_RENT_MONTHLY or monthly > MAX_RENT_MONTHLY:
|
||||
rent_outliers += 1
|
||||
asking_rents.append(None)
|
||||
else:
|
||||
asking_rents.append(monthly)
|
||||
if rent_outliers:
|
||||
log.warning(
|
||||
"Nulled %d rent outliers outside [£%d, £%d]/month",
|
||||
rent_outliers,
|
||||
MIN_RENT_MONTHLY,
|
||||
MAX_RENT_MONTHLY,
|
||||
)
|
||||
listing_statuses = ["For rent"] * len(properties)
|
||||
|
||||
df = pl.DataFrame(
|
||||
{
|
||||
|
|
@ -146,7 +117,6 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
"Listing date": listing_dates,
|
||||
"Listing status": listing_statuses,
|
||||
"Asking price": asking_prices,
|
||||
"Asking rent (monthly)": asking_rents,
|
||||
},
|
||||
schema={
|
||||
"Bedrooms": pl.Int32,
|
||||
|
|
@ -166,12 +136,9 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
"Listing date": pl.Datetime("us"),
|
||||
"Listing status": pl.Utf8,
|
||||
"Asking price": pl.Int64,
|
||||
"Asking rent (monthly)": pl.Int64,
|
||||
},
|
||||
)
|
||||
|
||||
# Derive asking price per sqm for buy listings
|
||||
if channel == "buy":
|
||||
df = df.with_columns(
|
||||
(pl.col("Asking price") / pl.col("Total floor area (sqm)"))
|
||||
.round(0)
|
||||
|
|
|
|||
|
|
@ -143,15 +143,6 @@ def normalize_postcode(postcode: str) -> str:
|
|||
return compact[:-3] + " " + compact[-3:]
|
||||
|
||||
|
||||
def normalize_price(amount: int, frequency: str) -> int:
|
||||
"""Normalise price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
|
||||
if frequency == "weekly":
|
||||
return round(amount * 52 / 12)
|
||||
if frequency == "yearly":
|
||||
return round(amount / 12)
|
||||
return amount
|
||||
|
||||
|
||||
def transform_property(
|
||||
prop: dict, outcode: str, pc_index: PostcodeSpatialIndex
|
||||
) -> dict | None:
|
||||
|
|
@ -170,8 +161,6 @@ def transform_property(
|
|||
amount = price_obj.get("amount")
|
||||
if not amount:
|
||||
return None
|
||||
frequency = price_obj.get("frequency", "")
|
||||
# Store raw price — normalization to monthly happens once in storage.py
|
||||
price = int(amount)
|
||||
if price <= 0:
|
||||
return None
|
||||
|
|
@ -221,7 +210,7 @@ def transform_property(
|
|||
"Property type": map_property_type(sub_type),
|
||||
"Property sub-type": normalize_sub_type(sub_type),
|
||||
"price": price,
|
||||
"price_frequency": frequency,
|
||||
"price_frequency": "",
|
||||
"Price qualifier": price_qualifier,
|
||||
"Total floor area (sqm)": parse_display_size(prop.get("displaySize")),
|
||||
"Listing URL": RIGHTMOVE_BASE + prop.get("propertyUrl", ""),
|
||||
|
|
|
|||
175
finder/uv.lock
generated
175
finder/uv.lock
generated
|
|
@ -24,28 +24,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/af/38/9483eb52fc0f00039c684af627f8a8f994a8a99e8eceb869ba93b3fd740b/apify_fingerprint_datapoints-0.11.0-py3-none-any.whl", hash = "sha256:333340ccc3e520f19b5561e95d7abe2b31702e61d34b6247b328c9b8c93fbe1d", size = 726498, upload-time = "2026-03-01T01:00:03.103Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.14.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "soupsieve" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blinker"
|
||||
version = "1.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "browserforge"
|
||||
version = "1.2.4"
|
||||
|
|
@ -295,49 +273,22 @@ name = "finder"
|
|||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "camoufox" },
|
||||
{ name = "curl-cffi" },
|
||||
{ name = "fake-useragent" },
|
||||
{ name = "flask" },
|
||||
{ name = "httpx" },
|
||||
{ name = "lxml" },
|
||||
{ name = "playwright" },
|
||||
{ name = "playwright-stealth" },
|
||||
{ name = "polars" },
|
||||
{ name = "prometheus-client" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "camoufox", specifier = ">=0.4.11" },
|
||||
{ name = "curl-cffi" },
|
||||
{ name = "fake-useragent", specifier = ">=2.2.0" },
|
||||
{ name = "flask" },
|
||||
{ name = "httpx" },
|
||||
{ name = "lxml" },
|
||||
{ name = "playwright", specifier = ">=1.58.0" },
|
||||
{ name = "playwright-stealth", specifier = ">=2.0.2" },
|
||||
{ name = "polars" },
|
||||
{ name = "prometheus-client" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flask"
|
||||
version = "3.1.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "blinker" },
|
||||
{ name = "click" },
|
||||
{ name = "itsdangerous" },
|
||||
{ name = "jinja2" },
|
||||
{ name = "markupsafe" },
|
||||
{ name = "werkzeug" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/26/00/35d85dcce6c57fdc871f3867d465d780f302a175ea360f62533f12b27e2b/flask-3.1.3.tar.gz", hash = "sha256:0ef0e52b8a9cd932855379197dd8f94047b359ca0a78695144304cb45f87c9eb", size = 759004, upload-time = "2026-02-19T05:00:57.678Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424, upload-time = "2026-02-19T05:00:56.027Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -429,27 +380,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itsdangerous"
|
||||
version = "2.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jinja2"
|
||||
version = "3.1.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "markupsafe" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "language-tags"
|
||||
version = "1.2.0"
|
||||
|
|
@ -539,69 +469,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markupsafe"
|
||||
version = "3.0.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "2.4.3"
|
||||
|
|
@ -744,18 +611,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/c8/c4/cc0229fea55c87d6c9c67fe44a21e2cd28d1d558a5478ed4d617e9fb0c93/playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b", size = 33085919, upload-time = "2026-01-30T15:09:45.71Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "playwright-stealth"
|
||||
version = "2.0.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "playwright" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/61/ee/871901103c7b2a12070011fd4d978191f8f962837bf8bb51847274f528fa/playwright_stealth-2.0.2.tar.gz", hash = "sha256:ac57e51873190da5e653e03720e948c8f0a3d06b098f1d56763103d23ee48143", size = 24902, upload-time = "2026-02-13T02:36:25.137Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f1/30/f95f087f4b071611a7f63a2a0c9af4df3ac046dae2a693bfdacd70512867/playwright_stealth-2.0.2-py3-none-any.whl", hash = "sha256:37a5733f481b9c0ad602cf71491aa5a7c96c2a2fe4fa1e7ab764d2cd35520f2f", size = 33209, upload-time = "2026-02-13T02:36:26.334Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "1.39.0"
|
||||
|
|
@ -784,15 +639,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/b3/eb/936f5eeae196e8c8aaabe5f7d98891be8a5bbc741d50ce5c60f55575ad29/polars_runtime_32-1.39.0-cp310-abi3-win_arm64.whl", hash = "sha256:d69abde5f148566860bbe910010847bd7791e72f7c8063a4d2c462246a33a72a", size = 41885761, upload-time = "2026-03-12T14:23:16.773Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prometheus-client"
|
||||
version = "0.24.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f0/58/a794d23feb6b00fc0c72787d7e87d872a6730dd9ed7c7b3e954637d8f280/prometheus_client-0.24.1.tar.gz", hash = "sha256:7e0ced7fbbd40f7b84962d5d2ab6f17ef88a72504dcf7c0b40737b43b2a461f9", size = 85616, upload-time = "2026-01-14T15:26:26.965Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/74/c3/24a2f845e3917201628ecaba4f18bab4d18a337834c1df2a159ee9d22a42/prometheus_client-0.24.1-py3-none-any.whl", hash = "sha256:150db128af71a5c2482b36e588fc8a6b95e498750da4b17065947c16070f4055", size = 64057, upload-time = "2026-01-14T15:26:24.42Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pycparser"
|
||||
version = "3.0"
|
||||
|
|
@ -926,15 +772,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/6e/bf/c5205d480307bef660e56544b9e3d7ff687da776abb30c9cb3f330887570/screeninfo-0.8.1-py3-none-any.whl", hash = "sha256:e97d6b173856edcfa3bd282f81deb528188aff14b11ec3e195584e7641be733c", size = 12907, upload-time = "2022-09-09T11:35:21.351Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.8.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tqdm"
|
||||
version = "4.67.3"
|
||||
|
|
@ -984,15 +821,3 @@ sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6
|
|||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "werkzeug"
|
||||
version = "3.1.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "markupsafe" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/61/f1/ee81806690a87dab5f5653c1f146c92bc066d7f4cebc603ef88eb9e13957/werkzeug-3.1.6.tar.gz", hash = "sha256:210c6bede5a420a913956b4791a7f4d6843a43b6fcee4dfa08a65e93007d0d25", size = 864736, upload-time = "2026-02-19T15:17:18.884Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4d/ec/d58832f89ede95652fd01f4f24236af7d32b70cab2196dfcc2d2fd13c5c2/werkzeug-3.1.6-py3-none-any.whl", hash = "sha256:7ddf3357bb9564e407607f988f683d72038551200c704012bb9a4c523d42f131", size = 225166, upload-time = "2026-02-19T15:17:17.475Z" },
|
||||
]
|
||||
|
|
|
|||
279
finder/zoopla.py
279
finder/zoopla.py
|
|
@ -1,4 +1,4 @@
|
|||
"""Zoopla (zoopla.co.uk) scraper — buy and rental properties.
|
||||
"""Zoopla (zoopla.co.uk) scraper — sale properties.
|
||||
|
||||
Zoopla is behind Cloudflare Turnstile (managed interactive challenge), which
|
||||
blocks all HTTP clients (curl_cffi, httpx) and even Playwright with stealth
|
||||
|
|
@ -6,18 +6,14 @@ patches. Only Camoufox (an anti-fingerprinting Firefox fork) passes reliably.
|
|||
|
||||
Zoopla uses Next.js App Router with React Server Components (RSC). Search
|
||||
result data is server-rendered in an RSC stream, not available via
|
||||
__NEXT_DATA__ or a JSON API. URL-based location slugs return 0 results —
|
||||
the working flow requires typing into the autocomplete input, selecting a
|
||||
suggestion, and clicking Search.
|
||||
__NEXT_DATA__ or a JSON API.
|
||||
|
||||
Architecture:
|
||||
Unlike the other scrapers which use HTTP clients per outcode, Zoopla keeps
|
||||
a single Camoufox browser alive for the entire scrape. For each outcode, it:
|
||||
1. Clears and types the outcode into the search input
|
||||
2. Selects the first autocomplete suggestion
|
||||
3. Clicks Search
|
||||
4. Extracts listing data from the rendered DOM
|
||||
5. Handles pagination via ?pn=N parameter
|
||||
1. Navigates directly to the sale search URL
|
||||
2. Extracts listing data from the rendered DOM
|
||||
3. Handles pagination via ?pn=N parameter
|
||||
|
||||
The browser session replaces the cookie/client pattern used by other scrapers.
|
||||
"""
|
||||
|
|
@ -27,7 +23,6 @@ import re
|
|||
import time
|
||||
|
||||
from constants import DELAY_BETWEEN_PAGES, MAX_BEDROOMS, PROPERTY_TYPE_MAP, ZOOPLA_BASE
|
||||
from metrics import zoopla_errors_total, zoopla_pages_scraped, zoopla_properties_scraped
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import normalize_sub_type, validate_floor_area
|
||||
|
||||
|
|
@ -38,6 +33,25 @@ class TurnstileError(Exception):
|
|||
"""Raised when Cloudflare Turnstile challenge cannot be passed."""
|
||||
|
||||
|
||||
class _ManagedCamoufoxBrowser:
|
||||
def __init__(self, context_manager, browser):
|
||||
self._context_manager = context_manager
|
||||
self._browser = browser
|
||||
self._closed = False
|
||||
|
||||
def close(self) -> None:
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
try:
|
||||
self._browser.close()
|
||||
finally:
|
||||
self._context_manager.__exit__(None, None, None)
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._browser, name)
|
||||
|
||||
|
||||
# Maximum search result pages to scrape per outcode (25 listings/page)
|
||||
MAX_PAGES_PER_OUTCODE = 40
|
||||
|
||||
|
|
@ -55,7 +69,7 @@ _EXTRACT_LISTINGS_JS = r"""() => {
|
|||
|
||||
for (const card of listingCards) {
|
||||
const link = card.querySelector(
|
||||
'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"], a[href*="/to-rent/details/"]'
|
||||
'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"]'
|
||||
);
|
||||
if (!link) continue;
|
||||
|
||||
|
|
@ -100,9 +114,9 @@ _EXTRACT_LISTINGS_JS = r"""() => {
|
|||
|
||||
// Extract property type (e.g., "2 bed flat for sale" → "flat")
|
||||
let property_type = '';
|
||||
const ptMatch = text.match(/\d+\s*(?:beds?|bedrooms?)\s+([\w\s-]+?)\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i);
|
||||
const ptMatch = text.match(/\d+\s*(?:beds?|bedrooms?)\s+([\w\s-]+?)\s+for\s+sale/i);
|
||||
if (ptMatch) property_type = ptMatch[1].trim();
|
||||
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
|
||||
else if (/\bstudio\s*(?:flat|apartment)?\s+for\s+sale/i.test(text)) property_type = 'Studio';
|
||||
|
||||
// Keyword fallback when regex doesn't match current DOM format
|
||||
if (!property_type) {
|
||||
|
|
@ -135,7 +149,7 @@ _EXTRACT_LISTINGS_JS = r"""() => {
|
|||
// Strategy 2: Fall back to href-based link matching with parent-walking
|
||||
if (results.length === 0) {
|
||||
const links = Array.from(document.querySelectorAll(
|
||||
'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"], a[href*="/to-rent/details/"]'
|
||||
'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"]'
|
||||
));
|
||||
|
||||
for (const link of links) {
|
||||
|
|
@ -184,9 +198,9 @@ _EXTRACT_LISTINGS_JS = r"""() => {
|
|||
|
||||
// Extract property type
|
||||
let property_type = '';
|
||||
const ptMatch2 = text.match(/\d+\s*(?:beds?|bedrooms?)\s+([\w\s-]+?)\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i);
|
||||
const ptMatch2 = text.match(/\d+\s*(?:beds?|bedrooms?)\s+([\w\s-]+?)\s+for\s+sale/i);
|
||||
if (ptMatch2) property_type = ptMatch2[1].trim();
|
||||
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
|
||||
else if (/\bstudio\s*(?:flat|apartment)?\s+for\s+sale/i.test(text)) property_type = 'Studio';
|
||||
|
||||
// Keyword fallback when regex doesn't match current DOM format
|
||||
if (!property_type) {
|
||||
|
|
@ -243,17 +257,20 @@ def launch_browser():
|
|||
"""Launch Camoufox, navigate to Zoopla homepage, pass Cloudflare Turnstile,
|
||||
and dismiss cookie consent. Returns (browser, page) tuple.
|
||||
|
||||
Raises TurnstileError if Cloudflare cannot be passed within 60 seconds.
|
||||
Raises TurnstileError if Cloudflare cannot be passed within two minutes.
|
||||
Caller must close browser when done."""
|
||||
from camoufox.pkgman import camoufox_path
|
||||
|
||||
# Verify camoufox is pre-installed — never download at runtime
|
||||
camoufox_path(download_if_missing=False)
|
||||
# Standalone local runs should not require the old container image to have
|
||||
# pre-fetched Camoufox.
|
||||
camoufox_path(download_if_missing=True)
|
||||
|
||||
from camoufox.sync_api import Camoufox
|
||||
|
||||
log.info("Launching Camoufox browser for Zoopla...")
|
||||
browser = Camoufox(headless=True).__enter__()
|
||||
camoufox = Camoufox(headless=True)
|
||||
raw_browser = camoufox.__enter__()
|
||||
browser = _ManagedCamoufoxBrowser(camoufox, raw_browser)
|
||||
page = browser.new_page()
|
||||
|
||||
log.info("Navigating to Zoopla homepage...")
|
||||
|
|
@ -261,7 +278,7 @@ def launch_browser():
|
|||
|
||||
# Wait for Cloudflare Turnstile to resolve.
|
||||
# Try clicking the Turnstile checkbox if present (helps in some cases).
|
||||
for i in range(20):
|
||||
for i in range(40):
|
||||
if "Just a moment" not in page.title():
|
||||
break
|
||||
# Attempt to click the Turnstile checkbox in the challenge iframe
|
||||
|
|
@ -280,7 +297,7 @@ def launch_browser():
|
|||
else:
|
||||
page.close()
|
||||
browser.close()
|
||||
raise TurnstileError("Cloudflare Turnstile did not resolve after 60s")
|
||||
raise TurnstileError("Cloudflare Turnstile did not resolve after 120s")
|
||||
|
||||
log.info("Cloudflare passed — title: %s", page.title())
|
||||
time.sleep(2)
|
||||
|
|
@ -298,13 +315,13 @@ def _ensure_not_challenged(page) -> None:
|
|||
return
|
||||
|
||||
log.warning("Cloudflare challenge detected mid-session, waiting...")
|
||||
for i in range(20):
|
||||
for i in range(40):
|
||||
time.sleep(3)
|
||||
if "Just a moment" not in page.title():
|
||||
log.info("Cloudflare challenge resolved")
|
||||
return
|
||||
|
||||
raise TurnstileError("Cloudflare re-challenge did not resolve")
|
||||
raise TurnstileError("Cloudflare re-challenge did not resolve after 120s")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -312,21 +329,8 @@ def _ensure_not_challenged(page) -> None:
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _navigate_direct(page, url: str) -> bool:
|
||||
"""Navigate directly to a Zoopla search URL (skipping the homepage flow).
|
||||
|
||||
Used to load the second channel (e.g., RENT after BUY) for the same outcode
|
||||
by swapping the path component. Falls back gracefully — returns False if
|
||||
the page has no listings, so the caller can retry via the full search flow.
|
||||
"""
|
||||
try:
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=30000)
|
||||
except Exception as e:
|
||||
log.debug("Direct navigation failed: %s", e)
|
||||
return False
|
||||
_ensure_not_challenged(page)
|
||||
|
||||
# Wait for listing content to hydrate
|
||||
def _wait_for_listing_content(page) -> None:
|
||||
"""Wait for rendered listing cards to contain usable text."""
|
||||
try:
|
||||
page.wait_for_function(
|
||||
"""() => {
|
||||
|
|
@ -343,100 +347,42 @@ def _navigate_direct(page, url: str) -> bool:
|
|||
timeout=8000,
|
||||
)
|
||||
except Exception:
|
||||
# Check if the page has any listings at all
|
||||
has_listings = page.query_selector('a[href*="/details/"]')
|
||||
if not has_listings:
|
||||
return False
|
||||
time.sleep(1.5)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def _navigate_search(page, outcode: str, channel: str) -> bool:
|
||||
"""Navigate to search results for an outcode via the homepage search flow.
|
||||
def _navigate_search(page, outcode: str) -> bool:
|
||||
"""Navigate directly to sale search results for an outcode.
|
||||
|
||||
Returns True if results were found, False if no results or navigation failed.
|
||||
Raises TurnstileError if Cloudflare blocks us."""
|
||||
# Navigate to homepage to reset search state
|
||||
page.goto(f"{ZOOPLA_BASE}/", wait_until="domcontentloaded", timeout=30000)
|
||||
time.sleep(0.5)
|
||||
url = (
|
||||
f"{ZOOPLA_BASE}/for-sale/property/{outcode.lower()}/"
|
||||
f"?q={outcode}&search_source=home"
|
||||
)
|
||||
try:
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=30000)
|
||||
except Exception as exc:
|
||||
log.debug("Zoopla direct navigation failed for %s: %s", outcode, exc)
|
||||
return False
|
||||
|
||||
_ensure_not_challenged(page)
|
||||
|
||||
# Dismiss cookie consent (may reappear after navigation)
|
||||
try:
|
||||
page.evaluate(_DISMISS_COOKIES_JS)
|
||||
time.sleep(0.3)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Select Buy/Rent tab
|
||||
if channel == "RENT":
|
||||
rent_tab = page.query_selector(
|
||||
'button:has-text("Rent"), [role="tab"]:has-text("Rent")'
|
||||
)
|
||||
if rent_tab:
|
||||
rent_tab.click()
|
||||
time.sleep(0.2)
|
||||
|
||||
# Find and fill search input
|
||||
search_input = page.query_selector(
|
||||
'input[name="autosuggest-input"]'
|
||||
) or page.query_selector('input[type="text"]')
|
||||
if not search_input:
|
||||
log.warning("Could not find search input on homepage")
|
||||
return False
|
||||
|
||||
search_input.click()
|
||||
time.sleep(0.1)
|
||||
search_input.fill("")
|
||||
search_input.type(outcode, delay=60)
|
||||
time.sleep(1.2)
|
||||
|
||||
# Select first autocomplete suggestion
|
||||
first_option = page.query_selector('[role="option"]')
|
||||
if not first_option:
|
||||
log.debug("No autocomplete suggestions for outcode %s", outcode)
|
||||
return False
|
||||
|
||||
first_option.click()
|
||||
time.sleep(0.2)
|
||||
|
||||
# Click search button
|
||||
search_btn = page.query_selector('button:has-text("Search")')
|
||||
if search_btn:
|
||||
search_btn.click()
|
||||
else:
|
||||
search_input.press("Enter")
|
||||
|
||||
# Wait for results to load — try waiting for listings container, fall back to fixed wait
|
||||
try:
|
||||
page.wait_for_selector(
|
||||
'[data-testid="regular-listings"], a[href*="/details/"]',
|
||||
'[data-testid="regular-listings"], a[href*="/for-sale/details/"], a[href*="/new-homes/details/"]',
|
||||
timeout=10000,
|
||||
)
|
||||
except Exception:
|
||||
time.sleep(4)
|
||||
_ensure_not_challenged(page)
|
||||
if not page.query_selector('a[href*="/details/"]'):
|
||||
return False
|
||||
|
||||
# Wait for client-side hydration to populate listing content (prices, addresses).
|
||||
# The structural container appears in server-rendered HTML before React hydrates
|
||||
# the actual card content — extracting too early yields empty price/address fields.
|
||||
try:
|
||||
page.wait_for_function(
|
||||
"""() => {
|
||||
const cards = document.querySelectorAll(
|
||||
'[data-testid="regular-listings"] > div'
|
||||
);
|
||||
if (cards.length === 0) return false;
|
||||
for (const card of cards) {
|
||||
const t = card.innerText || '';
|
||||
if (t.includes('\\u00a3') && t.length > 50) return true;
|
||||
}
|
||||
return false;
|
||||
}""",
|
||||
timeout=8000,
|
||||
)
|
||||
except Exception:
|
||||
# Content never appeared — extraction will likely fail but let it try
|
||||
log.debug("Listing content hydration wait timed out — prices may not have rendered")
|
||||
time.sleep(2)
|
||||
_wait_for_listing_content(page)
|
||||
|
||||
return True
|
||||
|
||||
|
|
@ -516,18 +462,21 @@ def _extract_listings(page) -> list[dict]:
|
|||
return listings
|
||||
except Exception as e:
|
||||
log.warning("Failed to extract listings from DOM: %s", e)
|
||||
zoopla_errors_total.labels(type="extract_failed").inc()
|
||||
return []
|
||||
|
||||
|
||||
def _paginate(page, total_results: int, channel: str) -> list[dict]:
|
||||
def _paginate(
|
||||
page,
|
||||
total_results: int,
|
||||
max_properties: int | None = None,
|
||||
) -> list[dict]:
|
||||
"""Extract listings from all pages of search results.
|
||||
|
||||
Page 1 is already loaded. For subsequent pages, clicks the Next button
|
||||
or navigates via URL parameter ?pn=N."""
|
||||
all_listings = _extract_listings(page)
|
||||
channel_label = "buy" if channel == "BUY" else "rent"
|
||||
zoopla_pages_scraped.labels(channel=channel_label).inc()
|
||||
if max_properties is not None and len(all_listings) >= max_properties:
|
||||
return all_listings[:max_properties]
|
||||
|
||||
if not all_listings or total_results <= len(all_listings):
|
||||
return all_listings
|
||||
|
|
@ -550,24 +499,7 @@ def _paginate(page, total_results: int, channel: str) -> list[dict]:
|
|||
try:
|
||||
page.goto(next_url, wait_until="domcontentloaded", timeout=30000)
|
||||
_ensure_not_challenged(page)
|
||||
# Wait for listing content instead of fixed sleep
|
||||
try:
|
||||
page.wait_for_function(
|
||||
"""() => {
|
||||
const cards = document.querySelectorAll(
|
||||
'[data-testid="regular-listings"] > div'
|
||||
);
|
||||
if (cards.length === 0) return false;
|
||||
for (const card of cards) {
|
||||
const t = card.innerText || '';
|
||||
if (t.includes('\\u00a3') && t.length > 50) return true;
|
||||
}
|
||||
return false;
|
||||
}""",
|
||||
timeout=8000,
|
||||
)
|
||||
except Exception:
|
||||
time.sleep(1.5)
|
||||
_wait_for_listing_content(page)
|
||||
except TurnstileError:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -585,8 +517,8 @@ def _paginate(page, total_results: int, channel: str) -> list[dict]:
|
|||
seen_ids.add(listing["id"])
|
||||
all_listings.append(listing)
|
||||
new_count += 1
|
||||
|
||||
zoopla_pages_scraped.labels(channel=channel_label).inc()
|
||||
if max_properties is not None and len(all_listings) >= max_properties:
|
||||
return all_listings[:max_properties]
|
||||
|
||||
if new_count == 0:
|
||||
break # No new listings on this page
|
||||
|
|
@ -692,31 +624,8 @@ def _map_property_type(raw_type: str | None) -> str:
|
|||
return "Other"
|
||||
|
||||
|
||||
def _detect_rent_frequency(price_text: str) -> str:
|
||||
"""Detect rent frequency from Zoopla price text.
|
||||
|
||||
Zoopla price elements contain text like '£1,500 pcm', '£350 pw',
|
||||
'£18,000 pa'. Defaults to 'monthly' if no frequency indicator found.
|
||||
|
||||
Checks monthly indicators (pcm) BEFORE weekly (pw) because Zoopla cards
|
||||
often display both monthly and weekly prices in the same text. When the
|
||||
JS extraction falls back to full card text, checking pcm first ensures
|
||||
the captured monthly price gets the correct frequency label.
|
||||
"""
|
||||
lower = price_text.lower()
|
||||
if "pcm" in lower or "per month" in lower or "per calendar month" in lower:
|
||||
return "monthly"
|
||||
if "pw" in lower or "per week" in lower or "/w" in lower:
|
||||
return "weekly"
|
||||
if "pa" in lower or "per annum" in lower or "/y" in lower or "per year" in lower:
|
||||
return "yearly"
|
||||
# No indicator — default monthly (Zoopla standard)
|
||||
return "monthly"
|
||||
|
||||
|
||||
def transform_property(
|
||||
raw: dict,
|
||||
channel: str,
|
||||
pc_index: PostcodeSpatialIndex,
|
||||
pc_coords: dict[str, tuple[float, float]],
|
||||
search_outcode: str | None = None,
|
||||
|
|
@ -783,13 +692,6 @@ def transform_property(
|
|||
if listing_url and not listing_url.startswith("http"):
|
||||
listing_url = ZOOPLA_BASE + listing_url
|
||||
|
||||
# Detect rent frequency from price text (e.g. "£1,500 pcm" vs "£350 pw")
|
||||
if channel == "BUY":
|
||||
frequency = ""
|
||||
else:
|
||||
price_text = raw.get("price_text", "")
|
||||
frequency = _detect_rent_frequency(price_text)
|
||||
|
||||
return {
|
||||
"id": f"zp_{listing_id}",
|
||||
"Bedrooms": bedrooms,
|
||||
|
|
@ -803,7 +705,7 @@ def transform_property(
|
|||
"Property type": _map_property_type(raw.get("property_type")),
|
||||
"Property sub-type": normalize_sub_type(raw.get("property_type")),
|
||||
"price": int(price),
|
||||
"price_frequency": frequency,
|
||||
"price_frequency": "",
|
||||
"Price qualifier": "",
|
||||
"Total floor area (sqm)": floor_area_sqm,
|
||||
"Listing URL": listing_url,
|
||||
|
|
@ -820,10 +722,9 @@ def transform_property(
|
|||
def search_outcode(
|
||||
page,
|
||||
outcode: str,
|
||||
channel: str,
|
||||
pc_index: PostcodeSpatialIndex,
|
||||
pc_coords: dict[str, tuple[float, float]],
|
||||
base_search_url: str | None = None,
|
||||
max_properties: int | None = None,
|
||||
) -> tuple[list[dict], str | None]:
|
||||
"""Search Zoopla for properties in one outcode.
|
||||
|
||||
|
|
@ -831,47 +732,37 @@ def search_outcode(
|
|||
search flow, extracts listings from rendered DOM, and transforms to the
|
||||
standard output schema.
|
||||
|
||||
If base_search_url is provided (from a previous channel search for the same
|
||||
outcode), tries direct URL navigation first — skipping the slow homepage
|
||||
search flow. Falls back to full navigation if direct fails.
|
||||
|
||||
Returns (properties, search_url) where search_url can be passed to the next
|
||||
channel call for this outcode.
|
||||
Returns (properties, search_url).
|
||||
|
||||
Raises TurnstileError if Cloudflare blocks us mid-session.
|
||||
"""
|
||||
navigated = False
|
||||
if base_search_url:
|
||||
navigated = _navigate_direct(page, base_search_url)
|
||||
if navigated:
|
||||
log.debug("Zoopla %s %s: used direct URL navigation", outcode, channel)
|
||||
|
||||
if not navigated:
|
||||
if not _navigate_search(page, outcode, channel):
|
||||
if not _navigate_search(page, outcode):
|
||||
return [], None
|
||||
|
||||
total_results = _get_result_count(page)
|
||||
|
||||
# Always try extraction even if result count is 0 — the count regex may
|
||||
# not match Zoopla's current text format, but listings may still be in DOM
|
||||
raw_listings = _paginate(page, max(total_results, 25), channel)
|
||||
raw_listings = _paginate(
|
||||
page,
|
||||
max(total_results, 25),
|
||||
max_properties=max_properties,
|
||||
)
|
||||
if not raw_listings:
|
||||
if total_results > 0:
|
||||
log.debug(
|
||||
"Zoopla %s %s: page claims %d results but extraction found 0 — "
|
||||
"DOM selectors may need updating",
|
||||
outcode, channel, total_results,
|
||||
outcode, "BUY", total_results,
|
||||
)
|
||||
return [], None
|
||||
|
||||
channel_label = "buy" if channel == "BUY" else "rent"
|
||||
properties = []
|
||||
dropped = 0
|
||||
for raw in raw_listings:
|
||||
transformed = transform_property(raw, channel, pc_index, pc_coords, search_outcode=outcode)
|
||||
transformed = transform_property(raw, pc_index, pc_coords, search_outcode=outcode)
|
||||
if transformed:
|
||||
properties.append(transformed)
|
||||
zoopla_properties_scraped.labels(channel=channel_label).inc()
|
||||
else:
|
||||
dropped += 1
|
||||
|
||||
|
|
@ -881,13 +772,13 @@ def search_outcode(
|
|||
log.debug(
|
||||
"Zoopla %s %s: extracted %d raw listings but all %d dropped in transform "
|
||||
"(no price/postcode/coords). Sample raw: price=%s address=%r",
|
||||
outcode, channel, len(raw_listings), dropped,
|
||||
outcode, "BUY", len(raw_listings), dropped,
|
||||
sample.get("price"), sample.get("address", ""),
|
||||
)
|
||||
elif dropped > len(raw_listings) // 2:
|
||||
log.debug(
|
||||
"Zoopla %s %s: %d/%d listings dropped in transform",
|
||||
outcode, channel, dropped, len(raw_listings),
|
||||
outcode, "BUY", dropped, len(raw_listings),
|
||||
)
|
||||
|
||||
return properties, page.url
|
||||
|
|
|
|||
92
frontend/package-lock.json
generated
92
frontend/package-lock.json
generated
|
|
@ -20,6 +20,7 @@
|
|||
"@protomaps/basemaps": "^5.7.2",
|
||||
"@radix-ui/react-select": "^2.2.6",
|
||||
"@radix-ui/react-slider": "^1.3.6",
|
||||
"@sentry/react": "^10.53.1",
|
||||
"@types/supercluster": "^7.1.3",
|
||||
"i18next": "^26.0.10",
|
||||
"maplibre-gl": "^5.24.0",
|
||||
|
|
@ -5287,6 +5288,97 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@sentry-internal/browser-utils": {
|
||||
"version": "10.53.1",
|
||||
"resolved": "https://registry.npmjs.org/@sentry-internal/browser-utils/-/browser-utils-10.53.1.tgz",
|
||||
"integrity": "sha512-X4d6y8sBMjmNhcDW4eMBU3ASsNIMz8dqaFkhyIMN/dkYr/yZKnbRZPaVuVUGvHKjnlficPpIH0/HK9KBjrYxPw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@sentry/core": "10.53.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@sentry-internal/feedback": {
|
||||
"version": "10.53.1",
|
||||
"resolved": "https://registry.npmjs.org/@sentry-internal/feedback/-/feedback-10.53.1.tgz",
|
||||
"integrity": "sha512-vVpTI/aEYN5d9IgZeYJWMqVaN0+iFgidSrYNAsZTh1US5sJUzF/wrl+68KdpmCtFROrN3jiAn1oPSwL5CKvEJA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@sentry/core": "10.53.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@sentry-internal/replay": {
|
||||
"version": "10.53.1",
|
||||
"resolved": "https://registry.npmjs.org/@sentry-internal/replay/-/replay-10.53.1.tgz",
|
||||
"integrity": "sha512-wZNzTBYkgGUPWMuUQv7L64+OJmoCnz7GQNiTrTFK6EVAjJXFBCSsPp/nhif0bLhbk8+0g4xz633uOhpXuQbFdw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@sentry-internal/browser-utils": "10.53.1",
|
||||
"@sentry/core": "10.53.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@sentry-internal/replay-canvas": {
|
||||
"version": "10.53.1",
|
||||
"resolved": "https://registry.npmjs.org/@sentry-internal/replay-canvas/-/replay-canvas-10.53.1.tgz",
|
||||
"integrity": "sha512-aueLaf/2prExwA76BGU5/bOXCKWqtt6jQXWA6WJQNrmKpPEtZJB4ypnpsou0McXQCF8tur2Y8U0TEkwQP13yJQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@sentry-internal/replay": "10.53.1",
|
||||
"@sentry/core": "10.53.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@sentry/browser": {
|
||||
"version": "10.53.1",
|
||||
"resolved": "https://registry.npmjs.org/@sentry/browser/-/browser-10.53.1.tgz",
|
||||
"integrity": "sha512-zXF373hzUOGzUOrqd8xb1U3LQi5uYC3mwv+z5OMKUUinQlu30tTWBs7ypy6YTchtix9QlYaHWlayUF8vBZ5UjA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@sentry-internal/browser-utils": "10.53.1",
|
||||
"@sentry-internal/feedback": "10.53.1",
|
||||
"@sentry-internal/replay": "10.53.1",
|
||||
"@sentry-internal/replay-canvas": "10.53.1",
|
||||
"@sentry/core": "10.53.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@sentry/core": {
|
||||
"version": "10.53.1",
|
||||
"resolved": "https://registry.npmjs.org/@sentry/core/-/core-10.53.1.tgz",
|
||||
"integrity": "sha512-XG4ezlkyuAPjBC5+9kXC94rXXuqYTw9NRhfaDHssbTFaGnqBR8vQX2UUgZfY7ucbeelRDGfBu1sywoU+mB04uA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@sentry/react": {
|
||||
"version": "10.53.1",
|
||||
"resolved": "https://registry.npmjs.org/@sentry/react/-/react-10.53.1.tgz",
|
||||
"integrity": "sha512-lrwNq5T/zW84l60894TpKHPcvFuc1I/Hnohecc0TfYVpIcYYuw2orCHoU4v4wgkFaJUpegVetbgdOphViyLVjA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@sentry/browser": "10.53.1",
|
||||
"@sentry/core": "10.53.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.14.0 || 17.x || 18.x || 19.x"
|
||||
}
|
||||
},
|
||||
"node_modules/@standard-schema/spec": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz",
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
"@protomaps/basemaps": "^5.7.2",
|
||||
"@radix-ui/react-select": "^2.2.6",
|
||||
"@radix-ui/react-slider": "^1.3.6",
|
||||
"@sentry/react": "^10.53.1",
|
||||
"@types/supercluster": "^7.1.3",
|
||||
"i18next": "^26.0.10",
|
||||
"maplibre-gl": "^5.24.0",
|
||||
|
|
|
|||
|
|
@ -318,6 +318,7 @@ export default function App() {
|
|||
|
||||
const savedSearches = useSavedSearches(user?.id ?? null);
|
||||
const [showSaveModal, setShowSaveModal] = useState(false);
|
||||
const [editingSearch, setEditingSearch] = useState<{ id: string; name: string } | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const controller = new AbortController();
|
||||
|
|
@ -374,11 +375,52 @@ export default function App() {
|
|||
}
|
||||
setRouteHash(targetHash);
|
||||
setActivePage(page);
|
||||
setEditingSearch(null);
|
||||
if (targetHash) scrollToHash(targetHash);
|
||||
},
|
||||
[inviteCode]
|
||||
);
|
||||
|
||||
const handleEditSearch = useCallback(
|
||||
(id: string, name: string, params: string) => {
|
||||
const search = params.startsWith('?') ? params : `?${params}`;
|
||||
dashboardSearchRef.current = search;
|
||||
const url = `/dashboard${search}`;
|
||||
window.history.pushState({ page: 'dashboard', hash: '' }, '', url);
|
||||
setMapUrlState(parseUrlState());
|
||||
setDashboardRouteKey(search);
|
||||
setRouteHash('');
|
||||
setActivePage('dashboard');
|
||||
setEditingSearch({ id, name });
|
||||
},
|
||||
[]
|
||||
);
|
||||
|
||||
const handleCancelEdit = useCallback(() => {
|
||||
setEditingSearch(null);
|
||||
}, []);
|
||||
|
||||
const updateEditingSearch = useCallback(
|
||||
async (params: string) => {
|
||||
if (!editingSearch) return;
|
||||
await savedSearches.updateSearchParams(editingSearch.id, params);
|
||||
setEditingSearch(null);
|
||||
},
|
||||
[editingSearch, savedSearches]
|
||||
);
|
||||
|
||||
const handleUpdateEdit = useCallback(
|
||||
async (params: string) => {
|
||||
try {
|
||||
await updateEditingSearch(params);
|
||||
navigateTo('saved');
|
||||
} catch {
|
||||
// Error stored on savedSearches.error
|
||||
}
|
||||
},
|
||||
[updateEditingSearch, navigateTo]
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (authLoading || !user || postAuthIntent !== 'checkout') return;
|
||||
|
||||
|
|
@ -439,6 +481,8 @@ export default function App() {
|
|||
if (page === 'dashboard') {
|
||||
setMapUrlState(parseUrlState());
|
||||
setDashboardRouteKey(window.location.search);
|
||||
} else {
|
||||
setEditingSearch(null);
|
||||
}
|
||||
};
|
||||
window.addEventListener('popstate', handlePopState);
|
||||
|
|
@ -517,8 +561,17 @@ export default function App() {
|
|||
onToggleTheme={toggleTheme}
|
||||
exportState={activePage === 'dashboard' ? exportState : null}
|
||||
dashboardParams={activePage === 'dashboard' ? dashboardParams : ''}
|
||||
onSaveSearch={activePage === 'dashboard' && user ? () => setShowSaveModal(true) : null}
|
||||
onSaveSearch={
|
||||
activePage === 'dashboard' && user
|
||||
? editingSearch
|
||||
? () => handleUpdateEdit(dashboardParams)
|
||||
: () => setShowSaveModal(true)
|
||||
: null
|
||||
}
|
||||
savingSearch={savedSearches.saving}
|
||||
editingSearch={activePage === 'dashboard' ? editingSearch : null}
|
||||
onCancelEdit={handleCancelEdit}
|
||||
onUpdateEdit={() => handleUpdateEdit(dashboardParams)}
|
||||
user={user}
|
||||
onLoginClick={() => openAuthModal('login')}
|
||||
onRegisterClick={() => openAuthModal('register')}
|
||||
|
|
@ -553,9 +606,7 @@ export default function App() {
|
|||
onDeleteSearch={savedSearches.deleteSearch}
|
||||
onUpdateSearchNotes={savedSearches.updateSearchNotes}
|
||||
onUpdateSearchName={savedSearches.updateSearchName}
|
||||
onOpenSearch={(params) => {
|
||||
window.location.href = `/dashboard?${params}`;
|
||||
}}
|
||||
onOpenSearch={handleEditSearch}
|
||||
/>
|
||||
) : activePage === 'account' && user ? (
|
||||
<AccountPage
|
||||
|
|
@ -609,6 +660,10 @@ export default function App() {
|
|||
deferTutorial={licenseSuccessStatus !== 'hidden'}
|
||||
onSaveSearch={user ? savedSearches.saveSearch : undefined}
|
||||
savingSearch={savedSearches.saving}
|
||||
editingSearch={editingSearch}
|
||||
onCancelEdit={handleCancelEdit}
|
||||
onUpdateEdit={handleUpdateEdit}
|
||||
onUpdateEditInPlace={updateEditingSearch}
|
||||
/>
|
||||
)}
|
||||
</Suspense>
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ function SavedSearchesTab({
|
|||
onDelete: (id: string) => Promise<void>;
|
||||
onUpdateNotes: (id: string, notes: string) => void;
|
||||
onUpdateName: (id: string, name: string) => void;
|
||||
onOpen: (params: string) => void;
|
||||
onOpen: (id: string, name: string, params: string) => void;
|
||||
}) {
|
||||
const { t, i18n } = useTranslation();
|
||||
const [deleteConfirmId, setDeleteConfirmId] = useState<string | null>(null);
|
||||
|
|
@ -302,7 +302,7 @@ function SavedSearchesTab({
|
|||
|
||||
<div className="flex gap-2 mt-auto">
|
||||
<button
|
||||
onClick={() => onOpen(search.params)}
|
||||
onClick={() => onOpen(search.id, search.name, search.params)}
|
||||
className="flex-1 px-3 py-1.5 text-sm font-medium rounded bg-teal-600 text-white hover:bg-teal-700"
|
||||
>
|
||||
{t('common.open')}
|
||||
|
|
@ -358,7 +358,7 @@ export function SavedPage({
|
|||
onDeleteSearch: (id: string) => Promise<void>;
|
||||
onUpdateSearchNotes: (id: string, notes: string) => void;
|
||||
onUpdateSearchName: (id: string, name: string) => void;
|
||||
onOpenSearch: (params: string) => void;
|
||||
onOpenSearch: (id: string, name: string, params: string) => void;
|
||||
}) {
|
||||
const { t } = useTranslation();
|
||||
const [activeTab, setActiveTab] = useState<'searches' | 'shared-links'>(
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ const DEMO_FEATURES: FeatureMeta[] = [
|
|||
{
|
||||
name: 'Good+ primary schools within 2km',
|
||||
type: 'numeric',
|
||||
group: 'Education',
|
||||
group: 'Schools',
|
||||
min: 0,
|
||||
max: 8,
|
||||
step: 1,
|
||||
|
|
@ -92,7 +92,7 @@ const DEMO_FEATURES: FeatureMeta[] = [
|
|||
{
|
||||
name: 'Noise (dB)',
|
||||
type: 'numeric',
|
||||
group: 'Environment',
|
||||
group: 'Defining characteristics',
|
||||
min: 40,
|
||||
max: 80,
|
||||
step: 1,
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ interface AreaPaneProps {
|
|||
loading: boolean;
|
||||
hexagonId: string | null;
|
||||
isPostcode?: boolean;
|
||||
onViewProperties: () => void;
|
||||
hexagonLocation: HexagonLocation | null;
|
||||
filters: FeatureFilters;
|
||||
unfilteredCount?: number | null;
|
||||
|
|
@ -82,7 +81,6 @@ export default function AreaPane({
|
|||
loading,
|
||||
hexagonId,
|
||||
isPostcode = false,
|
||||
onViewProperties,
|
||||
hexagonLocation,
|
||||
filters,
|
||||
unfilteredCount,
|
||||
|
|
@ -100,7 +98,6 @@ export default function AreaPane({
|
|||
const filtersActive = activeFilterCount > 0;
|
||||
const filteredStatsEmpty = filtersActive && statsUseFilters && stats?.count === 0;
|
||||
const showFlipToggleCallout = filteredStatsEmpty && unfilteredCount !== 0;
|
||||
const canViewProperties = stats && stats.count > 0 && (statsUseFilters || !filtersActive);
|
||||
const featureGroups = useMemo(() => groupFeaturesByCategory(globalFeatures), [globalFeatures]);
|
||||
const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);
|
||||
|
||||
|
|
@ -275,14 +272,6 @@ export default function AreaPane({
|
|||
)}
|
||||
</div>
|
||||
)}
|
||||
{canViewProperties && (
|
||||
<button
|
||||
onClick={onViewProperties}
|
||||
className="w-full text-sm py-1.5 rounded bg-teal-600 hover:bg-teal-700 text-white font-medium"
|
||||
>
|
||||
{t('areaPane.viewPropertiesShort')}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
|
|
|||
|
|
@ -44,6 +44,8 @@ export default function ExternalSearchLinks({
|
|||
|
||||
if (!urls) return null;
|
||||
|
||||
const primaryLinkClass =
|
||||
'flex-1 text-center text-xs py-1.5 px-2 rounded bg-teal-600 hover:bg-teal-700 text-white dark:bg-teal-500 dark:text-navy-950 dark:hover:bg-teal-400 font-medium shadow-sm';
|
||||
const linkClass =
|
||||
'flex-1 text-center text-xs py-1.5 px-2 rounded border border-warm-200 dark:border-warm-700 bg-white dark:bg-warm-800 text-teal-600 dark:text-teal-400 hover:bg-warm-50 dark:hover:bg-warm-700 font-medium';
|
||||
const disabledClass =
|
||||
|
|
@ -56,7 +58,12 @@ export default function ExternalSearchLinks({
|
|||
</h3>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{rightmoveHref ? (
|
||||
<a href={rightmoveHref} target="_blank" rel="noopener noreferrer" className={linkClass}>
|
||||
<a
|
||||
href={rightmoveHref}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className={primaryLinkClass}
|
||||
>
|
||||
Rightmove
|
||||
</a>
|
||||
) : (
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ export default function FeatureBrowser({
|
|||
title={t('filters.aboutData')}
|
||||
size="md"
|
||||
>
|
||||
<InfoIcon className="w-5 h-5 md:w-3.5 md:h-3.5" />
|
||||
<InfoIcon className="w-4 h-4" />
|
||||
</IconButton>
|
||||
<button
|
||||
type="button"
|
||||
|
|
|
|||
|
|
@ -106,6 +106,9 @@ interface FiltersProps {
|
|||
onClearAll: () => void;
|
||||
onSaveSearch?: (name: string) => Promise<void>;
|
||||
savingSearch?: boolean;
|
||||
editingSearchName?: string | null;
|
||||
onUpdateSearch?: () => Promise<void>;
|
||||
onExitEditing?: () => void;
|
||||
destinationDropdownPortal?: boolean;
|
||||
}
|
||||
|
||||
|
|
@ -148,6 +151,9 @@ export default memo(function Filters({
|
|||
onClearAll,
|
||||
onSaveSearch,
|
||||
savingSearch,
|
||||
editingSearchName,
|
||||
onUpdateSearch,
|
||||
onExitEditing,
|
||||
destinationDropdownPortal = true,
|
||||
}: FiltersProps) {
|
||||
const { t } = useTranslation();
|
||||
|
|
@ -229,7 +235,7 @@ export default memo(function Filters({
|
|||
const backendFeature = backendName
|
||||
? features.find((feature) => feature.name === backendName)
|
||||
: undefined;
|
||||
return { ...(backendFeature ?? schoolMeta), name, group: 'Education' };
|
||||
return { ...(backendFeature ?? schoolMeta), name, group: 'Schools' };
|
||||
});
|
||||
}, [filters, features, schoolMeta]);
|
||||
const specificCrimeFilterItems = useMemo(() => {
|
||||
|
|
@ -441,7 +447,7 @@ export default memo(function Filters({
|
|||
|
||||
const getAddFilterGroupName = useCallback(
|
||||
(name: string): string | null => {
|
||||
if (name === SCHOOL_FILTER_NAME) return schoolMeta.group ?? 'Education';
|
||||
if (name === SCHOOL_FILTER_NAME) return schoolMeta.group ?? 'Schools';
|
||||
if (name === SPECIFIC_CRIMES_FILTER_NAME) return specificCrimeMeta.group ?? 'Crime';
|
||||
if (name === ELECTION_VOTE_SHARE_FILTER_NAME) {
|
||||
return electionVoteShareMeta.group ?? 'Neighbours';
|
||||
|
|
@ -569,14 +575,14 @@ export default memo(function Filters({
|
|||
|
||||
const handleClearAllClick = useCallback(() => {
|
||||
if (badgeCount === 0) return;
|
||||
if (onSaveSearch) {
|
||||
if (onUpdateSearch || onSaveSearch) {
|
||||
setShowClearPopup(true);
|
||||
setClearSaveName('');
|
||||
setClearSaveError(null);
|
||||
} else {
|
||||
onClearAll();
|
||||
}
|
||||
}, [badgeCount, onSaveSearch, onClearAll]);
|
||||
}, [badgeCount, onUpdateSearch, onSaveSearch, onClearAll]);
|
||||
|
||||
const handleSaveAndClear = useCallback(
|
||||
async (e: FormEvent) => {
|
||||
|
|
@ -593,10 +599,22 @@ export default memo(function Filters({
|
|||
[clearSaveName, savingSearch, onSaveSearch, onClearAll, t]
|
||||
);
|
||||
|
||||
const handleUpdateAndClear = useCallback(async () => {
|
||||
if (savingSearch || !onUpdateSearch) return;
|
||||
try {
|
||||
await onUpdateSearch();
|
||||
setShowClearPopup(false);
|
||||
onClearAll();
|
||||
} catch {
|
||||
setClearSaveError(t('saveSearch.saving'));
|
||||
}
|
||||
}, [savingSearch, onUpdateSearch, onClearAll, t]);
|
||||
|
||||
const handleClearWithoutSaving = useCallback(() => {
|
||||
setShowClearPopup(false);
|
||||
onClearAll();
|
||||
}, [onClearAll]);
|
||||
if (editingSearchName) onExitEditing?.();
|
||||
}, [onClearAll, editingSearchName, onExitEditing]);
|
||||
|
||||
return (
|
||||
<div
|
||||
|
|
@ -732,9 +750,11 @@ export default memo(function Filters({
|
|||
saveName={clearSaveName}
|
||||
saveError={clearSaveError}
|
||||
savingSearch={savingSearch}
|
||||
editingSearchName={editingSearchName ?? null}
|
||||
onClose={() => setShowClearPopup(false)}
|
||||
onSaveNameChange={setClearSaveName}
|
||||
onSaveAndClear={handleSaveAndClear}
|
||||
onUpdateAndClear={onUpdateSearch ? handleUpdateAndClear : undefined}
|
||||
onClearWithoutSaving={handleClearWithoutSaving}
|
||||
/>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -65,18 +65,27 @@ describe('JourneyInstructions', () => {
|
|||
expect(screen.getByText(/Canary Wharf/)).toBeTruthy();
|
||||
});
|
||||
|
||||
it('builds explicit Google Maps transit directions instead of a path URL', () => {
|
||||
it('builds explicit Google Maps transit directions with destination coordinates', () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date('2026-05-16T12:00:00Z'));
|
||||
|
||||
const url = googleMapsUrl('NW7 2GA', 'Bank tube station');
|
||||
const url = googleMapsUrl('NW7 2GA', 'Bank tube station', 51.5132819, -0.0895555);
|
||||
const parsed = new URL(url);
|
||||
|
||||
expect(parsed.origin + parsed.pathname).toBe('https://www.google.com/maps/dir/');
|
||||
expect(parsed.searchParams.get('api')).toBe('1');
|
||||
expect(parsed.searchParams.get('origin')).toBe('NW7 2GA');
|
||||
expect(parsed.searchParams.get('destination')).toBe('Bank Station, London');
|
||||
expect(parsed.searchParams.get('destination')).toBe('51.5132819,-0.0895555');
|
||||
expect(parsed.searchParams.get('travelmode')).toBe('transit');
|
||||
expect(parsed.searchParams.get('departure_time')).toBe('1779085800');
|
||||
});
|
||||
|
||||
it('does not rewrite destination names when coordinates are unavailable', () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date('2026-05-16T12:00:00Z'));
|
||||
|
||||
const parsed = new URL(googleMapsUrl('NW7 2GA', 'Bank tube station'));
|
||||
|
||||
expect(parsed.searchParams.get('destination')).toBe('Bank tube station');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -26,6 +26,8 @@ interface JourneyData {
|
|||
minutes: number | null;
|
||||
/** Best-case (5th percentile) total travel time from R5. */
|
||||
bestMinutes: number | null;
|
||||
destinationLat: number | null;
|
||||
destinationLon: number | null;
|
||||
/** Whether the dashboard filter is currently using best-case time. */
|
||||
useBest: boolean;
|
||||
loading: boolean;
|
||||
|
|
@ -39,6 +41,8 @@ export interface JourneyInstructionPreset {
|
|||
minutes: number | null;
|
||||
/** Best-case (5th percentile) total travel time. */
|
||||
bestMinutes?: number | null;
|
||||
destinationLat?: number | null;
|
||||
destinationLon?: number | null;
|
||||
useBest?: boolean;
|
||||
}
|
||||
|
||||
|
|
@ -94,20 +98,33 @@ function nextMondayAt730(): number {
|
|||
return Math.floor(monday.getTime() / 1000);
|
||||
}
|
||||
|
||||
function googleMapsDestination(destination: string): string {
|
||||
const clean = stripId(destination).trim();
|
||||
if (/\btube station$/i.test(clean)) {
|
||||
return `${clean.replace(/\s+tube station$/i, ' Station')}, London`;
|
||||
function googleMapsDestination(
|
||||
destination: string,
|
||||
destinationLat?: number | null,
|
||||
destinationLon?: number | null
|
||||
): string {
|
||||
if (
|
||||
destinationLat != null &&
|
||||
destinationLon != null &&
|
||||
Number.isFinite(destinationLat) &&
|
||||
Number.isFinite(destinationLon)
|
||||
) {
|
||||
return `${destinationLat},${destinationLon}`;
|
||||
}
|
||||
return clean;
|
||||
return stripId(destination).trim();
|
||||
}
|
||||
|
||||
export function googleMapsUrl(origin: string, destination: string): string {
|
||||
export function googleMapsUrl(
|
||||
origin: string,
|
||||
destination: string,
|
||||
destinationLat?: number | null,
|
||||
destinationLon?: number | null
|
||||
): string {
|
||||
const ts = nextMondayAt730();
|
||||
const params = new URLSearchParams({
|
||||
api: '1',
|
||||
origin,
|
||||
destination: googleMapsDestination(destination),
|
||||
destination: googleMapsDestination(destination, destinationLat, destinationLon),
|
||||
travelmode: 'transit',
|
||||
departure_time: ts.toString(),
|
||||
});
|
||||
|
|
@ -224,6 +241,8 @@ export default function JourneyInstructions({
|
|||
legs: null,
|
||||
minutes: null,
|
||||
bestMinutes: null,
|
||||
destinationLat: null,
|
||||
destinationLon: null,
|
||||
useBest: e.useBest,
|
||||
loading: true,
|
||||
}));
|
||||
|
|
@ -246,6 +265,8 @@ export default function JourneyInstructions({
|
|||
journey: JourneyLeg[] | null;
|
||||
minutes: number | null;
|
||||
best_minutes: number | null;
|
||||
destination_lat?: number | null;
|
||||
destination_lon?: number | null;
|
||||
}) => {
|
||||
setJourneys((prev) =>
|
||||
prev.map((j, i) =>
|
||||
|
|
@ -255,6 +276,8 @@ export default function JourneyInstructions({
|
|||
legs: data.journey,
|
||||
minutes: data.minutes,
|
||||
bestMinutes: data.best_minutes,
|
||||
destinationLat: data.destination_lat ?? null,
|
||||
destinationLon: data.destination_lon ?? null,
|
||||
loading: false,
|
||||
}
|
||||
: j
|
||||
|
|
@ -280,6 +303,8 @@ export default function JourneyInstructions({
|
|||
legs: journey.legs,
|
||||
minutes: journey.minutes,
|
||||
bestMinutes: journey.bestMinutes ?? null,
|
||||
destinationLat: journey.destinationLat ?? null,
|
||||
destinationLon: journey.destinationLon ?? null,
|
||||
useBest: journey.useBest ?? false,
|
||||
loading: false,
|
||||
}))
|
||||
|
|
@ -326,7 +351,7 @@ export default function JourneyInstructions({
|
|||
))}
|
||||
{showGoogleMapsLink && (
|
||||
<a
|
||||
href={googleMapsUrl(postcode, destination)}
|
||||
href={googleMapsUrl(postcode, destination, j.destinationLat, j.destinationLon)}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="mt-2 flex items-center justify-center gap-1.5 w-full text-[11px] font-medium text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 bg-white dark:bg-warm-900 border border-warm-200 dark:border-warm-700 rounded-md py-1.5 transition-colors"
|
||||
|
|
@ -361,7 +386,7 @@ export default function JourneyInstructions({
|
|||
</div>
|
||||
{showGoogleMapsLink && (
|
||||
<a
|
||||
href={googleMapsUrl(postcode, destination)}
|
||||
href={googleMapsUrl(postcode, destination, j.destinationLat, j.destinationLon)}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="mt-2 flex items-center justify-center gap-1.5 w-full text-[11px] font-medium text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 bg-white dark:bg-warm-900 border border-warm-200 dark:border-warm-700 rounded-md py-1.5 transition-colors"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { Suspense, useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { Trans, useTranslation } from 'react-i18next';
|
||||
|
||||
import type { MapFlyToOptions, PostcodeGeometry } from '../../types';
|
||||
import type { SearchedLocation } from './LocationSearch';
|
||||
|
|
@ -82,6 +82,10 @@ export default function MapPage({
|
|||
deferTutorial = false,
|
||||
onSaveSearch,
|
||||
savingSearch,
|
||||
editingSearch,
|
||||
onCancelEdit,
|
||||
onUpdateEdit,
|
||||
onUpdateEditInPlace,
|
||||
}: MapPageProps) {
|
||||
const { t } = useTranslation();
|
||||
const [selectedPOICategories, setSelectedPOICategories] =
|
||||
|
|
@ -164,6 +168,7 @@ export default function MapPage({
|
|||
viewFeature,
|
||||
activeFeature,
|
||||
pinnedFeature,
|
||||
filterRange,
|
||||
travelTimeEntries: entries,
|
||||
shareCode,
|
||||
});
|
||||
|
|
@ -283,7 +288,6 @@ export default function MapPage({
|
|||
setRightPaneTab,
|
||||
handleHexagonClick,
|
||||
handleHexagonHover,
|
||||
handleViewPropertiesFromArea,
|
||||
handlePropertiesTabClick,
|
||||
handleLoadMoreProperties,
|
||||
handleCloseSelection,
|
||||
|
|
@ -506,6 +510,9 @@ export default function MapPage({
|
|||
},
|
||||
[dashboardParams, onSaveSearch]
|
||||
);
|
||||
// Forwards the current dashboardParams to the optional onUpdateEditInPlace
// callback and awaits it; resolves without doing anything when the callback
// was not provided.
const handleUpdateEditInPlaceWithParams = useCallback(async () => {
|
||||
await onUpdateEditInPlace?.(dashboardParams);
|
||||
}, [dashboardParams, onUpdateEditInPlace]);
|
||||
const checkoutReturnPath = useMemo(
|
||||
() => `/dashboard${dashboardParams ? `?${dashboardParams}` : ''}`,
|
||||
[dashboardParams]
|
||||
|
|
@ -543,7 +550,6 @@ export default function MapPage({
|
|||
loading={loadingAreaStats}
|
||||
hexagonId={selectedHexagon?.id || null}
|
||||
isPostcode={selectedHexagon?.type === 'postcode'}
|
||||
onViewProperties={handleViewPropertiesFromArea}
|
||||
hexagonLocation={hexagonLocation}
|
||||
filters={filters}
|
||||
unfilteredCount={unfilteredAreaCount}
|
||||
|
|
@ -621,6 +627,11 @@ export default function MapPage({
|
|||
onClearAll={handleClearAll}
|
||||
onSaveSearch={onSaveSearch ? handleSaveSearch : undefined}
|
||||
savingSearch={savingSearch}
|
||||
editingSearchName={editingSearch?.name ?? null}
|
||||
onUpdateSearch={
|
||||
editingSearch && onUpdateEditInPlace ? handleUpdateEditInPlaceWithParams : undefined
|
||||
}
|
||||
onExitEditing={onCancelEdit}
|
||||
destinationDropdownPortal={options?.destinationDropdownPortal}
|
||||
/>
|
||||
</Suspense>
|
||||
|
|
@ -643,6 +654,40 @@ export default function MapPage({
|
|||
/>
|
||||
);
|
||||
const toasts = exportToast;
|
||||
|
||||
// Bar rendered only when a saved search is being edited on mobile
// (editingSearch && isMobile); otherwise null. It shows the translated
// "savedPage.isBeingUpdated" message with the search name, a Cancel button
// wired to onCancelEdit, and an Update button that passes the current
// dashboardParams to onUpdateEdit and is disabled while savingSearch is true.
const editingBar =
|
||||
editingSearch && isMobile ? (
|
||||
<div className="flex items-center gap-2 px-3 py-2 border-b border-warm-200 dark:border-navy-700 bg-warm-50 dark:bg-navy-900">
|
||||
<span
|
||||
className="flex-1 min-w-0 truncate text-xs text-warm-700 dark:text-warm-200"
|
||||
title={editingSearch.name}
|
||||
>
|
||||
<Trans
|
||||
i18nKey="savedPage.isBeingUpdated"
|
||||
values={{ name: editingSearch.name }}
|
||||
components={{
|
||||
strong: (
|
||||
<strong className="font-semibold text-navy-950 dark:text-warm-100" />
|
||||
),
|
||||
}}
|
||||
/>
|
||||
</span>
|
||||
<button
|
||||
onClick={onCancelEdit}
|
||||
className="shrink-0 cursor-pointer px-2.5 py-1 rounded text-xs font-medium border border-warm-200 dark:border-warm-700 text-warm-700 dark:text-warm-200 hover:bg-warm-100 dark:hover:bg-navy-800"
|
||||
>
|
||||
{t('common.cancel')}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => onUpdateEdit?.(dashboardParams)}
|
||||
disabled={savingSearch}
|
||||
className="shrink-0 cursor-pointer px-2.5 py-1 rounded text-xs font-medium bg-teal-600 text-white hover:bg-teal-700 disabled:opacity-50 disabled:cursor-wait flex items-center gap-1.5"
|
||||
>
|
||||
{savingSearch ? t('savedPage.updating') : t('common.update')}
|
||||
</button>
|
||||
</div>
|
||||
) : null;
|
||||
|
||||
const upgradeModal = mapData.licenseRequired ? (
|
||||
<Suspense fallback={null}>
|
||||
<UpgradeModal
|
||||
|
|
@ -714,6 +759,7 @@ export default function MapPage({
|
|||
renderPropertiesPane={renderPropertiesPane}
|
||||
toasts={toasts}
|
||||
upgradeModal={upgradeModal}
|
||||
editingBar={editingBar}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ interface VisualViewportState {
|
|||
interface MobileBottomSheetProps {
|
||||
children: ReactNode;
|
||||
legend?: ReactNode;
|
||||
editingBar?: ReactNode;
|
||||
onCoveredHeightChange?: (height: number) => void;
|
||||
}
|
||||
|
||||
|
|
@ -104,6 +105,7 @@ function getKeyboardEditableElement(target: EventTarget | null): HTMLElement | n
|
|||
export default function MobileBottomSheet({
|
||||
children,
|
||||
legend,
|
||||
editingBar,
|
||||
onCoveredHeightChange,
|
||||
}: MobileBottomSheetProps) {
|
||||
const [keyboardAvoidanceActive, setKeyboardAvoidanceActive] = useState(false);
|
||||
|
|
@ -244,6 +246,8 @@ export default function MobileBottomSheet({
|
|||
</div>
|
||||
</div>
|
||||
|
||||
{editingBar && <div className="shrink-0">{editingBar}</div>}
|
||||
|
||||
{legend && (
|
||||
<div className="shrink-0 border-y border-warm-200 dark:border-navy-700">{legend}</div>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -86,16 +86,17 @@ export function TravelTimeCard({
|
|||
</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 md:gap-0.5">
|
||||
<IconButton onClick={() => setShowInfo(true)} title={t('filters.aboutData')}>
|
||||
<InfoIcon className="w-3.5 h-3.5" />
|
||||
<IconButton onClick={() => setShowInfo(true)} title={t('filters.aboutData')} size="md">
|
||||
<InfoIcon className="w-5 h-5 md:w-3.5 md:h-3.5" />
|
||||
</IconButton>
|
||||
{slug && (
|
||||
<IconButton
|
||||
onClick={onTogglePin}
|
||||
active={isPinned || isActive}
|
||||
title={isPinned ? t('filters.clearColourMap') : t('filters.colourMap')}
|
||||
size="md"
|
||||
>
|
||||
<EyeIcon className="w-3.5 h-3.5" filled={isPinned || isActive} />
|
||||
<EyeIcon className="w-5 h-5 md:w-3.5 md:h-3.5" filled={isPinned || isActive} />
|
||||
</IconButton>
|
||||
)}
|
||||
<IconButton onClick={() => onRemove()} title={t('travel.removeTravelTime')}>
|
||||
|
|
|
|||
|
|
@ -110,14 +110,14 @@ export function ActiveFiltersPanel({
|
|||
>
|
||||
<button
|
||||
onClick={onToggleCollapsed}
|
||||
className="shrink-0 flex items-center justify-between border-b border-l-4 border-warm-200 border-l-teal-500 bg-white px-3 py-2 cursor-pointer shadow-sm hover:bg-warm-50 dark:border-navy-700 dark:border-l-teal-400 dark:bg-navy-900 dark:hover:bg-navy-800"
|
||||
className="shrink-0 flex items-center justify-between border-b border-l-4 border-teal-200 border-l-teal-600 bg-teal-50 px-3 py-2.5 cursor-pointer shadow-md ring-1 ring-inset ring-teal-100 hover:bg-teal-100 dark:border-teal-900/50 dark:border-l-teal-300 dark:bg-teal-950/30 dark:ring-teal-800/60 dark:hover:bg-teal-900/40"
|
||||
>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-sm font-semibold text-navy-950 dark:text-warm-100">
|
||||
<span className="text-sm font-bold text-navy-950 dark:text-warm-100">
|
||||
{t('filters.activeFilters')}
|
||||
</span>
|
||||
{badgeCount > 0 && (
|
||||
<span className="rounded-full bg-teal-50 px-1.5 py-0.5 text-xs font-medium text-teal-700 ring-1 ring-teal-100 dark:bg-teal-900/30 dark:text-teal-300 dark:ring-teal-800">
|
||||
<span className="rounded-full bg-teal-600 px-1.5 py-0.5 text-xs font-bold text-white ring-1 ring-teal-700 dark:bg-teal-300 dark:text-navy-950 dark:ring-teal-200">
|
||||
{badgeCount}
|
||||
</span>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -110,9 +110,9 @@ export function AddFilterPanel({
|
|||
>
|
||||
<button
|
||||
onClick={onToggleCollapsed}
|
||||
className="shrink-0 flex items-center justify-between border-b border-l-4 border-warm-200 border-l-teal-500 bg-white px-3 py-2 cursor-pointer shadow-sm hover:bg-warm-50 dark:border-navy-700 dark:border-l-teal-400 dark:bg-navy-900 dark:hover:bg-navy-800"
|
||||
className="shrink-0 flex items-center justify-between border-b border-l-4 border-teal-200 border-l-teal-600 bg-teal-50 px-3 py-2.5 cursor-pointer shadow-md ring-1 ring-inset ring-teal-100 hover:bg-teal-100 dark:border-teal-900/50 dark:border-l-teal-300 dark:bg-teal-950/30 dark:ring-teal-800/60 dark:hover:bg-teal-900/40"
|
||||
>
|
||||
<span className="text-sm font-semibold text-navy-950 dark:text-warm-100">
|
||||
<span className="text-sm font-bold text-navy-950 dark:text-warm-100">
|
||||
{t('filters.addFilter')}
|
||||
</span>
|
||||
<ChevronIcon
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { useEffect, type FormEvent } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { Trans, useTranslation } from 'react-i18next';
|
||||
|
||||
import { CloseIcon, SpinnerIcon } from '../../ui/icons';
|
||||
|
||||
|
|
@ -8,9 +8,11 @@ interface ClearFiltersDialogProps {
|
|||
saveName: string;
|
||||
saveError: string | null;
|
||||
savingSearch?: boolean;
|
||||
editingSearchName?: string | null;
|
||||
onClose: () => void;
|
||||
onSaveNameChange: (value: string) => void;
|
||||
onSaveAndClear: (e: FormEvent) => void;
|
||||
onUpdateAndClear?: () => void;
|
||||
onClearWithoutSaving: () => void;
|
||||
}
|
||||
|
||||
|
|
@ -19,12 +21,15 @@ export function ClearFiltersDialog({
|
|||
saveName,
|
||||
saveError,
|
||||
savingSearch,
|
||||
editingSearchName,
|
||||
onClose,
|
||||
onSaveNameChange,
|
||||
onSaveAndClear,
|
||||
onUpdateAndClear,
|
||||
onClearWithoutSaving,
|
||||
}: ClearFiltersDialogProps) {
|
||||
const { t } = useTranslation();
|
||||
const isEditing = !!editingSearchName && !!onUpdateAndClear;
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
|
|
@ -55,6 +60,40 @@ export function ClearFiltersDialog({
|
|||
<CloseIcon className="w-5 h-5" />
|
||||
</button>
|
||||
</div>
|
||||
{isEditing ? (
|
||||
<div className="p-5 pt-2 space-y-4">
|
||||
<p className="text-sm text-warm-600 dark:text-warm-400">
|
||||
<Trans
|
||||
i18nKey="filters.clearAllUpdatePrompt"
|
||||
values={{ name: editingSearchName }}
|
||||
components={{
|
||||
strong: (
|
||||
<strong className="font-semibold text-navy-950 dark:text-warm-100" />
|
||||
),
|
||||
}}
|
||||
/>
|
||||
</p>
|
||||
{saveError && <p className="text-sm text-red-600 dark:text-red-300">{saveError}</p>}
|
||||
<div className="flex flex-col items-stretch gap-3 sm:flex-row sm:flex-wrap sm:items-center sm:justify-center">
|
||||
<button
|
||||
type="button"
|
||||
onClick={onClearWithoutSaving}
|
||||
className="px-4 py-2 text-sm rounded border border-warm-200 dark:border-warm-700 text-warm-700 dark:text-warm-300 hover:bg-warm-50 dark:hover:bg-warm-700"
|
||||
>
|
||||
{t('filters.clearWithoutUpdating')}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={onUpdateAndClear}
|
||||
disabled={savingSearch}
|
||||
className="flex items-center justify-center gap-2 px-4 py-2 text-sm rounded bg-teal-600 text-white font-medium hover:bg-teal-700 disabled:opacity-50 disabled:cursor-wait"
|
||||
>
|
||||
{savingSearch && <SpinnerIcon className="w-4 h-4 animate-spin" />}
|
||||
{savingSearch ? t('savedPage.updating') : t('filters.updateAndClear')}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<form onSubmit={onSaveAndClear} className="p-5 pt-2 space-y-4">
|
||||
<p className="text-sm text-warm-600 dark:text-warm-400">
|
||||
{t('filters.clearAllSavePrompt')}
|
||||
|
|
@ -88,6 +127,7 @@ export function ClearFiltersDialog({
|
|||
</button>
|
||||
</div>
|
||||
</form>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ interface MobileMapPageProps {
|
|||
renderPropertiesPane: () => ReactNode;
|
||||
toasts: ReactNode;
|
||||
upgradeModal: ReactNode;
|
||||
editingBar?: ReactNode;
|
||||
}
|
||||
|
||||
export function MobileMapPage({
|
||||
|
|
@ -95,6 +96,7 @@ export function MobileMapPage({
|
|||
renderPropertiesPane,
|
||||
toasts,
|
||||
upgradeModal,
|
||||
editingBar,
|
||||
}: MobileMapPageProps) {
|
||||
return (
|
||||
<div className="flex-1 overflow-hidden relative">
|
||||
|
|
@ -154,6 +156,7 @@ export function MobileMapPage({
|
|||
|
||||
<MobileBottomSheet
|
||||
legend={mobileLegend}
|
||||
editingBar={editingBar}
|
||||
onCoveredHeightChange={onBottomSheetCoveredHeightChange}
|
||||
>
|
||||
{filtersPane}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,10 @@ export interface MapPageProps {
|
|||
deferTutorial?: boolean;
|
||||
onSaveSearch?: (name: string, paramsOverride?: string) => Promise<void>;
|
||||
savingSearch?: boolean;
|
||||
editingSearch?: { id: string; name: string } | null;
|
||||
onCancelEdit?: () => void;
|
||||
onUpdateEdit?: (params: string) => Promise<void>;
|
||||
onUpdateEditInPlace?: (params: string) => Promise<void>;
|
||||
}
|
||||
|
||||
export type MapFlyTo = (lat: number, lng: number, zoom: number, options?: MapFlyToOptions) => void;
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ export function DestinationDropdown({
|
|||
(portal ? (
|
||||
createPortal(dropdown, document.body)
|
||||
) : (
|
||||
<div className="absolute top-full left-0 right-0 mt-1 z-30">{dropdown}</div>
|
||||
<div className="relative z-30 mt-1">{dropdown}</div>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { useState, useCallback, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { Trans, useTranslation } from 'react-i18next';
|
||||
import type { AuthUser } from '../../hooks/useAuth';
|
||||
import { shortenUrl, prewarmScreenshot, paramsWithLanguage } from '../../lib/api';
|
||||
import { copyToClipboard } from '../../lib/clipboard';
|
||||
|
|
@ -64,6 +64,11 @@ export const PAGE_PATHS: Record<Page, string> = {
|
|||
|
||||
const DASHBOARD_TABLET_SIDEBAR_QUERY = '(min-width: 768px) and (max-width: 1023px)';
|
||||
|
||||
/**
 * Identity of the search currently being edited: its record id and its
 * display name (the name is what the header's editing bar shows).
 */
export interface EditingSearchState {
|
||||
id: string;
|
||||
name: string;
|
||||
}
|
||||
|
||||
export default function Header({
|
||||
activePage,
|
||||
activeHash,
|
||||
|
|
@ -74,6 +79,9 @@ export default function Header({
|
|||
dashboardParams,
|
||||
onSaveSearch,
|
||||
savingSearch,
|
||||
editingSearch,
|
||||
onCancelEdit,
|
||||
onUpdateEdit,
|
||||
user,
|
||||
onLoginClick,
|
||||
onRegisterClick,
|
||||
|
|
@ -89,6 +97,9 @@ export default function Header({
|
|||
dashboardParams: string;
|
||||
onSaveSearch: (() => void) | null;
|
||||
savingSearch: boolean;
|
||||
editingSearch: EditingSearchState | null;
|
||||
onCancelEdit: () => void;
|
||||
onUpdateEdit: () => void;
|
||||
user: AuthUser | null;
|
||||
onLoginClick: () => void;
|
||||
onRegisterClick: () => void;
|
||||
|
|
@ -170,9 +181,38 @@ export default function Header({
|
|||
: 'text-warm-300 hover:bg-navy-800 hover:text-white'
|
||||
}`;
|
||||
|
||||
const showEditingBar = !isMobile && editingSearch && activePage === 'dashboard';
|
||||
|
||||
return (
|
||||
<>
|
||||
<header className="relative z-50 h-12 bg-navy-900 text-white flex items-center px-4 shrink-0">
|
||||
{showEditingBar && (
|
||||
<div className="pointer-events-none absolute inset-x-0 top-0 bottom-0 flex items-center justify-center px-4">
|
||||
<div className="pointer-events-auto flex items-center gap-3 max-w-[60%]">
|
||||
<span className="text-sm text-warm-300 truncate" title={editingSearch.name}>
|
||||
<Trans
|
||||
i18nKey="savedPage.isBeingUpdated"
|
||||
values={{ name: editingSearch.name }}
|
||||
components={{ strong: <strong className="font-semibold text-white" /> }}
|
||||
/>
|
||||
</span>
|
||||
<button
|
||||
onClick={onCancelEdit}
|
||||
className="cursor-pointer px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm"
|
||||
>
|
||||
{t('common.cancel')}
|
||||
</button>
|
||||
<button
|
||||
onClick={onUpdateEdit}
|
||||
disabled={savingSearch}
|
||||
className="cursor-pointer px-3 py-1.5 rounded bg-teal-600 hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-wait flex items-center gap-1.5"
|
||||
>
|
||||
{savingSearch && <SpinnerIcon className="w-4 h-4 animate-spin" />}
|
||||
{savingSearch ? t('savedPage.updating') : t('common.update')}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* Left: Logo + nav */}
|
||||
<div className="flex items-center gap-4">
|
||||
<a
|
||||
|
|
@ -261,7 +301,7 @@ export default function Header({
|
|||
{exportState.exporting ? t('header.exporting') : t('header.exportLabel')}
|
||||
</button>
|
||||
)}
|
||||
{onSaveSearch && (
|
||||
{onSaveSearch && !editingSearch && (
|
||||
<button
|
||||
onClick={onSaveSearch}
|
||||
disabled={savingSearch}
|
||||
|
|
@ -369,6 +409,7 @@ export default function Header({
|
|||
exportState={exportState}
|
||||
onSaveSearch={onSaveSearch}
|
||||
savingSearch={savingSearch}
|
||||
isEditingSearch={!!editingSearch}
|
||||
user={user}
|
||||
onLoginClick={onLoginClick}
|
||||
onRegisterClick={onRegisterClick}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ interface MobileMenuProps {
|
|||
exportState: HeaderExportState | null;
|
||||
onSaveSearch: (() => void) | null;
|
||||
savingSearch: boolean;
|
||||
isEditingSearch: boolean;
|
||||
user: AuthUser | null;
|
||||
onLoginClick: () => void;
|
||||
onRegisterClick: () => void;
|
||||
|
|
@ -40,6 +41,7 @@ export default function MobileMenu({
|
|||
exportState,
|
||||
onSaveSearch,
|
||||
savingSearch,
|
||||
isEditingSearch,
|
||||
user,
|
||||
onLoginClick,
|
||||
onRegisterClick,
|
||||
|
|
@ -144,7 +146,7 @@ export default function MobileMenu({
|
|||
) : (
|
||||
<BookmarkIcon className="w-4 h-4" />
|
||||
)}
|
||||
{t('common.save')}
|
||||
{isEditingSearch ? t('common.update') : t('common.save')}
|
||||
</button>
|
||||
)}
|
||||
{dashboardSavedItem}
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ describe('useMapData', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('resets the colour range to drag preview data while a slider is active', async () => {
|
||||
it('resets the colour range to visible drag preview data while a slider is active', async () => {
|
||||
const bounds = { south: 1, west: 1, north: 2, east: 2 };
|
||||
const features: FeatureMeta[] = [
|
||||
{
|
||||
|
|
@ -139,16 +139,28 @@ describe('useMapData', () => {
|
|||
const filters = { price: [20, 80] as [number, number] };
|
||||
|
||||
const { result, rerender } = renderHook(
|
||||
({ activeFeature }: { activeFeature: string | null }) =>
|
||||
({
|
||||
activeFeature,
|
||||
filterRange,
|
||||
}: {
|
||||
activeFeature: string | null;
|
||||
filterRange: [number, number] | null;
|
||||
}) =>
|
||||
useMapData({
|
||||
filters,
|
||||
features,
|
||||
viewFeature: 'price',
|
||||
activeFeature,
|
||||
pinnedFeature: null,
|
||||
filterRange,
|
||||
travelTimeEntries: noTravelTimeEntries,
|
||||
}),
|
||||
{ initialProps: { activeFeature: null as string | null } }
|
||||
{
|
||||
initialProps: {
|
||||
activeFeature: null as string | null,
|
||||
filterRange: filters.price,
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
|
|
@ -171,27 +183,58 @@ describe('useMapData', () => {
|
|||
expect(result.current.colorRange?.[1]).toBeCloseTo(77);
|
||||
|
||||
await act(async () => {
|
||||
rerender({ activeFeature: 'price' });
|
||||
rerender({ activeFeature: 'price', filterRange: filters.price });
|
||||
await flushPromises();
|
||||
});
|
||||
expect(requests).toHaveLength(2);
|
||||
|
||||
const previewData = [
|
||||
{
|
||||
h3: 'preview-outside-low',
|
||||
count: 1,
|
||||
lat: 1.1,
|
||||
lon: 1.1,
|
||||
min_price: 0,
|
||||
max_price: 10,
|
||||
avg_price: 5,
|
||||
},
|
||||
{
|
||||
h3: 'preview-low',
|
||||
count: 1,
|
||||
lat: 1.25,
|
||||
lon: 1.25,
|
||||
min_price: 20,
|
||||
max_price: 20,
|
||||
avg_price: 20,
|
||||
},
|
||||
{
|
||||
h3: 'preview-high',
|
||||
count: 1,
|
||||
lat: 1.75,
|
||||
lon: 1.75,
|
||||
min_price: 80,
|
||||
max_price: 80,
|
||||
avg_price: 80,
|
||||
},
|
||||
{
|
||||
h3: 'preview-outside-high',
|
||||
count: 1,
|
||||
lat: 1.9,
|
||||
lon: 1.9,
|
||||
min_price: 90,
|
||||
max_price: 100,
|
||||
avg_price: 95,
|
||||
},
|
||||
];
|
||||
|
||||
await act(async () => {
|
||||
requests[1].resolve(
|
||||
response([
|
||||
{ h3: 'preview-low', count: 1, lat: 1.25, lon: 1.25, avg_price: 0 },
|
||||
{ h3: 'preview-high', count: 1, lat: 1.75, lon: 1.75, avg_price: 100 },
|
||||
])
|
||||
);
|
||||
requests[1].resolve(response(previewData));
|
||||
await flushPromises();
|
||||
});
|
||||
|
||||
expect(result.current.data).toEqual([
|
||||
{ h3: 'preview-low', count: 1, lat: 1.25, lon: 1.25, avg_price: 0 },
|
||||
{ h3: 'preview-high', count: 1, lat: 1.75, lon: 1.75, avg_price: 100 },
|
||||
]);
|
||||
expect(result.current.colorRange?.[0]).toBeCloseTo(5);
|
||||
expect(result.current.colorRange?.[1]).toBeCloseTo(95);
|
||||
expect(result.current.data).toEqual(previewData);
|
||||
expect(result.current.colorRange?.[0]).toBeCloseTo(23);
|
||||
expect(result.current.colorRange?.[1]).toBeCloseTo(77);
|
||||
});
|
||||
|
||||
it('does not use metadata min/max while slider preview colour data is loading', async () => {
|
||||
|
|
@ -270,6 +313,82 @@ describe('useMapData', () => {
|
|||
expect(result.current.colorRange?.[1]).toBeCloseTo(95);
|
||||
});
|
||||
|
||||
// Scenario: data for a wide committed filter has loaded, then a slider drag
// starts with a narrower filter. Until the second (preview) request resolves,
// colorRange must be null rather than derived from the earlier response.
// NOTE(review): the expected 950 / 23 / 77 values look like 5th/95th
// percentile interpolation of the returned avg_price values; confirm against
// useMapData's range computation.
it('does not use stale committed feature data while slider preview colour data is loading', async () => {
|
||||
const bounds = { south: 1, west: 1, north: 2, east: 2 };
|
||||
const features: FeatureMeta[] = [
|
||||
{
|
||||
name: 'price',
|
||||
type: 'numeric',
|
||||
min: 0,
|
||||
max: 1_000,
|
||||
},
|
||||
];
|
||||
|
||||
const { result, rerender } = renderHook(
|
||||
({
|
||||
filters,
|
||||
activeFeature,
|
||||
}: {
|
||||
filters: Record<string, [number, number]>;
|
||||
activeFeature: string | null;
|
||||
}) =>
|
||||
useMapData({
|
||||
filters,
|
||||
features,
|
||||
viewFeature: 'price',
|
||||
activeFeature,
|
||||
pinnedFeature: null,
|
||||
travelTimeEntries: noTravelTimeEntries,
|
||||
}),
|
||||
{
|
||||
initialProps: {
|
||||
filters: { price: [0, 1_000] as [number, number] },
|
||||
activeFeature: null as string | null,
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
// First load: report the viewport, advance the fake timers by 150 ms, then
// resolve the first request with the wide-range data.
await act(async () => {
|
||||
result.current.handleViewChange(viewChange(bounds));
|
||||
});
|
||||
await act(async () => {
|
||||
vi.advanceTimersByTime(150);
|
||||
});
|
||||
await act(async () => {
|
||||
requests[0].resolve(
|
||||
response([
|
||||
{ h3: 'stale-low', count: 1, lat: 1.25, lon: 1.25, avg_price: 0 },
|
||||
{ h3: 'stale-high', count: 1, lat: 1.75, lon: 1.75, avg_price: 1_000 },
|
||||
])
|
||||
);
|
||||
await flushPromises();
|
||||
});
|
||||
expect(result.current.colorRange?.[1]).toBeCloseTo(950);
|
||||
|
||||
// Switch to the narrower filter with 'price' as the active feature; the
// second request is still pending at this point.
await act(async () => {
|
||||
rerender({
|
||||
filters: { price: [20, 80] },
|
||||
activeFeature: 'price',
|
||||
});
|
||||
await flushPromises();
|
||||
});
|
||||
|
||||
expect(result.current.colorRange).toBeNull();
|
||||
|
||||
// Once the preview response arrives, the range is derived from it.
await act(async () => {
|
||||
requests[1].resolve(
|
||||
response([
|
||||
{ h3: 'preview-low', count: 1, lat: 1.25, lon: 1.25, avg_price: 20 },
|
||||
{ h3: 'preview-high', count: 1, lat: 1.75, lon: 1.75, avg_price: 80 },
|
||||
])
|
||||
);
|
||||
await flushPromises();
|
||||
});
|
||||
|
||||
expect(result.current.colorRange?.[0]).toBeCloseTo(23);
|
||||
expect(result.current.colorRange?.[1]).toBeCloseTo(77);
|
||||
});
|
||||
|
||||
it('does not reuse cached drag preview data when the drag request changes', async () => {
|
||||
const bounds = { south: 1, west: 1, north: 2, east: 2 };
|
||||
const features: FeatureMeta[] = [
|
||||
|
|
|
|||
|
|
@ -45,18 +45,38 @@ interface UseMapDataOptions {
|
|||
viewFeature: string | null;
|
||||
activeFeature: string | null;
|
||||
pinnedFeature: string | null;
|
||||
filterRange?: [number, number] | null;
|
||||
travelTimeEntries: TravelTimeEntry[];
|
||||
/** Share-link code from the URL; appended to data fetches so the backend
|
||||
* grants bbox-scoped access for unlicensed recipients. */
|
||||
shareCode?: string;
|
||||
}
|
||||
|
||||
/**
 * Narrows an arbitrary value to a finite number.
 *
 * @param value - Anything, typically a property read from a data row.
 * @returns The value itself when it is a number and finite; `null` for
 *   non-numbers, `NaN`, and `±Infinity`.
 */
function getFiniteNumber(value: unknown): number | null {
  if (typeof value !== 'number') {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}
|
||||
|
||||
/**
 * Maps an item's value onto the visible range.
 *
 * @param value - The item's representative value.
 * @param minValue - The item's minimum, or `null` to use `value` instead.
 * @param maxValue - The item's maximum, or `null` to use `value` instead.
 * @param visibleRange - `[low, high]` bounds, or `null` for no restriction.
 * @returns `value` unchanged when there is no visible range; `null` when the
 *   item's [min, max] span lies entirely outside the visible range; otherwise
 *   `value` clamped into the visible range.
 */
function valueInVisibleRange(
  value: number,
  minValue: number | null,
  maxValue: number | null,
  visibleRange: [number, number] | null
): number | null {
  if (!visibleRange) {
    return value;
  }

  const lower = visibleRange[0];
  const upper = visibleRange[1];

  // A missing min/max collapses the item's span to the single value.
  const spanStart = minValue ?? value;
  const spanEnd = maxValue ?? value;

  const entirelyBelow = spanEnd < lower;
  const entirelyAbove = spanStart > upper;
  if (entirelyBelow || entirelyAbove) {
    return null;
  }

  const capped = Math.min(upper, value);
  return Math.max(lower, capped);
}
|
||||
|
||||
export function useMapData({
|
||||
filters,
|
||||
features,
|
||||
viewFeature,
|
||||
activeFeature,
|
||||
pinnedFeature,
|
||||
filterRange = null,
|
||||
travelTimeEntries,
|
||||
shareCode,
|
||||
}: UseMapDataOptions) {
|
||||
|
|
@ -487,8 +507,15 @@ export function useMapData({
|
|||
if (lat < bounds.south || lat > bounds.north || lng < bounds.west || lng > bounds.east)
|
||||
continue;
|
||||
}
|
||||
const val = feat.properties[`avg_${dataViewFeature}`];
|
||||
if (typeof val === 'number' && !isNaN(val)) vals.push(val);
|
||||
const val = getFiniteNumber(feat.properties[`avg_${dataViewFeature}`]);
|
||||
if (val == null) continue;
|
||||
const visibleValue = valueInVisibleRange(
|
||||
val,
|
||||
getFiniteNumber(feat.properties[`min_${dataViewFeature}`]),
|
||||
getFiniteNumber(feat.properties[`max_${dataViewFeature}`]),
|
||||
filterRange
|
||||
);
|
||||
if (visibleValue != null) vals.push(visibleValue);
|
||||
}
|
||||
} else {
|
||||
if (data.length === 0) return null;
|
||||
|
|
@ -498,8 +525,15 @@ export function useMapData({
|
|||
if (lat < bounds.south || lat > bounds.north || lon < bounds.west || lon > bounds.east)
|
||||
continue;
|
||||
}
|
||||
const val = item[`avg_${dataViewFeature}`];
|
||||
if (typeof val === 'number' && !isNaN(val)) vals.push(val);
|
||||
const val = getFiniteNumber(item[`avg_${dataViewFeature}`]);
|
||||
if (val == null) continue;
|
||||
const visibleValue = valueInVisibleRange(
|
||||
val,
|
||||
getFiniteNumber(item[`min_${dataViewFeature}`]),
|
||||
getFiniteNumber(item[`max_${dataViewFeature}`]),
|
||||
filterRange
|
||||
);
|
||||
if (visibleValue != null) vals.push(visibleValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -515,6 +549,7 @@ export function useMapData({
|
|||
dataViewFeature,
|
||||
effectivePostcodeData,
|
||||
features,
|
||||
filterRange,
|
||||
hasCurrentRangeData,
|
||||
usePostcodeView,
|
||||
]);
|
||||
|
|
|
|||
|
|
@ -176,6 +176,46 @@ export function useSavedSearches(userId: string | null) {
|
|||
}
|
||||
}, []);
|
||||
|
||||
/**
 * Stores new params on an existing saved search.
 *
 * Does nothing when there is no userId. Otherwise it toggles the saving flag
 * around the update, records the 'Search Update' event, and patches the local
 * list entry (new params, empty screenshotUrl). A screenshot refresh is then
 * started without being awaited: the image is fetched, uploaded to the record,
 * and the list is re-fetched; failures there are only logged.
 *
 * Errors from the params update itself are stored via setError and re-thrown.
 */
const updateSearchParams = useCallback(
  async (id: string, params: string) => {
    if (!userId) {
      return;
    }

    setSaving(true);
    setError(null);

    try {
      const updated = await pb.collection('saved_searches').update(id, { params });
      trackEvent('Search Update');

      setSearches((current) =>
        current.map((search) => {
          if (search.id !== id) return search;
          return { ...search, params, screenshotUrl: '' };
        })
      );

      // Refresh screenshot in the background (intentionally not awaited).
      const screenshotRequest = fetch(
        apiUrl('screenshot', new URLSearchParams(params)),
        authHeaders()
      );
      screenshotRequest
        .then(async (res) => {
          if (!res.ok) throw new Error(`Screenshot ${res.status}`);
          const image = await res.blob();
          const patch = new FormData();
          patch.append('screenshot', image, 'screenshot.jpg');
          return pb.collection('saved_searches').update(updated.id, patch);
        })
        .then(() => fetchSearches())
        .catch((reason) => {
          console.warn('Background screenshot failed:', reason);
        });
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to update search');
      throw err;
    } finally {
      setSaving(false);
    }
  },
  [userId, fetchSearches]
);
|
||||
|
||||
return {
|
||||
searches,
|
||||
loading,
|
||||
|
|
@ -186,5 +226,6 @@ export function useSavedSearches(userId: string | null) {
|
|||
deleteSearch,
|
||||
updateSearchNotes,
|
||||
updateSearchName,
|
||||
updateSearchParams,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const de: Translations = {
|
|||
// ── Common ──────────────────────────────────────────
|
||||
common: {
|
||||
save: 'Speichern',
|
||||
update: 'Aktualisieren',
|
||||
cancel: 'Abbrechen',
|
||||
close: 'Schließen',
|
||||
delete: 'Löschen',
|
||||
|
|
@ -677,8 +678,12 @@ const de: Translations = {
|
|||
clearAll: 'Alle löschen',
|
||||
clearAllTitle: 'Alle Filter löschen?',
|
||||
clearAllSavePrompt: 'Möchtest du deine aktuellen Filter vor dem Löschen speichern?',
|
||||
clearAllUpdatePrompt:
|
||||
'<strong>{{name}}</strong> mit den aktuellen Filtern aktualisieren, bevor gelöscht wird?',
|
||||
saveAndClear: 'Speichern & löschen',
|
||||
updateAndClear: 'Aktualisieren & löschen',
|
||||
clearWithoutSaving: 'Ohne Speichern löschen',
|
||||
clearWithoutUpdating: 'Ohne Aktualisieren löschen',
|
||||
filtersOut: 'filtert {{value}} heraus',
|
||||
schoolType: 'Schultyp',
|
||||
schoolRating: 'Schulbewertung',
|
||||
|
|
@ -1280,6 +1285,8 @@ const de: Translations = {
|
|||
deleteSearch: 'Suche löschen',
|
||||
deleteSearchConfirm:
|
||||
'Möchtest du diese gespeicherte Suche wirklich löschen? Dies kann nicht rückgängig gemacht werden.',
|
||||
isBeingUpdated: '<strong>{{name}}</strong> wird aktualisiert',
|
||||
updating: 'Aktualisiere...',
|
||||
},
|
||||
|
||||
// ── Invites Page ───────────────────────────────────
|
||||
|
|
@ -1378,6 +1385,7 @@ const de: Translations = {
|
|||
'Property prices': 'Immobilienpreise',
|
||||
Transport: 'Verkehr',
|
||||
Education: 'Bildung',
|
||||
'Defining characteristics': 'Prägende Merkmale',
|
||||
'Area development': 'Gebietsentwicklung',
|
||||
Crime: 'Kriminalität',
|
||||
Neighbours: 'Nachbarn',
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ const en = {
|
|||
// ── Common ──────────────────────────────────────────
|
||||
common: {
|
||||
save: 'Save',
|
||||
update: 'Update',
|
||||
cancel: 'Cancel',
|
||||
close: 'Close',
|
||||
delete: 'Delete',
|
||||
|
|
@ -652,8 +653,11 @@ const en = {
|
|||
clearAll: 'Clear all',
|
||||
clearAllTitle: 'Clear all filters?',
|
||||
clearAllSavePrompt: 'Would you like to save your current filters before clearing?',
|
||||
clearAllUpdatePrompt: 'Update <strong>{{name}}</strong> with your current filters before clearing?',
|
||||
saveAndClear: 'Save & Clear',
|
||||
updateAndClear: 'Update & Clear',
|
||||
clearWithoutSaving: 'Clear without saving',
|
||||
clearWithoutUpdating: 'Clear without updating',
|
||||
filtersOut: 'filters out {{value}}',
|
||||
schoolType: 'School type',
|
||||
schoolRating: 'School rating',
|
||||
|
|
@ -1245,6 +1249,8 @@ const en = {
|
|||
notesPlaceholder: 'Jot down your thoughts...',
|
||||
deleteSearch: 'Delete search',
|
||||
deleteSearchConfirm: 'Are you sure you want to delete this saved search? This can’t be undone.',
|
||||
isBeingUpdated: '<strong>{{name}}</strong> is being updated',
|
||||
updating: 'Updating...',
|
||||
},
|
||||
|
||||
// ── Invites Page ───────────────────────────────────
|
||||
|
|
@ -1342,6 +1348,7 @@ const en = {
|
|||
'Property prices': 'Property prices',
|
||||
Transport: 'Transport',
|
||||
Education: 'Education',
|
||||
'Defining characteristics': 'Defining characteristics',
|
||||
'Area development': 'Area development',
|
||||
Crime: 'Crime',
|
||||
Neighbours: 'Neighbours',
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const fr: Translations = {
|
|||
// ── Common ──────────────────────────────────────────
|
||||
common: {
|
||||
save: 'Enregistrer',
|
||||
update: 'Mettre à jour',
|
||||
cancel: 'Annuler',
|
||||
close: 'Fermer',
|
||||
delete: 'Supprimer',
|
||||
|
|
@ -681,8 +682,12 @@ const fr: Translations = {
|
|||
clearAll: 'Tout effacer',
|
||||
clearAllTitle: 'Effacer tous les filtres ?',
|
||||
clearAllSavePrompt: 'Souhaitez-vous sauvegarder vos filtres actuels avant de les effacer ?',
|
||||
clearAllUpdatePrompt:
|
||||
'Mettre à jour <strong>{{name}}</strong> avec vos filtres actuels avant d’effacer ?',
|
||||
saveAndClear: 'Sauvegarder et effacer',
|
||||
updateAndClear: 'Mettre à jour et effacer',
|
||||
clearWithoutSaving: 'Effacer sans sauvegarder',
|
||||
clearWithoutUpdating: 'Effacer sans mettre à jour',
|
||||
filtersOut: 'exclut {{value}}',
|
||||
schoolType: 'Type d’école',
|
||||
schoolRating: 'Note de l’école',
|
||||
|
|
@ -1286,6 +1291,8 @@ const fr: Translations = {
|
|||
deleteSearch: 'Supprimer la recherche',
|
||||
deleteSearchConfirm:
|
||||
'Êtes-vous sûr de vouloir supprimer cette recherche enregistrée ? Cette action est irréversible.',
|
||||
isBeingUpdated: 'Mise à jour de <strong>{{name}}</strong>',
|
||||
updating: 'Mise à jour...',
|
||||
},
|
||||
|
||||
// ── Invites Page ───────────────────────────────────
|
||||
|
|
@ -1384,6 +1391,7 @@ const fr: Translations = {
|
|||
'Property prices': 'Prix immobiliers',
|
||||
Transport: 'Transports',
|
||||
Education: 'Éducation',
|
||||
'Defining characteristics': 'Caractéristiques déterminantes',
|
||||
'Area development': 'Développement du quartier',
|
||||
Crime: 'Criminalité',
|
||||
Neighbours: 'Voisins',
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import type { Translations } from './en';
|
|||
const hi: Translations = {
|
||||
common: {
|
||||
save: 'सहेजें',
|
||||
update: 'अपडेट करें',
|
||||
cancel: 'रद्द करें',
|
||||
close: 'बंद करें',
|
||||
delete: 'हटाएं',
|
||||
|
|
@ -649,8 +650,12 @@ const hi: Translations = {
|
|||
clearAll: 'सभी साफ करें',
|
||||
clearAllTitle: 'सभी फिल्टर साफ करें?',
|
||||
clearAllSavePrompt: 'क्या साफ करने से पहले आप अपने मौजूदा फिल्टर सहेजना चाहेंगे?',
|
||||
clearAllUpdatePrompt:
|
||||
'साफ करने से पहले <strong>{{name}}</strong> को अपने मौजूदा फिल्टर के साथ अपडेट करें?',
|
||||
saveAndClear: 'सहेजें और साफ करें',
|
||||
updateAndClear: 'अपडेट करें और साफ करें',
|
||||
clearWithoutSaving: 'बिना सहेजे साफ करें',
|
||||
clearWithoutUpdating: 'बिना अपडेट किए साफ करें',
|
||||
filtersOut: '{{value}} को फिल्टर करता है',
|
||||
schoolType: 'स्कूल प्रकार',
|
||||
schoolRating: 'स्कूल रेटिंग',
|
||||
|
|
@ -1210,6 +1215,8 @@ const hi: Translations = {
|
|||
deleteSearch: 'खोज हटाएं',
|
||||
deleteSearchConfirm:
|
||||
'क्या आप वाकई यह सहेजी गई खोज हटाना चाहते हैं? इसे वापस नहीं किया जा सकता.',
|
||||
isBeingUpdated: '<strong>{{name}}</strong> अपडेट हो रहा है',
|
||||
updating: 'अपडेट हो रहा है...',
|
||||
},
|
||||
|
||||
invitesPage: {
|
||||
|
|
@ -1299,6 +1306,7 @@ const hi: Translations = {
|
|||
'Property prices': 'संपत्ति कीमतें',
|
||||
Transport: 'परिवहन',
|
||||
Education: 'शिक्षा',
|
||||
'Defining characteristics': 'मुख्य विशेषताएं',
|
||||
'Area development': 'क्षेत्र विकास',
|
||||
Crime: 'अपराध',
|
||||
Neighbours: 'पड़ोसी',
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const hu: Translations = {
|
|||
// ── Common ──────────────────────────────────────────
|
||||
common: {
|
||||
save: 'Mentés',
|
||||
update: 'Frissítés',
|
||||
cancel: 'Mégse',
|
||||
close: 'Bezárás',
|
||||
delete: 'Törlés',
|
||||
|
|
@ -665,8 +666,12 @@ const hu: Translations = {
|
|||
clearAll: 'Összes törlése',
|
||||
clearAllTitle: 'Összes szűrő törlése?',
|
||||
clearAllSavePrompt: 'Szeretnéd menteni a jelenlegi szűrőket a törlés előtt?',
|
||||
clearAllUpdatePrompt:
|
||||
'Frissíted a(z) <strong>{{name}}</strong> keresést a jelenlegi szűrőkkel törlés előtt?',
|
||||
saveAndClear: 'Mentés és törlés',
|
||||
updateAndClear: 'Frissítés és törlés',
|
||||
clearWithoutSaving: 'Törlés mentés nélkül',
|
||||
clearWithoutUpdating: 'Törlés frissítés nélkül',
|
||||
filtersOut: '{{value}} elemet kiszűr',
|
||||
schoolType: 'Iskolatípus',
|
||||
schoolRating: 'Iskolai értékelés',
|
||||
|
|
@ -1264,6 +1269,8 @@ const hu: Translations = {
|
|||
deleteSearch: 'Keresés törlése',
|
||||
deleteSearchConfirm:
|
||||
'Biztosan törölni szeretnéd ezt a mentett keresést? Ez nem vonható vissza.',
|
||||
isBeingUpdated: '<strong>{{name}}</strong> frissítése folyamatban',
|
||||
updating: 'Frissítés...',
|
||||
},
|
||||
|
||||
// ── Invites Page ───────────────────────────────────
|
||||
|
|
@ -1363,6 +1370,7 @@ const hu: Translations = {
|
|||
'Property prices': 'Ingatlanárak',
|
||||
Transport: 'Közlekedés',
|
||||
Education: 'Oktatás',
|
||||
'Defining characteristics': 'Meghatározó jellemzők',
|
||||
'Area development': 'Területi fejlődés',
|
||||
Crime: 'Bűnözés',
|
||||
Neighbours: 'Szomszédok',
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const zh: Translations = {
|
|||
// ── Common ──────────────────────────────────────────
|
||||
common: {
|
||||
save: '保存',
|
||||
update: '更新',
|
||||
cancel: '取消',
|
||||
close: '关闭',
|
||||
delete: '删除',
|
||||
|
|
@ -616,8 +617,11 @@ const zh: Translations = {
|
|||
clearAll: '全部清除',
|
||||
clearAllTitle: '清除所有筛选条件?',
|
||||
clearAllSavePrompt: '是否要在清除前保存当前的筛选条件?',
|
||||
clearAllUpdatePrompt: '在清除前使用当前筛选条件更新 <strong>{{name}}</strong>?',
|
||||
saveAndClear: '保存并清除',
|
||||
updateAndClear: '更新并清除',
|
||||
clearWithoutSaving: '不保存直接清除',
|
||||
clearWithoutUpdating: '不更新直接清除',
|
||||
filtersOut: '筛除 {{value}}',
|
||||
schoolType: '学校类型',
|
||||
schoolRating: '学校评级',
|
||||
|
|
@ -1195,6 +1199,8 @@ const zh: Translations = {
|
|||
notesPlaceholder: '记下您的想法...',
|
||||
deleteSearch: '删除搜索',
|
||||
deleteSearchConfirm: '确定要删除这个保存的搜索吗?此操作无法撤销。',
|
||||
isBeingUpdated: '正在更新 <strong>{{name}}</strong>',
|
||||
updating: '更新中...',
|
||||
},
|
||||
|
||||
// ── Invites Page ───────────────────────────────────
|
||||
|
|
@ -1290,6 +1296,7 @@ const zh: Translations = {
|
|||
'Property prices': '房价',
|
||||
Transport: '交通',
|
||||
Education: '教育',
|
||||
'Defining characteristics': '主要特征',
|
||||
'Area development': '区域发展',
|
||||
Crime: '犯罪',
|
||||
Neighbours: '邻居',
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
<title>Perfect Postcode - Find where to buy before browsing listings</title>
|
||||
<meta name="description" content="Filter every postcode in England by budget, commute, schools, crime, noise, broadband, property prices and amenities before you start chasing viewings." />
|
||||
<meta name="x-og-placeholder" content="__PERFECT_POSTCODE_OG_TAGS__" />
|
||||
<script id="perfect-postcode-bugsink-config" type="application/json">__PERFECT_POSTCODE_BUGSINK_CONFIG__</script>
|
||||
<script>
|
||||
(function() {
|
||||
var theme = localStorage.getItem('theme');
|
||||
|
|
|
|||
|
|
@ -1,15 +1,29 @@
|
|||
import { createRoot } from 'react-dom/client';
|
||||
import App from './App';
|
||||
import { i18nReady } from './i18n';
|
||||
import { BugsinkErrorBoundary, initBugsink } from './lib/bugsink';
|
||||
import './index.css';
|
||||
import './hooks/usePlausible';
|
||||
|
||||
initBugsink();
|
||||
|
||||
const container = document.getElementById('root');
|
||||
if (!container) {
|
||||
throw new Error('Root element not found');
|
||||
}
|
||||
const root = container;
|
||||
|
||||
/** Static full-screen message shown in place of the app when the error boundary trips. */
function AppErrorFallback() {
  const heading = <h1 className="text-xl font-semibold">Something went wrong</h1>;
  const hint = (
    <p className="mt-2 text-sm text-warm-600 dark:text-warm-300">Refresh the page to try again.</p>
  );

  return (
    <div className="flex min-h-screen items-center justify-center bg-warm-50 px-6 text-center text-warm-900 dark:bg-navy-950 dark:text-warm-100">
      <div>
        {heading}
        {hint}
      </div>
    </div>
  );
}
|
||||
|
||||
function renderApp() {
|
||||
const hasPrerenderedMarkup = root.children.length > 0;
|
||||
|
||||
|
|
@ -18,7 +32,11 @@ function renderApp() {
|
|||
}
|
||||
root.removeAttribute('data-prerender-path');
|
||||
|
||||
createRoot(root).render(<App />);
|
||||
createRoot(root).render(
|
||||
<BugsinkErrorBoundary fallback={<AppErrorFallback />}>
|
||||
<App />
|
||||
</BugsinkErrorBoundary>
|
||||
);
|
||||
}
|
||||
|
||||
void i18nReady.then(renderApp);
|
||||
|
|
|
|||
100
frontend/src/lib/bugsink.tsx
Normal file
100
frontend/src/lib/bugsink.tsx
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
import * as Sentry from '@sentry/react';
|
||||
import type { ReactElement, ReactNode } from 'react';
|
||||
|
||||
declare const __BUGSINK_DSN__: string | undefined;
|
||||
declare const __BUGSINK_ENVIRONMENT__: string | undefined;
|
||||
declare const __BUGSINK_RELEASE__: string | undefined;
|
||||
declare const __BUGSINK_SEND_DEFAULT_PII__: boolean | undefined;
|
||||
|
||||
interface BugsinkConfig {
|
||||
dsn?: string;
|
||||
environment?: string;
|
||||
release?: string;
|
||||
sendDefaultPii?: boolean;
|
||||
}
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
__PERFECT_POSTCODE_BUGSINK__?: BugsinkConfig;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Normalises an arbitrary value to a trimmed, non-blank string.
 *
 * @returns the trimmed text, or `undefined` when `value` is not a string or
 *          contains only whitespace.
 */
function nonempty(value: unknown): string | undefined {
  const text = typeof value === 'string' ? value.trim() : '';
  return text === '' ? undefined : text;
}
|
||||
|
||||
/**
 * Reads a build-time string constant, treating blank or non-string values as
 * absent. Delegates to `nonempty` for the normalisation.
 */
function readBuildTimeString(value: unknown): string | undefined {
  const normalised = nonempty(value);
  return normalised;
}
|
||||
|
||||
/**
 * Reads a build-time boolean constant. Only the literal `true` enables the
 * flag; `false` and every non-boolean value yield `false`.
 */
function readBuildTimeBoolean(value: unknown): boolean {
  return value === true;
}
|
||||
|
||||
/**
 * Reads the runtime Bugsink config embedded in the page.
 *
 * The config is expected as JSON inside the
 * `#perfect-postcode-bugsink-config` script element. When the element is
 * missing, empty, still holds the unreplaced placeholder, or does not contain
 * a JSON object, the previously cached `window.__PERFECT_POSTCODE_BUGSINK__`
 * value is used instead (or `{}` if there is none).
 *
 * @returns the parsed config object; always an object, never `null`.
 */
function readRuntimeConfig(): BugsinkConfig {
  // No DOM available (e.g. not running in a browser): nothing to read.
  if (typeof document === 'undefined') {
    return {};
  }

  const element = document.getElementById('perfect-postcode-bugsink-config');
  const json = element?.textContent?.trim();

  if (json && json !== '__PERFECT_POSTCODE_BUGSINK_CONFIG__') {
    try {
      const parsed: unknown = JSON.parse(json);
      // JSON.parse also accepts `null`, arrays and primitives; only a plain
      // object is a usable config, otherwise callers would crash on `.dsn`.
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        const config = parsed as BugsinkConfig;
        window.__PERFECT_POSTCODE_BUGSINK__ = config;
        return config;
      }
    } catch {
      // Malformed JSON: fall through to the cached value below.
    }
  }

  return window.__PERFECT_POSTCODE_BUGSINK__ ?? {};
}
|
||||
|
||||
/**
 * Initialises the Sentry SDK for the frontend when a DSN is available.
 *
 * Each option prefers the runtime config from the page and falls back to the
 * matching build-time constant. The build-time constants are checked with
 * `typeof` so the code does not throw if one was never defined.
 *
 * @returns `true` when the SDK was initialised, `false` when no DSN was found.
 */
export function initBugsink(): boolean {
  const runtime = readRuntimeConfig();

  const buildDsn = typeof __BUGSINK_DSN__ === 'string' ? __BUGSINK_DSN__ : undefined;
  const dsn = nonempty(runtime.dsn) ?? readBuildTimeString(buildDsn);
  if (!dsn) {
    return false;
  }

  const buildEnvironment =
    typeof __BUGSINK_ENVIRONMENT__ === 'string' ? __BUGSINK_ENVIRONMENT__ : undefined;
  const environment = nonempty(runtime.environment) ?? readBuildTimeString(buildEnvironment);

  const buildRelease = typeof __BUGSINK_RELEASE__ === 'string' ? __BUGSINK_RELEASE__ : undefined;
  const release = nonempty(runtime.release) ?? readBuildTimeString(buildRelease);

  const buildSendDefaultPii =
    typeof __BUGSINK_SEND_DEFAULT_PII__ === 'boolean' ? __BUGSINK_SEND_DEFAULT_PII__ : undefined;
  const sendDefaultPii = runtime.sendDefaultPii ?? readBuildTimeBoolean(buildSendDefaultPii);

  Sentry.init({
    dsn,
    environment,
    release,
    sendDefaultPii,
    tracesSampleRate: 0,
  });
  Sentry.setTag('app', 'frontend');

  return true;
}
|
||||
|
||||
/**
 * Wraps `children` in Sentry's React error boundary.
 *
 * @param props.children subtree to wrap
 * @param props.fallback element passed to the boundary as its fallback
 */
export function BugsinkErrorBoundary(props: { children: ReactNode; fallback: ReactElement }) {
  const { children, fallback } = props;
  return <Sentry.ErrorBoundary fallback={fallback}>{children}</Sentry.ErrorBoundary>;
}
|
||||
|
|
@ -16,6 +16,8 @@ const GROUP_ICONS: Record<string, ComponentType<{ className?: string }>> = {
|
|||
'Property prices': TagIcon,
|
||||
Transport: RouteIcon,
|
||||
Education: GraduationCapIcon,
|
||||
Schools: GraduationCapIcon,
|
||||
'Defining characteristics': TreeIcon,
|
||||
'Area development': ChartBarIcon,
|
||||
Crime: ShieldIcon,
|
||||
Neighbours: UsersIcon,
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ export function getSchoolFilterMeta(features: FeatureMeta[]): FeatureMeta {
|
|||
return {
|
||||
name: SCHOOL_FILTER_NAME,
|
||||
type: 'numeric',
|
||||
group: 'Education',
|
||||
group: 'Schools',
|
||||
min: sourceFeature?.min ?? 0,
|
||||
max: sourceFeature?.max ?? 10,
|
||||
step: 1,
|
||||
|
|
|
|||
|
|
@ -6,11 +6,36 @@ const ReactRefreshWebpackPlugin = require('@pmmmwh/react-refresh-webpack-plugin'
|
|||
const FaviconsWebpackPlugin = require('favicons-webpack-plugin');
|
||||
const sharp = require('sharp');
|
||||
const webpack = require('webpack');
|
||||
const packageJson = require('./package.json');
|
||||
|
||||
const HOUSE_IMAGE_WIDTH = 260;
|
||||
|
||||
/**
 * Returns the trimmed value of the first listed environment variable that is
 * set to a non-blank string, or `undefined` when none qualifies.
 *
 * @param {...string} names variable names, in priority order
 * @returns {string | undefined}
 */
function envString(...names) {
  const firstSet = names
    .map((name) => process.env[name])
    .find((candidate) => typeof candidate === 'string' && candidate.trim().length > 0);
  return firstSet === undefined ? undefined : firstSet.trim();
}
|
||||
|
||||
/**
 * Interprets an environment variable as a boolean flag.
 *
 * An unset or blank variable yields `fallback`. Otherwise the trimmed,
 * lower-cased value is true only for `1`, `true`, `yes` or `on`; any other
 * text is false.
 *
 * @param {string} name environment variable to read
 * @param {boolean} [fallback=false] result when the variable is unset or blank
 * @returns {boolean}
 */
function envBoolean(name, fallback = false) {
  const raw = process.env[name];
  const text = typeof raw === 'string' ? raw.trim() : '';
  if (text === '') {
    return fallback;
  }

  switch (text.toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
module.exports = (env, argv) => {
|
||||
const isProduction = argv.mode === 'production';
|
||||
const bugsinkEnvironment =
|
||||
envString('FRONTEND_BUGSINK_ENVIRONMENT', 'BUGSINK_ENVIRONMENT', 'SENTRY_ENVIRONMENT') ||
|
||||
(isProduction ? 'production' : 'development');
|
||||
const bugsinkRelease =
|
||||
envString('FRONTEND_BUGSINK_RELEASE', 'BUGSINK_RELEASE', 'SENTRY_RELEASE') ||
|
||||
`${packageJson.name}@${packageJson.version}`;
|
||||
|
||||
return {
|
||||
entry: './src/index.tsx',
|
||||
|
|
@ -22,6 +47,7 @@ module.exports = (env, argv) => {
|
|||
|
||||
publicPath: '/',
|
||||
},
|
||||
devtool: isProduction ? 'hidden-source-map' : 'eval-cheap-module-source-map',
|
||||
resolve: {
|
||||
extensions: ['.ts', '.tsx', '.js'],
|
||||
},
|
||||
|
|
@ -62,6 +88,14 @@ module.exports = (env, argv) => {
|
|||
plugins: [
|
||||
new webpack.DefinePlugin({
|
||||
__DEV__: JSON.stringify(!isProduction),
|
||||
__BUGSINK_DSN__: JSON.stringify(
|
||||
envString('FRONTEND_BUGSINK_DSN', 'PUBLIC_BUGSINK_DSN', 'BUGSINK_DSN') || ''
|
||||
),
|
||||
__BUGSINK_ENVIRONMENT__: JSON.stringify(bugsinkEnvironment),
|
||||
__BUGSINK_RELEASE__: JSON.stringify(bugsinkRelease),
|
||||
__BUGSINK_SEND_DEFAULT_PII__: JSON.stringify(
|
||||
envBoolean('BUGSINK_SEND_DEFAULT_PII', false)
|
||||
),
|
||||
}),
|
||||
new HtmlWebpackPlugin({
|
||||
template: './src/index.html',
|
||||
|
|
|
|||
|
|
@ -10,9 +10,12 @@ import argparse
|
|||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import osmium
|
||||
import polars as pl
|
||||
from scipy.spatial import cKDTree
|
||||
from shapely.geometry import Point
|
||||
from pyproj import Transformer
|
||||
from tqdm import tqdm
|
||||
|
||||
from pipeline.utils.england_geometry import (
|
||||
|
|
@ -39,6 +42,12 @@ SEARCH_PLACE_TYPES = {
|
|||
"island",
|
||||
}
|
||||
TRAVEL_DESTINATION_PLACE_TYPES = {"city"}
|
||||
ENGLAND_COUNTRY_CODE = "E92000001"
|
||||
LONDON_REGION_CODE = "E12000007"
|
||||
LONDON_LAD_PREFIX = "E09"
|
||||
LONDON_COUNTY_CODES = {"E13000001", "E13000002"}
|
||||
DISPLAY_CITY_NEAREST_POSTCODE_MAX_M = 3_000
|
||||
WGS84_TO_BNG = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
|
||||
|
||||
# Suffixes to strip from raw station names before appending the typed suffix.
|
||||
_STATION_STRIP = (
|
||||
|
|
@ -55,6 +64,7 @@ _STATION_STRIP = (
|
|||
|
||||
_DLR_CODE_RE = re.compile(r"ZZDL([A-Z0-9]{3})")
|
||||
_POSTCODE_RE = re.compile(r"\b([A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2})\b", re.I)
|
||||
_LONDON_TOKEN_RE = re.compile(r"(^|[^a-z])london([^a-z]|$)", re.I)
|
||||
|
||||
_NOISY_PROVIDER_SUFFIXES = (
|
||||
" higher education corporation",
|
||||
|
|
@ -152,8 +162,7 @@ def _find_header_row(rows: list[tuple]) -> int:
|
|||
for idx, row in enumerate(rows):
|
||||
keys = [_header_key(value) for value in row]
|
||||
has_legal_name = any(
|
||||
all(token in key for token in ("provider", "legal", "name"))
|
||||
for key in keys
|
||||
all(token in key for token in ("provider", "legal", "name")) for key in keys
|
||||
)
|
||||
has_university_title = any(
|
||||
all(token in key for token in ("right", "use", "university"))
|
||||
|
|
@ -235,13 +244,94 @@ def _postcode_lookup(postcodes_path: Path) -> dict[str, tuple[float, float]]:
|
|||
df = pl.read_parquet(
|
||||
postcodes_path,
|
||||
columns=["pcds", "lat", "long", "ctry25cd", "doterm"],
|
||||
).filter((pl.col("ctry25cd") == "E92000001") & pl.col("doterm").is_null())
|
||||
).filter((pl.col("ctry25cd") == ENGLAND_COUNTRY_CODE) & pl.col("doterm").is_null())
|
||||
return {
|
||||
_normalize_postcode(postcode): (float(lat), float(lon))
|
||||
for postcode, lat, lon in df.select(["pcds", "lat", "long"]).iter_rows()
|
||||
}
|
||||
|
||||
|
||||
def _display_city_from_tags(tags: dict[str, str]) -> str | None:
    """Return "London" when an OSM context tag mentions London, else None.

    Only the `is_in*` and `addr:city` tags are inspected; each is matched
    against `_LONDON_TOKEN_RE`. Missing or empty tags are ignored.
    """
    context_keys = ("is_in", "is_in:city", "is_in:town", "is_in:county", "addr:city")
    mentions_london = any(
        _LONDON_TOKEN_RE.search(tags.get(key) or "") for key in context_keys
    )
    return "London" if mentions_london else None
|
||||
|
||||
|
||||
def _is_london_admin_expr() -> pl.Expr:
    """Build a boolean expression flagging rows whose admin codes are London.

    A row is flagged when any of these holds:
    - `rgn25cd` equals `LONDON_REGION_CODE`
    - `lad25cd` starts with `LONDON_LAD_PREFIX`
    - `cty25cd` is one of `LONDON_COUNTY_CODES`

    The combined result is null-filled with False, so a row with missing admin
    codes and no positive match is reported as "not London" rather than null.
    """
    in_london_region = pl.col("rgn25cd") == LONDON_REGION_CODE
    in_london_district = pl.col("lad25cd").str.starts_with(LONDON_LAD_PREFIX)
    in_london_county = pl.col("cty25cd").is_in(LONDON_COUNTY_CODES)
    # OR keeps True when any term is True even if others are null; fill the
    # remaining nulls once, at the end, so every term is treated the same way.
    return (in_london_region | in_london_district | in_london_county).fill_null(False)
|
||||
|
||||
|
||||
def _london_postcode_tree(postcodes_path: Path) -> tuple[cKDTree, np.ndarray]:
    """Build a KD-tree of active England postcodes plus a per-postcode London flag.

    Rows are kept when `ctry25cd` equals `ENGLAND_COUNTRY_CODE`, `doterm` is
    null, and both `east1m` and `north1m` are present.

    Returns:
        A `(tree, london_flags)` pair: `tree` indexes the (east1m, north1m)
        coordinates as float64, and `london_flags[i]` is the
        `_is_london_admin_expr()` result for the i-th indexed postcode.

    Raises:
        ValueError: if no rows survive the filters.
    """
    columns = [
        "doterm",
        "ctry25cd",
        "east1m",
        "north1m",
        "rgn25cd",
        "lad25cd",
        "cty25cd",
    ]
    is_active_england = (pl.col("ctry25cd") == ENGLAND_COUNTRY_CODE) & pl.col(
        "doterm"
    ).is_null()
    has_grid_reference = pl.col("east1m").is_not_null() & pl.col("north1m").is_not_null()

    postcodes = (
        pl.read_parquet(postcodes_path, columns=columns)
        .filter(is_active_england)
        .filter(has_grid_reference)
        .select("east1m", "north1m", _is_london_admin_expr().alias("is_london"))
    )
    if postcodes.is_empty():
        raise ValueError(f"No active England postcodes in {postcodes_path}")

    eastings = postcodes["east1m"].to_numpy().astype(np.float64)
    northings = postcodes["north1m"].to_numpy().astype(np.float64)
    tree = cKDTree(np.column_stack([eastings, northings]))
    return tree, postcodes["is_london"].to_numpy().astype(bool)
|
||||
|
||||
|
||||
def _assign_london_display_city(
    places: list[dict],
    postcodes_path: Path,
    max_distance_m: float = DISPLAY_CITY_NEAREST_POSTCODE_MAX_M,
) -> int:
    """Set display_city="London" on places whose nearest postcode is in London.

    Places are mutated in place. A place is skipped when it already has a
    truthy `display_city` or when its `place_type` is "city". It is labelled
    only if its nearest postcode lies within `max_distance_m` and is flagged
    as London.

    Returns:
        The number of places that were newly labelled.
    """
    if not places:
        return 0

    tree, london_flags = _london_postcode_tree(postcodes_path)

    # Project lon/lat to the same grid as the postcode eastings/northings.
    longitudes = np.array([float(entry["lon"]) for entry in places], dtype=np.float64)
    latitudes = np.array([float(entry["lat"]) for entry in places], dtype=np.float64)
    eastings, northings = WGS84_TO_BNG.transform(longitudes, latitudes)
    distances, nearest = tree.query(np.column_stack([eastings, northings]))

    labelled = 0
    for entry, distance, postcode_idx in zip(places, distances, nearest):
        if entry.get("display_city") or entry.get("place_type") == "city":
            continue
        if distance <= max_distance_m and london_flags[postcode_idx]:
            entry["display_city"] = "London"
            labelled += 1
    return labelled
|
||||
|
||||
|
||||
def _ofs_universities(
|
||||
raw: pl.DataFrame, postcode_coords: dict[str, tuple[float, float]]
|
||||
) -> tuple[list[dict], int]:
|
||||
|
|
@ -277,6 +367,7 @@ def _ofs_universities(
|
|||
"lon": lon,
|
||||
"population": 0,
|
||||
"travel_destination": True,
|
||||
"display_city": None,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -354,6 +445,7 @@ def _naptan_dlr_stations(naptan_path: Path) -> list[dict]:
|
|||
"lon": station["lon_sum"] / count,
|
||||
"population": 0,
|
||||
"travel_destination": True,
|
||||
"display_city": None,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -388,6 +480,7 @@ class PlaceHandler(osmium.SimpleHandler):
|
|||
lon: float,
|
||||
population: int,
|
||||
travel_destination: bool,
|
||||
display_city: str | None = None,
|
||||
) -> None:
|
||||
self.places.append(
|
||||
{
|
||||
|
|
@ -397,6 +490,7 @@ class PlaceHandler(osmium.SimpleHandler):
|
|||
"lon": lon,
|
||||
"population": population,
|
||||
"travel_destination": travel_destination,
|
||||
"display_city": display_city,
|
||||
}
|
||||
)
|
||||
self._progress.set_postfix(places=f"{len(self.places):,}", refresh=False)
|
||||
|
|
@ -414,18 +508,19 @@ class PlaceHandler(osmium.SimpleHandler):
|
|||
if not self._england.contains(Point(lon, lat)):
|
||||
return
|
||||
|
||||
name = n.tags.get("name:en", n.tags.get("name", ""))
|
||||
tags = dict(n.tags)
|
||||
name = tags.get("name:en", tags.get("name", ""))
|
||||
if not name:
|
||||
return
|
||||
|
||||
pop_str = n.tags.get("population", "")
|
||||
pop_str = tags.get("population", "")
|
||||
try:
|
||||
population = int(pop_str)
|
||||
except ValueError:
|
||||
population = 0
|
||||
|
||||
# place=* nodes
|
||||
place_type = n.tags.get("place")
|
||||
place_type = tags.get("place")
|
||||
if place_type in SEARCH_PLACE_TYPES:
|
||||
self._add(
|
||||
name,
|
||||
|
|
@ -434,12 +529,14 @@ class PlaceHandler(osmium.SimpleHandler):
|
|||
lon,
|
||||
population,
|
||||
travel_destination=place_type in TRAVEL_DESTINATION_PLACE_TYPES,
|
||||
display_city=None
|
||||
if place_type == "city"
|
||||
else _display_city_from_tags(tags),
|
||||
)
|
||||
return
|
||||
|
||||
# Railway stations (tube, national rail, DLR, overground, Elizabeth line)
|
||||
if n.tags.get("railway") == "station":
|
||||
tags = dict(n.tags)
|
||||
if tags.get("railway") == "station":
|
||||
if _is_tram_station(tags):
|
||||
return
|
||||
display_name = _station_display_name(name, tags)
|
||||
|
|
@ -450,6 +547,7 @@ class PlaceHandler(osmium.SimpleHandler):
|
|||
lon,
|
||||
population,
|
||||
travel_destination=True,
|
||||
display_city=_display_city_from_tags(tags),
|
||||
)
|
||||
return
|
||||
|
||||
|
|
@ -479,7 +577,10 @@ def main() -> None:
|
|||
parser.add_argument(
|
||||
"--postcodes",
|
||||
type=Path,
|
||||
help="Postcode parquet used to geocode OfS university contact postcodes",
|
||||
help=(
|
||||
"Postcode parquet used to geocode OfS university contact postcodes "
|
||||
"and assign Greater London display labels"
|
||||
),
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
@ -507,14 +608,18 @@ def main() -> None:
|
|||
added, skipped = _append_ofs_universities(
|
||||
handler.places, args.university_register, args.postcodes
|
||||
)
|
||||
print(
|
||||
f"Added {added:,} university travel destinations from the OfS register"
|
||||
)
|
||||
print(f"Added {added:,} university travel destinations from the OfS register")
|
||||
if skipped:
|
||||
print(f"Skipped {skipped:,} OfS university rows without usable coordinates")
|
||||
|
||||
if handler.places:
|
||||
if args.postcodes:
|
||||
assigned = _assign_london_display_city(handler.places, args.postcodes)
|
||||
print(f"Assigned London display labels to {assigned:,} places")
|
||||
for place in handler.places:
|
||||
place.setdefault("display_city", None)
|
||||
df = pl.DataFrame(handler.places)
|
||||
df = df.with_columns(pl.col("display_city").cast(pl.Utf8))
|
||||
args.output.parent.mkdir(parents=True, exist_ok=True)
|
||||
df.write_parquet(args.output)
|
||||
print(f"Saved to {args.output}")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
import polars as pl
|
||||
from pyproj import Transformer
|
||||
|
||||
from pipeline.download.places import (
|
||||
_assign_london_display_city,
|
||||
_display_city_from_tags,
|
||||
_is_dlr_station,
|
||||
_is_tram_station,
|
||||
_naptan_dlr_stations,
|
||||
|
|
@ -9,6 +12,22 @@ from pipeline.download.places import (
|
|||
_station_display_name,
|
||||
)
|
||||
|
||||
WGS84_TO_BNG = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
|
||||
|
||||
|
||||
def _postcode_row(postcode: str, lat: float, lon: float, *, london: bool) -> dict:
    """Build one synthetic active England postcode row for the tests.

    The lat/lon pair is projected with `WGS84_TO_BNG` and rounded to whole
    metres; `london` selects which set of region/district/county codes is used.
    """
    easting, northing = WGS84_TO_BNG.transform(lon, lat)
    if london:
        region, district, county = "E12000007", "E09000008", "E13000002"
    else:
        region, district, county = "E12000008", "E07000208", "E10000030"
    return dict(
        pcds=postcode,
        doterm=None,
        ctry25cd="E92000001",
        east1m=int(round(easting)),
        north1m=int(round(northing)),
        rgn25cd=region,
        lad25cd=district,
        cty25cd=county,
    )
|
||||
|
||||
|
||||
def test_dlr_light_rail_is_not_treated_as_tram():
|
||||
dlr_tags = {
|
||||
|
|
@ -144,5 +163,56 @@ def test_ofs_universities_extracts_university_title_rows_with_postcode_coords():
|
|||
"lon": -1.2643,
|
||||
"population": 0,
|
||||
"travel_destination": True,
|
||||
"display_city": None,
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def test_display_city_from_tags_uses_explicit_london_context():
    """An `is_in` tag naming London yields "London"; other contexts yield None."""
    london_context = {"is_in": "Croydon, London, UK"}
    other_context = {"is_in": "Croydon, Cambridgeshire, UK"}

    assert _display_city_from_tags(london_context) == "London"
    assert _display_city_from_tags(other_context) is None
|
||||
|
||||
|
||||
def test_assign_london_display_city_uses_nearest_active_postcode_admin(tmp_path):
    """Places nearest a London postcode are labelled; the one nearest a non-London postcode is not."""

    def make_place(name, place_type, lat, lon, population, travel_destination):
        # Same key order as the records the pipeline builds.
        return {
            "name": name,
            "place_type": place_type,
            "lat": lat,
            "lon": lon,
            "population": population,
            "travel_destination": travel_destination,
            "display_city": None,
        }

    postcodes = tmp_path / "postcodes.parquet"
    rows = [
        _postcode_row("CR0 1SZ", 51.371273, -0.101793, london=True),
        _postcode_row("KT19 8AG", 51.3326, -0.2678, london=False),
    ]
    pl.DataFrame(rows).write_parquet(postcodes)

    places = [
        make_place("Croydon", "town", 51.3713049, -0.101957, 173314, False),
        make_place("East Croydon railway station", "station", 51.375845, -0.092732, 0, True),
        make_place("Epsom", "town", 51.3326, -0.2678, 31489, False),
    ]

    assigned = _assign_london_display_city(places, postcodes)

    assert assigned == 2
    labels = [place["display_city"] for place in places]
    assert labels == ["London", "London", None]
|
||||
|
|
|
|||
|
|
@ -139,10 +139,15 @@ if [ ! -f "$NETWORK_DIR/network.dat" ]; then
|
|||
fi
|
||||
|
||||
# --- Step 5: Run batch ---
|
||||
# Use a repo-local temp dir so DuckDB's JNI .so can be mapped executable
|
||||
# (system /tmp is often mounted noexec, which breaks System.load).
|
||||
TMP_DIR="$R5_DIR/tmp"
|
||||
mkdir -p "$TMP_DIR"
|
||||
|
||||
echo ""
|
||||
echo "--- Starting batch computation ---"
|
||||
DATA_DIR="$NETWORK_DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
|
||||
java -Xms"$HEAP" -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
|
||||
java -Xms"$HEAP" -Xmx"$HEAP" -Djava.io.tmpdir="$TMP_DIR" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
|
||||
--postcodes property-data/arcgis_data.parquet \
|
||||
--places property-data/places.parquet \
|
||||
--output-dir "$OUTPUT_BASE" \
|
||||
|
|
|
|||
456
server-rs/Cargo.lock
generated
456
server-rs/Cargo.lock
generated
|
|
@ -2,6 +2,15 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler2"
|
||||
version = "2.0.1"
|
||||
|
|
@ -683,6 +692,21 @@ dependencies = [
|
|||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.76"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base16ct"
|
||||
version = "0.1.1"
|
||||
|
|
@ -772,6 +796,15 @@ dependencies = [
|
|||
"hybrid-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block2"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5"
|
||||
dependencies = [
|
||||
"objc2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "boxcar"
|
||||
version = "0.2.14"
|
||||
|
|
@ -1295,6 +1328,16 @@ version = "0.1.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7eed2c4702fa172d1ce21078faa7c5203e69f5394d48cc436d25928394a867a2"
|
||||
|
||||
[[package]]
|
||||
name = "debugid"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der"
|
||||
version = "0.6.1"
|
||||
|
|
@ -1338,6 +1381,16 @@ dependencies = [
|
|||
"ctutils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dispatch2"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"objc2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "displaydoc"
|
||||
version = "0.2.5"
|
||||
|
|
@ -1532,6 +1585,18 @@ version = "0.1.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
||||
|
||||
[[package]]
|
||||
name = "findshlibs"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.9"
|
||||
|
|
@ -1773,6 +1838,12 @@ dependencies = [
|
|||
"wasip3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.3"
|
||||
|
|
@ -1947,6 +2018,17 @@ dependencies = [
|
|||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hostname"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.12"
|
||||
|
|
@ -2675,6 +2757,18 @@ dependencies = [
|
|||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.30.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"cfg_aliases",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "now"
|
||||
version = "0.1.3"
|
||||
|
|
@ -2738,6 +2832,36 @@ dependencies = [
|
|||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f"
|
||||
dependencies = [
|
||||
"objc2-encode",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-cloud-kit"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73ad74d880bb43877038da939b7427bba67e9dd42004a18b809ba7d87cee241c"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"objc2",
|
||||
"objc2-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-data"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b402a653efbb5e82ce4df10683b6b28027616a2715e90009947d50b8dd298fa"
|
||||
dependencies = [
|
||||
"objc2",
|
||||
"objc2-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-foundation"
|
||||
version = "0.3.2"
|
||||
|
|
@ -2745,6 +2869,72 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"dispatch2",
|
||||
"objc2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-graphics"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e022c9d066895efa1345f8e33e584b9f958da2fd4cd116792e15e07e4720a807"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"dispatch2",
|
||||
"objc2",
|
||||
"objc2-core-foundation",
|
||||
"objc2-io-surface",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-image"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5d563b38d2b97209f8e861173de434bd0214cf020e3423a52624cd1d989f006"
|
||||
dependencies = [
|
||||
"objc2",
|
||||
"objc2-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-location"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca347214e24bc973fc025fd0d36ebb179ff30536ed1f80252706db19ee452009"
|
||||
dependencies = [
|
||||
"objc2",
|
||||
"objc2-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-text"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cde0dfb48d25d2b4862161a4d5fcc0e3c24367869ad306b0c9ec0073bfed92d"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"objc2",
|
||||
"objc2-core-foundation",
|
||||
"objc2-core-graphics",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-encode"
|
||||
version = "4.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33"
|
||||
|
||||
[[package]]
|
||||
name = "objc2-foundation"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"block2",
|
||||
"libc",
|
||||
"objc2",
|
||||
"objc2-core-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2757,6 +2947,60 @@ dependencies = [
|
|||
"objc2-core-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-io-surface"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "180788110936d59bab6bd83b6060ffdfffb3b922ba1396b312ae795e1de9d81d"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"objc2",
|
||||
"objc2-core-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-quartz-core"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96c1358452b371bf9f104e21ec536d37a650eb10f7ee379fff67d2e08d537f1f"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"objc2",
|
||||
"objc2-core-foundation",
|
||||
"objc2-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-ui-kit"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d87d638e33c06f577498cbcc50491496a3ed4246998a7fbba7ccb98b1e7eab22"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"block2",
|
||||
"objc2",
|
||||
"objc2-cloud-kit",
|
||||
"objc2-core-data",
|
||||
"objc2-core-foundation",
|
||||
"objc2-core-graphics",
|
||||
"objc2-core-image",
|
||||
"objc2-core-location",
|
||||
"objc2-core-text",
|
||||
"objc2-foundation",
|
||||
"objc2-quartz-core",
|
||||
"objc2-user-notifications",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-user-notifications"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9df9128cbbfef73cda168416ccf7f837b62737d748333bfe9ab71c245d76613e"
|
||||
dependencies = [
|
||||
"objc2",
|
||||
"objc2-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.37.3"
|
||||
|
|
@ -2831,6 +3075,22 @@ dependencies = [
|
|||
"hashbrown 0.14.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "os_info"
|
||||
version = "3.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e4022a17595a00d6a369236fdae483f0de7f0a339960a53118b818238e132224"
|
||||
dependencies = [
|
||||
"android_system_properties",
|
||||
"log",
|
||||
"nix",
|
||||
"objc2",
|
||||
"objc2-foundation",
|
||||
"objc2-ui-kit",
|
||||
"serde",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "outref"
|
||||
version = "0.5.2"
|
||||
|
|
@ -2926,6 +3186,26 @@ dependencies = [
|
|||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.17"
|
||||
|
|
@ -3616,6 +3896,7 @@ dependencies = [
|
|||
"reqwest 0.13.3",
|
||||
"rust_xlsxwriter",
|
||||
"rustc-hash",
|
||||
"sentry",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2 0.11.0",
|
||||
|
|
@ -3935,6 +4216,7 @@ checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
|
|||
dependencies = [
|
||||
"base64",
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2 0.4.14",
|
||||
|
|
@ -4111,6 +4393,12 @@ dependencies = [
|
|||
"zip",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "2.1.2"
|
||||
|
|
@ -4346,6 +4634,130 @@ version = "1.0.28"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
|
||||
|
||||
[[package]]
|
||||
name = "sentry"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d92d893ba7469d361a6958522fa440e4e2bc8bf4c5803cd1bf40b9af63f8f9a8"
|
||||
dependencies = [
|
||||
"cfg_aliases",
|
||||
"httpdate",
|
||||
"reqwest 0.12.28",
|
||||
"rustls 0.23.40",
|
||||
"sentry-backtrace",
|
||||
"sentry-contexts",
|
||||
"sentry-core",
|
||||
"sentry-debug-images",
|
||||
"sentry-panic",
|
||||
"sentry-tower",
|
||||
"sentry-tracing",
|
||||
"tokio",
|
||||
"ureq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-backtrace"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f8784d0a27b5cd4b5f75769ffc84f0b7580e3c35e1af9cd83cb90b612d769cc"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"regex",
|
||||
"sentry-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-contexts"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e5eb42f4cd4f9fdfec9e3b07b25a4c9769df83d218a7e846658984d5948ad3e"
|
||||
dependencies = [
|
||||
"hostname",
|
||||
"libc",
|
||||
"os_info",
|
||||
"rustc_version",
|
||||
"sentry-core",
|
||||
"uname",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-core"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0b1e7ca40f965db239da279bf278d87b7407469b98835f27f0c8e59ed189b06"
|
||||
dependencies = [
|
||||
"rand 0.9.4",
|
||||
"sentry-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-debug-images"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "002561e49ea3a9de316e2efadc40fae553921b8ff41448f02ea85fd135a778d6"
|
||||
dependencies = [
|
||||
"findshlibs",
|
||||
"sentry-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-panic"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8906f8be87aea5ac7ef937323fb655d66607427f61007b99b7cb3504dc5a156c"
|
||||
dependencies = [
|
||||
"sentry-backtrace",
|
||||
"sentry-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-tower"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56aebe376310840b49dad4cca55c7b32d9abdc14946cd071d4158ecb149b63a4"
|
||||
dependencies = [
|
||||
"axum",
|
||||
"http 1.4.0",
|
||||
"pin-project",
|
||||
"sentry-core",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-tracing"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b07eefe04486316c57aba08ab53dd44753c25102d1d3fe05775cc93a13262d9"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"sentry-backtrace",
|
||||
"sentry-core",
|
||||
"tracing-core",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-types"
|
||||
version = "0.46.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "567711f01f86a842057e1fc17779eba33a336004227e1a1e7e6cc2599e22e259"
|
||||
dependencies = [
|
||||
"debugid",
|
||||
"hex",
|
||||
"rand 0.9.4",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"time",
|
||||
"url",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
|
|
@ -5147,6 +5559,15 @@ version = "1.20.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
|
||||
|
||||
[[package]]
|
||||
name = "uname"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b72f89f0ca32e4db1c04e2a72f5345d59796d4866a1ee0609084569f73683dc8"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.9.0"
|
||||
|
|
@ -5207,6 +5628,34 @@ version = "0.0.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
|
||||
|
||||
[[package]]
|
||||
name = "ureq"
|
||||
version = "3.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"log",
|
||||
"percent-encoding",
|
||||
"rustls 0.23.40",
|
||||
"rustls-pki-types",
|
||||
"ureq-proto",
|
||||
"utf8-zero",
|
||||
"webpki-roots",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ureq-proto"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"http 1.4.0",
|
||||
"httparse",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.5.8"
|
||||
|
|
@ -5217,6 +5666,7 @@ dependencies = [
|
|||
"idna",
|
||||
"percent-encoding",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5225,6 +5675,12 @@ version = "2.1.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-zero"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8c0a043c9540bae7c578c88f91dda8bd82e59ae27c21baca69c8b191aaf5a6e"
|
||||
|
||||
[[package]]
|
||||
name = "utf8_iter"
|
||||
version = "1.0.4"
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ sha2 = "0.11"
|
|||
hex = "0.4"
|
||||
tower = { version = "0.5", features = ["limit"] }
|
||||
libc = "0.2"
|
||||
sentry = { version = "0.46.0", default-features = false, features = ["backtrace", "contexts", "debug-images", "panic", "reqwest", "rustls", "tracing", "tower-http", "tower-axum-matched-path"] }
|
||||
|
||||
[lints.clippy]
|
||||
min_ident_chars = "warn"
|
||||
|
|
|
|||
80
server-rs/src/bugsink.rs
Normal file
80
server-rs/src/bugsink.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
/// Error-reporting settings for the Rust server process.
///
/// Consumed by [`init_backend`]; every optional field is trimmed there and
/// treated as absent when it is empty or whitespace-only.
#[derive(Clone, Debug)]
pub struct BackendConfig {
    /// DSN of the Bugsink/Sentry-compatible project; reporting stays off when `None`.
    pub dsn: Option<String>,
    /// Environment name attached to events (for example `production`).
    pub environment: Option<String>,
    /// Release identifier; [`init_backend`] falls back to [`default_release`] when unset.
    pub release: Option<String>,
    /// Forwarded verbatim to the SDK's `send_default_pii` option.
    pub send_default_pii: bool,
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct FrontendConfig {
|
||||
pub dsn: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub environment: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub release: Option<String>,
|
||||
pub send_default_pii: bool,
|
||||
}
|
||||
|
||||
pub fn env_nonempty(name: &str) -> Option<String> {
|
||||
std::env::var(name).ok().and_then(nonempty)
|
||||
}
|
||||
|
||||
/// Trims `value` and returns the result, or `None` if nothing is left.
///
/// Lets blank settings (`""`, `"   "`) be handled the same way as missing ones.
pub fn nonempty(value: String) -> Option<String> {
    match value.trim() {
        "" => None,
        kept => Some(kept.to_owned()),
    }
}
|
||||
|
||||
pub fn default_release() -> String {
|
||||
format!("{}@{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
|
||||
}
|
||||
|
||||
pub fn init_backend(config: &BackendConfig) -> Option<sentry::ClientInitGuard> {
|
||||
let dsn = config.dsn.clone().and_then(nonempty)?;
|
||||
|
||||
let dsn = match dsn.parse::<sentry::types::Dsn>() {
|
||||
Ok(dsn) => dsn,
|
||||
Err(err) => {
|
||||
eprintln!("Ignoring invalid BUGSINK_DSN: {err}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
Some(sentry::init(sentry::ClientOptions {
|
||||
dsn: Some(dsn),
|
||||
environment: config
|
||||
.environment
|
||||
.clone()
|
||||
.and_then(nonempty)
|
||||
.map(Cow::Owned),
|
||||
release: Some(Cow::Owned(
|
||||
config
|
||||
.release
|
||||
.clone()
|
||||
.and_then(nonempty)
|
||||
.unwrap_or_else(default_release),
|
||||
)),
|
||||
send_default_pii: config.send_default_pii,
|
||||
traces_sample_rate: 0.0,
|
||||
..Default::default()
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn frontend_config(
|
||||
dsn: Option<String>,
|
||||
environment: Option<String>,
|
||||
release: Option<String>,
|
||||
send_default_pii: bool,
|
||||
) -> Option<FrontendConfig> {
|
||||
dsn.and_then(nonempty).map(|dsn| FrontendConfig {
|
||||
dsn,
|
||||
environment: environment.and_then(nonempty),
|
||||
release: release.and_then(nonempty),
|
||||
send_default_pii,
|
||||
})
|
||||
}
|
||||
|
|
@ -21,6 +21,16 @@ pub struct PlaceData {
|
|||
pub travel_destination: Vec<bool>,
|
||||
}
|
||||
|
||||
pub(super) struct CityCandidate<'a> {
|
||||
pub(super) name: &'a str,
|
||||
pub(super) lat: f32,
|
||||
pub(super) lon: f32,
|
||||
}
|
||||
|
||||
/// Squared cut-off, in degrees², beyond which no parent city is assigned.
/// 1° ≈ 111 km, so 0.9° ≈ 100 km and 0.9² = 0.81.
const PARENT_CITY_MAX_DIST_SQ: f32 = 0.81;

/// Radius around the "London" entry within which London aliases still display
/// as "London": 30 km expressed in degrees at ~111 km per degree.
const LONDON_DISPLAY_MAX_DEGREES: f32 = 30.0 / 111.0;

/// Square of [`LONDON_DISPLAY_MAX_DEGREES`], for comparison against squared distances.
const LONDON_DISPLAY_MAX_DIST_SQ: f32 = LONDON_DISPLAY_MAX_DEGREES * LONDON_DISPLAY_MAX_DEGREES;
|
||||
|
||||
fn type_rank(place_type: &str) -> u8 {
|
||||
match place_type {
|
||||
"city" => 0,
|
||||
|
|
@ -37,6 +47,53 @@ pub fn is_travel_destination_type(place_type: &str) -> bool {
|
|||
matches!(place_type, "city" | "station" | "university")
|
||||
}
|
||||
|
||||
fn distance_sq(lat: f32, lon: f32, city: &CityCandidate<'_>) -> f32 {
|
||||
let cos_lat = lat.to_radians().cos();
|
||||
let dlat = city.lat - lat;
|
||||
let dlon = (city.lon - lon) * cos_lat;
|
||||
dlat * dlat + dlon * dlon
|
||||
}
|
||||
|
||||
/// Reports whether `name` is one of the city entries that stand for London.
///
/// The comparison is exact and case-sensitive.
fn is_london_city_name(name: &str) -> bool {
    const LONDON_ALIASES: [&str; 3] = ["London", "Westminster", "City of London"];
    LONDON_ALIASES.contains(&name)
}
|
||||
|
||||
pub(super) fn nearest_display_city<'a>(
|
||||
lat: f32,
|
||||
lon: f32,
|
||||
cities: &'a [CityCandidate<'a>],
|
||||
) -> Option<&'a str> {
|
||||
let mut best_dist_sq = f32::MAX;
|
||||
let mut best_city: Option<&CityCandidate<'_>> = None;
|
||||
let mut london_dist_sq: Option<f32> = None;
|
||||
|
||||
for city in cities {
|
||||
let dist_sq = distance_sq(lat, lon, city);
|
||||
if city.name == "London" {
|
||||
london_dist_sq = Some(dist_sq);
|
||||
}
|
||||
if dist_sq < best_dist_sq {
|
||||
best_dist_sq = dist_sq;
|
||||
best_city = Some(city);
|
||||
}
|
||||
}
|
||||
|
||||
let best_city = best_city?;
|
||||
if best_dist_sq >= PARENT_CITY_MAX_DIST_SQ {
|
||||
return None;
|
||||
}
|
||||
|
||||
if is_london_city_name(best_city.name) {
|
||||
if london_dist_sq.is_some_and(|dist_sq| dist_sq < LONDON_DISPLAY_MAX_DIST_SQ) {
|
||||
Some("London")
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
Some(best_city.name)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn normalize_search_text(text: &str) -> String {
|
||||
let mut result = String::with_capacity(text.len());
|
||||
let mut last_was_space = true;
|
||||
|
|
@ -182,6 +239,25 @@ fn extract_bool_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<bool>> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn extract_optional_str_col(
|
||||
df: &DataFrame,
|
||||
name: &str,
|
||||
) -> anyhow::Result<Option<Vec<Option<String>>>> {
|
||||
let column = match df.column(name) {
|
||||
Ok(column) => column,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let string_column = column
|
||||
.str()
|
||||
.with_context(|| format!("Column '{name}' is not a string column"))?;
|
||||
Ok(Some(
|
||||
string_column
|
||||
.into_iter()
|
||||
.map(|value| value.map(ToString::to_string))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
impl PlaceData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path))
|
||||
|
|
@ -227,6 +303,7 @@ impl PlaceData {
|
|||
.map(|place_type| is_travel_destination_type(place_type))
|
||||
.collect()
|
||||
};
|
||||
let display_city_override = extract_optional_str_col(&df, "display_city")?;
|
||||
|
||||
// Precompute nearest city for each non-city place
|
||||
let city_indices: Vec<usize> = type_rank_vec
|
||||
|
|
@ -234,37 +311,45 @@ impl PlaceData {
|
|||
.enumerate()
|
||||
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
|
||||
.collect();
|
||||
let city_candidates: Vec<CityCandidate<'_>> = city_indices
|
||||
.iter()
|
||||
.map(|&idx| CityCandidate {
|
||||
name: &name[idx],
|
||||
lat: lat[idx],
|
||||
lon: lon[idx],
|
||||
})
|
||||
.collect();
|
||||
|
||||
let city: Vec<Option<String>> = (0..row_count)
|
||||
let fallback_city: Vec<Option<String>> = (0..row_count)
|
||||
.map(|idx| {
|
||||
if type_rank_vec[idx] == 0 {
|
||||
return None; // Cities don't need a city label
|
||||
}
|
||||
let plat = lat[idx];
|
||||
let plon = lon[idx];
|
||||
let cos_lat = (plat.to_radians()).cos();
|
||||
|
||||
let mut best_dist_sq = f32::MAX;
|
||||
let mut best_city: Option<&str> = None;
|
||||
for &ci in &city_indices {
|
||||
let dlat = lat[ci] - plat;
|
||||
let dlon = (lon[ci] - plon) * cos_lat;
|
||||
let dist_sq = dlat * dlat + dlon * dlon;
|
||||
if dist_sq < best_dist_sq {
|
||||
best_dist_sq = dist_sq;
|
||||
best_city = Some(&name[ci]);
|
||||
}
|
||||
}
|
||||
|
||||
// ~100km threshold: 1° ≈ 111km, so 0.9° ≈ 100km → 0.81 squared
|
||||
if best_dist_sq < 0.81 {
|
||||
best_city.map(|s| s.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
nearest_display_city(lat[idx], lon[idx], &city_candidates).map(str::to_string)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let city: Vec<Option<String>> = if let Some(display_city_override) = display_city_override {
|
||||
fallback_city
|
||||
.into_iter()
|
||||
.zip(display_city_override)
|
||||
.enumerate()
|
||||
.map(|(idx, (fallback, override_city))| {
|
||||
if type_rank_vec[idx] == 0 {
|
||||
return None;
|
||||
}
|
||||
override_city
|
||||
.and_then(|value| {
|
||||
let trimmed = value.trim();
|
||||
(!trimmed.is_empty()).then(|| trimmed.to_string())
|
||||
})
|
||||
.or(fallback)
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
fallback_city
|
||||
};
|
||||
|
||||
let with_pop = population.iter().filter(|&&pop| pop > 0).count();
|
||||
let with_city = city.iter().filter(|c| c.is_some()).count();
|
||||
info!(
|
||||
|
|
@ -294,6 +379,36 @@ impl PlaceData {
|
|||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn test_city_candidates() -> Vec<CityCandidate<'static>> {
|
||||
vec![
|
||||
CityCandidate {
|
||||
name: "London",
|
||||
lat: 51.5074456,
|
||||
lon: -0.1277653,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Westminster",
|
||||
lat: 51.4973206,
|
||||
lon: -0.137149,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "City of London",
|
||||
lat: 51.5156177,
|
||||
lon: -0.0919983,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Cambridge",
|
||||
lat: 52.2055314,
|
||||
lon: 0.1186637,
|
||||
},
|
||||
CityCandidate {
|
||||
name: "Oxford",
|
||||
lat: 51.7520131,
|
||||
lon: -1.2578499,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn type_rank_ordering() {
|
||||
assert!(type_rank("city") < type_rank("town"));
|
||||
|
|
@ -316,4 +431,41 @@ mod tests {
|
|||
assert!(!is_travel_destination_type("town"));
|
||||
assert!(!is_travel_destination_type("suburb"));
|
||||
}
|
||||
|
||||
/// A point inside the London radius is labelled "London", never with an alias.
#[test]
fn nearest_display_city_canonicalizes_greater_london_aliases() {
    let cities = test_city_candidates();
    let label = nearest_display_city(51.3713049, -0.101957, &cities);

    assert_eq!(label, Some("London"));
}
|
||||
|
||||
/// A point nearest to Cambridge keeps the Cambridge label.
#[test]
fn nearest_display_city_preserves_non_london_duplicates() {
    let cities = test_city_candidates();
    let label = nearest_display_city(52.1277704, -0.0813098, &cities);

    assert_eq!(label, Some("Cambridge"));
}
|
||||
|
||||
/// Outside the London radius an alias winner yields no label at all.
#[test]
fn nearest_display_city_does_not_leak_westminster_label_past_london_guard() {
    let cities = test_city_candidates();
    let label = nearest_display_city(51.5093, -0.5954, &cities);

    assert_eq!(label, None);
}
|
||||
|
||||
/// An ordinary non-London winner is returned under its own name.
#[test]
fn nearest_display_city_keeps_normal_non_london_city() {
    let cities = test_city_candidates();
    let label = nearest_display_city(51.456659, -0.969651, &cities);

    assert_eq!(label, Some("Oxford"));
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use std::fs;
|
|||
use std::path::Path;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use super::places::{nearest_display_city, CityCandidate};
|
||||
use super::PlaceData;
|
||||
|
||||
/// Precomputed outcode data derived from postcode boundaries.
|
||||
|
|
@ -58,29 +59,18 @@ impl OutcodeData {
|
|||
.enumerate()
|
||||
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
|
||||
.collect();
|
||||
let city_candidates: Vec<CityCandidate<'_>> = city_indices
|
||||
.iter()
|
||||
.map(|&idx| CityCandidate {
|
||||
name: &place_data.name[idx],
|
||||
lat: place_data.lat[idx],
|
||||
lon: place_data.lon[idx],
|
||||
})
|
||||
.collect();
|
||||
|
||||
let cities: Vec<Option<String>> = centroids
|
||||
.iter()
|
||||
.map(|&(lat, lon)| {
|
||||
let cos_lat = lat.to_radians().cos();
|
||||
let mut best_dist_sq = f32::MAX;
|
||||
let mut best_city: Option<&str> = None;
|
||||
for &ci in &city_indices {
|
||||
let dlat = place_data.lat[ci] - lat;
|
||||
let dlon = (place_data.lon[ci] - lon) * cos_lat;
|
||||
let dist_sq = dlat * dlat + dlon * dlon;
|
||||
if dist_sq < best_dist_sq {
|
||||
best_dist_sq = dist_sq;
|
||||
best_city = Some(&place_data.name[ci]);
|
||||
}
|
||||
}
|
||||
// ~100km threshold
|
||||
if best_dist_sq < 0.81 {
|
||||
best_city.map(|s| s.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.map(|&(lat, lon)| nearest_display_city(lat, lon, &city_candidates).map(str::to_string))
|
||||
.collect();
|
||||
|
||||
info!(
|
||||
|
|
|
|||
|
|
@ -160,6 +160,11 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
name: "Defining characteristics",
|
||||
features: &[
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Street tree density percentile",
|
||||
bounds: Bounds::Fixed {
|
||||
|
|
@ -175,6 +180,21 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
raw: false,
|
||||
absolute: true,
|
||||
}),
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Noise (dB)",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 50.0,
|
||||
max: 80.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Maximum transport noise level near the postcode in decibels (Lden)",
|
||||
detail: "Maximum road, rail, or airport noise level in decibels (Lden, a 24-hour weighted average) from Defra's Strategic Noise Mapping Round 4 (2022). Modelled at 4m above ground on a 10m grid and sampled as the maximum 10m cell around the postcode representative point. Above ~55 dB is typically noticeable; above ~70 dB is considered harmful by the WHO.",
|
||||
source: "noise",
|
||||
prefix: "",
|
||||
suffix: " dB",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -270,7 +290,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
name: "Education",
|
||||
name: "Schools",
|
||||
features: &[
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Good+ primary schools within 2km",
|
||||
|
|
@ -983,21 +1003,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
FeatureGroup {
|
||||
name: "Amenities",
|
||||
features: &[
|
||||
Feature::Numeric(FeatureConfig {
|
||||
name: "Noise (dB)",
|
||||
bounds: Bounds::Fixed {
|
||||
min: 50.0,
|
||||
max: 80.0,
|
||||
},
|
||||
step: 1.0,
|
||||
description: "Maximum transport noise level near the postcode in decibels (Lden)",
|
||||
detail: "Maximum road, rail, or airport noise level in decibels (Lden, a 24-hour weighted average) from Defra's Strategic Noise Mapping Round 4 (2022). Modelled at 4m above ground on a 10m grid and sampled as the maximum 10m cell around the postcode representative point. Above ~55 dB is typically noticeable; above ~70 dB is considered harmful by the WHO.",
|
||||
source: "noise",
|
||||
prefix: "",
|
||||
suffix: " dB",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
}),
|
||||
Feature::Enum(EnumFeatureConfig {
|
||||
name: "Max available download speed (Mbps)",
|
||||
order: Some(&["10", "30", "100", "300", "1000"]),
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
mod aggregation;
|
||||
mod auth;
|
||||
mod bugsink;
|
||||
mod checkout_sessions;
|
||||
mod consts;
|
||||
mod data;
|
||||
|
|
@ -29,6 +30,7 @@ use axum::Router;
|
|||
use clap::Parser;
|
||||
use consts::SERVICE_CALL_TIMEOUT;
|
||||
use tower::limit::ConcurrencyLimitLayer;
|
||||
use tower::ServiceBuilder;
|
||||
use tower_http::compression::CompressionLayer;
|
||||
|
||||
use tower_http::cors::{AllowHeaders, AllowMethods, CorsLayer};
|
||||
|
|
@ -223,10 +225,87 @@ struct Cli {
|
|||
/// Google OAuth client secret for PocketBase SSO
|
||||
#[arg(long, env = "GOOGLE_OAUTH_CLIENT_SECRET")]
|
||||
google_oauth_client_secret: String,
|
||||
|
||||
/// Bugsink DSN for backend error reporting
|
||||
#[arg(long, env = "BUGSINK_DSN", hide_env_values = true)]
|
||||
bugsink_dsn: Option<String>,
|
||||
|
||||
/// Bugsink DSN injected into the browser app; falls back to BUGSINK_DSN when omitted
|
||||
#[arg(long, env = "FRONTEND_BUGSINK_DSN", hide_env_values = true)]
|
||||
frontend_bugsink_dsn: Option<String>,
|
||||
|
||||
/// Bugsink/Sentry environment name
|
||||
#[arg(long, env = "BUGSINK_ENVIRONMENT")]
|
||||
bugsink_environment: Option<String>,
|
||||
|
||||
/// Bugsink/Sentry release name
|
||||
#[arg(long, env = "BUGSINK_RELEASE")]
|
||||
bugsink_release: Option<String>,
|
||||
|
||||
/// Include default PII in Bugsink events
|
||||
#[arg(long, env = "BUGSINK_SEND_DEFAULT_PII", default_value_t = false)]
|
||||
bugsink_send_default_pii: bool,
|
||||
}
|
||||
|
||||
async fn capture_server_error_responses(
|
||||
request: axum::extract::Request,
|
||||
next: middleware::Next,
|
||||
) -> axum::response::Response {
|
||||
let method = request.method().clone();
|
||||
let path = request.uri().path().to_owned();
|
||||
let response = next.run(request).await;
|
||||
let status = response.status();
|
||||
|
||||
if status.is_server_error() {
|
||||
sentry::with_scope(
|
||||
|scope| {
|
||||
scope.set_tag("http.status_code", status.as_u16().to_string());
|
||||
scope.set_tag("http.method", method.to_string());
|
||||
scope.set_tag("http.route", path.clone());
|
||||
},
|
||||
|| {
|
||||
sentry::capture_message(
|
||||
&format!("HTTP {status} response from {method} {path}"),
|
||||
sentry::Level::Error,
|
||||
);
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
response
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let cli = Cli::parse();
|
||||
let bugsink_environment = cli
|
||||
.bugsink_environment
|
||||
.clone()
|
||||
.or_else(|| bugsink::env_nonempty("SENTRY_ENVIRONMENT"));
|
||||
let bugsink_release = cli
|
||||
.bugsink_release
|
||||
.clone()
|
||||
.or_else(|| bugsink::env_nonempty("SENTRY_RELEASE"));
|
||||
let backend_bugsink_dsn = cli
|
||||
.bugsink_dsn
|
||||
.clone()
|
||||
.or_else(|| bugsink::env_nonempty("SENTRY_DSN"));
|
||||
let _bugsink_guard = bugsink::init_backend(&bugsink::BackendConfig {
|
||||
dsn: backend_bugsink_dsn.clone(),
|
||||
environment: bugsink_environment.clone(),
|
||||
release: bugsink_release.clone(),
|
||||
send_default_pii: cli.bugsink_send_default_pii,
|
||||
});
|
||||
let bugsink_frontend_config = bugsink::frontend_config(
|
||||
cli.frontend_bugsink_dsn
|
||||
.clone()
|
||||
.or_else(|| bugsink::env_nonempty("PUBLIC_BUGSINK_DSN"))
|
||||
.or(backend_bugsink_dsn),
|
||||
bugsink_environment.clone(),
|
||||
bugsink_release.clone(),
|
||||
cli.bugsink_send_default_pii,
|
||||
);
|
||||
|
||||
let file_appender = tracing_appender::rolling::daily("logs", "server.log");
|
||||
let (non_blocking, _guard) = tracing_appender::non_blocking(file_appender);
|
||||
|
||||
|
|
@ -234,6 +313,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
tracing_subscriber::registry()
|
||||
.with(env_filter)
|
||||
.with(sentry::integrations::tracing::layer())
|
||||
.with(tracing_subscriber::fmt::layer().with_ansi(true))
|
||||
.with(
|
||||
tracing_subscriber::fmt::layer()
|
||||
|
|
@ -245,8 +325,9 @@ async fn main() -> anyhow::Result<()> {
|
|||
// Initialize Prometheus metrics
|
||||
let metrics_handle = metrics::init_metrics();
|
||||
info!("Prometheus metrics initialized");
|
||||
|
||||
let cli = Cli::parse();
|
||||
if bugsink_frontend_config.is_some() || _bugsink_guard.is_some() {
|
||||
info!("Bugsink error reporting configured");
|
||||
}
|
||||
|
||||
for (label, path) in [
|
||||
("Properties", &cli.properties),
|
||||
|
|
@ -483,6 +564,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
stripe_secret_key: cli.stripe_secret_key,
|
||||
stripe_webhook_secret: cli.stripe_webhook_secret,
|
||||
stripe_referral_coupon_id: cli.stripe_referral_coupon_id,
|
||||
bugsink_frontend_config,
|
||||
};
|
||||
|
||||
let shared = Arc::new(SharedState::new(app_state));
|
||||
|
|
@ -670,9 +752,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
.route("/health", get(|| async { "ok" }))
|
||||
.route(
|
||||
"/metrics",
|
||||
get(move |connect_info| {
|
||||
metrics::metrics_handler(metrics_handle.clone(), connect_info)
|
||||
}),
|
||||
get(move |connect_info| metrics::metrics_handler(metrics_handle.clone(), connect_info)),
|
||||
)
|
||||
.with_state(shared.clone());
|
||||
|
||||
|
|
@ -696,9 +776,17 @@ async fn main() -> anyhow::Result<()> {
|
|||
},
|
||||
))
|
||||
.layer(middleware::from_fn(static_cache_headers))
|
||||
.layer(middleware::from_fn(capture_server_error_responses))
|
||||
.layer(cors)
|
||||
.layer(CompressionLayer::new().zstd(true).gzip(true))
|
||||
.layer(TraceLayer::new_for_http());
|
||||
.layer(TraceLayer::new_for_http())
|
||||
.layer(
|
||||
ServiceBuilder::new()
|
||||
.layer(sentry::integrations::tower::NewSentryLayer::<
|
||||
axum::extract::Request,
|
||||
>::new_from_top())
|
||||
.layer(sentry::integrations::tower::SentryHttpLayer::new()),
|
||||
);
|
||||
|
||||
// Lock all current and future memory pages to prevent swapping
|
||||
unsafe {
|
||||
|
|
|
|||
|
|
@ -174,9 +174,9 @@ fn is_same_network(ip: IpAddr) -> bool {
|
|||
v6.is_loopback()
|
||||
|| (v6.segments()[0] & 0xfe00) == 0xfc00
|
||||
|| (v6.segments()[0] & 0xffc0) == 0xfe80
|
||||
|| v6.to_ipv4_mapped().is_some_and(|v4| {
|
||||
v4.is_loopback() || v4.is_private() || v4.is_link_local()
|
||||
})
|
||||
|| v6
|
||||
.to_ipv4_mapped()
|
||||
.is_some_and(|v4| v4.is_loopback() || v4.is_private() || v4.is_link_local())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ use crate::state::AppState;
|
|||
|
||||
const OG_PLACEHOLDER: &str =
|
||||
r#"<meta name="x-og-placeholder" content="__PERFECT_POSTCODE_OG_TAGS__"/>"#;
|
||||
const BUGSINK_CONFIG_PLACEHOLDER: &str = "__PERFECT_POSTCODE_BUGSINK_CONFIG__";
|
||||
|
||||
const HTML_BODY_LIMIT: usize = 5 * 1024 * 1024;
|
||||
|
||||
|
|
@ -318,6 +319,26 @@ fn inject_tags(mut html: String, page: &SeoPage, tags: &str) -> String {
|
|||
html
|
||||
}
|
||||
|
||||
/// Escapes serialized JSON so it can be embedded in HTML/script content.
///
/// `&`, `<` and `>` are rewritten as `\uXXXX` escapes so the payload cannot
/// close the surrounding tag or start an HTML entity or comment. U+2028 and
/// U+2029 are escaped as well: they are legal unescaped inside JSON strings
/// but are treated as line terminators by JavaScript engines that predate
/// ES2019, where they would break an inline script.
///
/// In serializer-produced JSON these characters can only occur inside string
/// literals, so replacing them with their `\u` escapes leaves the decoded
/// value unchanged.
///
/// The input is walked once and the output is built in a single buffer.
fn escape_json_for_script_tag(json: &str) -> String {
    let mut escaped = String::with_capacity(json.len());
    for ch in json.chars() {
        match ch {
            '&' => escaped.push_str("\\u0026"),
            '<' => escaped.push_str("\\u003c"),
            '>' => escaped.push_str("\\u003e"),
            '\u{2028}' => escaped.push_str("\\u2028"),
            '\u{2029}' => escaped.push_str("\\u2029"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
||||
|
||||
fn inject_bugsink_config(html: String, config: Option<&crate::bugsink::FrontendConfig>) -> String {
|
||||
if !html.contains(BUGSINK_CONFIG_PLACEHOLDER) {
|
||||
return html;
|
||||
}
|
||||
|
||||
let json = config
|
||||
.and_then(|config| serde_json::to_string(config).ok())
|
||||
.unwrap_or_else(|| "{}".to_string());
|
||||
html.replace(
|
||||
BUGSINK_CONFIG_PLACEHOLDER,
|
||||
&escape_json_for_script_tag(&json),
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn og_middleware(request: Request, next: Next) -> Response {
|
||||
let path = request.uri().path().to_string();
|
||||
// Capture the query string before passing the request through
|
||||
|
|
@ -360,10 +381,10 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
|
|||
None => return response,
|
||||
};
|
||||
|
||||
let page = match seo_page_for_path(&path) {
|
||||
Some(page) => page,
|
||||
None => return response,
|
||||
};
|
||||
let page = seo_page_for_path(&path);
|
||||
if page.is_none() && state.bugsink_frontend_config.is_none() {
|
||||
return response;
|
||||
}
|
||||
|
||||
let (mut parts, body) = response.into_parts();
|
||||
let bytes = match to_bytes(body, HTML_BODY_LIMIT).await {
|
||||
|
|
@ -377,8 +398,11 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
|
|||
};
|
||||
|
||||
let html = String::from_utf8_lossy(&bytes).into_owned();
|
||||
let mut html = inject_bugsink_config(html, state.bugsink_frontend_config.as_ref());
|
||||
if let Some(page) = page {
|
||||
let tags = route_seo_tags(&page, &path, &query_string, &state.public_url, language);
|
||||
let html = inject_tags(html, &page, &tags);
|
||||
html = inject_tags(html, &page, &tags);
|
||||
}
|
||||
parts.headers.remove(header::CONTENT_LENGTH);
|
||||
Response::from_parts(parts, Body::from(html))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,13 @@ use crate::data::{Histogram, PropertyData};
|
|||
use crate::features::{self, Feature, FEATURE_GROUPS};
|
||||
use crate::state::SharedState;
|
||||
|
||||
const FILTER_GROUP_ORDER: &[&str] = &["Transport", "Property prices", "Properties", "Amenities"];
|
||||
const FILTER_GROUP_ORDER: &[&str] = &[
|
||||
"Transport",
|
||||
"Property prices",
|
||||
"Properties",
|
||||
"Defining characteristics",
|
||||
"Amenities",
|
||||
];
|
||||
const LAST_FILTER_GROUPS: &[&str] = &["Area development"];
|
||||
const POI_DISTANCE_SLIDER_MIN_KM: f32 = 0.0;
|
||||
const POI_DISTANCE_SLIDER_MAX_KM: f32 = 5.0;
|
||||
|
|
@ -268,11 +274,12 @@ mod tests {
|
|||
fn orders_filter_groups_for_backend_response() {
|
||||
let mut groups = vec![
|
||||
group("Properties"),
|
||||
group("Education"),
|
||||
group("Schools"),
|
||||
group("Area development"),
|
||||
group("Property prices"),
|
||||
group("Crime"),
|
||||
group("Neighbours"),
|
||||
group("Defining characteristics"),
|
||||
group("Amenities"),
|
||||
group("Transport"),
|
||||
];
|
||||
|
|
@ -286,8 +293,9 @@ mod tests {
|
|||
"Transport",
|
||||
"Property prices",
|
||||
"Properties",
|
||||
"Defining characteristics",
|
||||
"Amenities",
|
||||
"Education",
|
||||
"Schools",
|
||||
"Crime",
|
||||
"Neighbours",
|
||||
"Area development",
|
||||
|
|
|
|||
|
|
@ -633,7 +633,8 @@ mod tests {
|
|||
|
||||
assert!(fields_specified);
|
||||
assert!(field_set.contains("Property type"));
|
||||
assert!(!field_set.contains("Noise (dB)"));
|
||||
assert!(field_set.contains("Street tree density percentile"));
|
||||
assert!(field_set.contains("Noise (dB)"));
|
||||
assert!(!field_set.contains("Max available download speed (Mbps)"));
|
||||
assert!(!field_set.contains("Distance to nearest amenity (Cafe) (km)"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::Extension;
|
||||
use axum::extract::State;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::IntoResponse;
|
||||
use axum::response::Json;
|
||||
use axum::Extension;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::auth::OptionalUser;
|
||||
use crate::data::{PlaceData, slugify};
|
||||
use crate::data::{slugify, PlaceData};
|
||||
use crate::licensing::{check_license_point, resolve_share_code};
|
||||
use crate::state::SharedState;
|
||||
use crate::utils::normalize_postcode;
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ use parking_lot::RwLock;
|
|||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::auth::TokenCache;
|
||||
use crate::bugsink::FrontendConfig as BugsinkFrontendConfig;
|
||||
use crate::data::{
|
||||
OutcodeData, POICategoryGroup, POIData, PlaceData, PostcodeData, PropertyData, TravelTimeStore,
|
||||
};
|
||||
|
|
@ -77,6 +78,8 @@ pub struct AppState {
|
|||
pub stripe_webhook_secret: String,
|
||||
/// Stripe Coupon ID for referral discounts
|
||||
pub stripe_referral_coupon_id: String,
|
||||
/// Bugsink/Sentry-compatible browser error reporting config injected into served HTML.
|
||||
pub bugsink_frontend_config: Option<BugsinkFrontendConfig>,
|
||||
}
|
||||
|
||||
/// Wraps AppState for shared access across route handlers.
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ export type Activity =
|
|||
| { kind: 'scrollPane'; selector: string; top: number; durationMs: number }
|
||||
/**
|
||||
* Click the header of a collapsible filter group (e.g. "Transport",
|
||||
* "Education") so the cards beneath it become visible. Idempotent —
|
||||
* "Schools") so the cards beneath it become visible. Idempotent —
|
||||
* if the group is already open this is a no-op click.
|
||||
*/
|
||||
| { kind: 'openFilterGroup'; selector: string; durationMs: number };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue