More fixes

This commit is contained in:
Andras Schmelczer 2026-03-18 22:46:08 +00:00
parent 15fa09430b
commit 6b12e21d50
54 changed files with 1665 additions and 630 deletions

View file

@ -1,5 +1,5 @@
# Stage 1: Build frontend
FROM node:20-slim AS frontend
FROM node:22-slim AS frontend
WORKDIR /app/frontend
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
@ -7,7 +7,7 @@ COPY frontend/ ./
RUN npm run build:no-prerender
# Stage 2: Build Rust server
FROM rust:1.83-bookworm AS server
FROM rust:1.84-bookworm AS server
WORKDIR /app
COPY server-rs/ server-rs/
WORKDIR /app/server-rs

View file

@ -5,9 +5,14 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
WORKDIR /app
COPY pyproject.toml ./
RUN uv pip install --system -r pyproject.toml
RUN playwright install --with-deps chromium
RUN playwright install-deps firefox
RUN camoufox fetch \
&& python -c "from camoufox.pkgman import camoufox_path; p = camoufox_path(download_if_missing=False); print('Camoufox verified at', p)"
COPY *.py ./
COPY property-data/arcgis_data.parquet /data/arcgis_data.parquet
HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:1234/health')"
CMD ["python3", "main.py"]

View file

@ -31,6 +31,11 @@ SCRAPE_OPENRENT = os.environ.get("SCRAPE_OPENRENT", "true").lower() in (
"true",
"yes",
)
# Zoopla scraping toggle, read from the SCRAPE_ZOOPLA environment variable.
# Defaults to "true" when unset; the value is lower-cased and counts as enabled
# only when it is one of "1", "true" or "yes".
SCRAPE_ZOOPLA = os.environ.get("SCRAPE_ZOOPLA", "true").lower() in (
"1",
"true",
"yes",
)
# URL to trigger server data reload after scrape (e.g. http://server:8001/api/reload)
RELOAD_URL = os.environ.get("RELOAD_URL", "")
@ -47,6 +52,9 @@ HOMECOUK_PER_PAGE = 30 # max supported by the API
# OpenRent
OPENRENT_BASE = "https://www.openrent.co.uk"
# Zoopla
ZOOPLA_BASE = "https://www.zoopla.co.uk"
PROPERTY_TYPE_MAP = {
"Detached": "Detached",
"Semi-Detached": "Semi-Detached",

View file

@ -14,6 +14,7 @@ from constants import (
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_RIGHTMOVE,
SCRAPE_ZOOPLA,
)
from homecouk import load_cookies as load_homecouk_cookies
from openrent import load_cookies as load_openrent_cookies
@ -48,6 +49,16 @@ log.setLevel(logging.DEBUG)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
# Suppress noisy /metrics and /health request logs from werkzeug
class _NoiseFilter(logging.Filter):
def filter(self, record):
msg = record.getMessage()
return "GET /metrics" not in msg and "GET /health" not in msg
logging.getLogger("werkzeug").addFilter(_NoiseFilter())
# ---------------------------------------------------------------------------
# Startup: load data
# ---------------------------------------------------------------------------
@ -55,13 +66,14 @@ logging.getLogger("httpcore").setLevel(logging.WARNING)
log.info("Loading arcgis data...")
OUTCODES = load_outcodes()
PC_INDEX = build_postcode_index()
PC_COORDS = build_postcode_coords() if SCRAPE_OPENRENT else None
PC_COORDS = build_postcode_coords() if (SCRAPE_OPENRENT or SCRAPE_ZOOPLA) else None
log.info(
"Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s)",
"Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s, openrent=%s, zoopla=%s)",
len(OUTCODES),
SCRAPE_RIGHTMOVE,
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_ZOOPLA,
)
# ---------------------------------------------------------------------------
@ -121,6 +133,11 @@ if SCHEDULE_HOUR >= 0:
app = Flask(__name__)
@app.route("/health")
def health():
    """Liveness endpoint: always answers with the body "ok" and HTTP status 200."""
    body, status_code = "ok", 200
    return body, status_code
@app.route("/run", methods=["POST"])
def trigger_run():
if _start_scrape():
@ -147,6 +164,7 @@ def get_status():
"rightmove": status.rm_properties,
"homecouk": status.hk_properties,
"openrent": status.or_properties,
"zoopla": status.zp_properties,
},
"errors": status.errors[-20:], # last 20 errors
"elapsed_seconds": round(elapsed, 1),
@ -167,8 +185,10 @@ def get_debug():
"scrape_rightmove": SCRAPE_RIGHTMOVE,
"scrape_homecouk": SCRAPE_HOMECOUK,
"scrape_openrent": SCRAPE_OPENRENT,
"scrape_zoopla": SCRAPE_ZOOPLA,
"homecouk_cookies_available": hk_cookies is not None,
"openrent_cookies_available": or_cookies is not None,
"zoopla_note": "browser-based (Camoufox), no cookies needed",
}
)

View file

@ -109,6 +109,28 @@ openrent_properties_scraped = Counter(
["channel"],
)
# ---------------------------------------------------------------------------
# Counters — Zoopla
# ---------------------------------------------------------------------------
# Count of Zoopla search-result pages processed, labelled by channel.
zoopla_pages_scraped = Counter(
"zoopla_pages_scraped",
"Search result pages scraped from Zoopla",
["channel"],
)
# Count of Zoopla scraping errors, labelled by error type.
zoopla_errors_total = Counter(
"zoopla_errors_total",
"Zoopla scraping errors",
["type"],
)
# Count of Zoopla properties produced by the scraper before cross-source
# de-duplication, labelled by channel.
zoopla_properties_scraped = Counter(
"zoopla_properties_scraped",
"Properties scraped from Zoopla (before dedup)",
["channel"],
)
# ---------------------------------------------------------------------------
# Counters — FlareSolverr / cookie management
# ---------------------------------------------------------------------------
@ -138,3 +160,8 @@ openrent_enabled = Gauge(
"openrent_enabled",
"Whether OpenRent scraping is currently active (1=yes, 0=no)",
)
# Gauge exposing whether Zoopla scraping is currently active: 1 when active,
# 0 when disabled or after the browser could not be (re)launched.
zoopla_enabled = Gauge(
"zoopla_enabled",
"Whether Zoopla scraping is currently active (1=yes, 0=no)",
)

View file

@ -17,6 +17,7 @@ from constants import (
SCRAPE_HOMECOUK,
SCRAPE_OPENRENT,
SCRAPE_RIGHTMOVE,
SCRAPE_ZOOPLA,
SEED,
)
from homecouk import CookiesExpiredError
@ -35,12 +36,16 @@ from metrics import (
scrape_outcodes_total,
scrape_properties_total,
scrape_state,
zoopla_enabled,
)
from openrent import WafChallengeError
from openrent import load_cookies as load_openrent_cookies
from openrent import make_client as make_openrent_client
from openrent import search_outcode as openrent_search_outcode
from rightmove import resolve_outcode_id, search_outcode
from zoopla import TurnstileError
from zoopla import launch_browser as launch_zoopla_browser
from zoopla import search_outcode as zoopla_search_outcode
from spatial import PostcodeSpatialIndex
from storage import write_parquet
@ -60,6 +65,7 @@ class ScrapeStatus:
rm_properties: int = 0
hk_properties: int = 0
or_properties: int = 0
zp_properties: int = 0
errors: list[str] = field(default_factory=list)
started_at: float = 0.0
finished_at: float = 0.0
@ -93,6 +99,9 @@ def _sync_gauges() -> None:
scrape_properties_total.labels(channel=ch, source="openrent").set(
status.or_properties
)
scrape_properties_total.labels(channel=ch, source="zoopla").set(
status.zp_properties
)
if status.started_at:
end = status.finished_at if status.finished_at else time.time()
scrape_elapsed_seconds.set(end - status.started_at)
@ -191,7 +200,7 @@ def run_scrape(
random.seed(SEED)
random.shuffle(shuffled)
if not SCRAPE_RIGHTMOVE and not SCRAPE_HOMECOUK and not SCRAPE_OPENRENT:
if not SCRAPE_RIGHTMOVE and not SCRAPE_HOMECOUK and not SCRAPE_OPENRENT and not SCRAPE_ZOOPLA:
log.warning("All scrapers disabled — nothing to do")
with status_lock:
status.state = "done"
@ -239,8 +248,27 @@ def run_scrape(
)
openrent_enabled.set(0)
# Build postcode coords if OpenRent is active and caller didn't provide them
if or_client and pc_coords is None:
# Zoopla: uses Camoufox browser (no cookies/client pattern)
zp_browser = None
zp_page = None
zp_failed = False
if not SCRAPE_ZOOPLA:
log.info("Zoopla scraping DISABLED (SCRAPE_ZOOPLA=false)")
zoopla_enabled.set(0)
else:
try:
zp_browser, zp_page = launch_zoopla_browser()
log.info("Zoopla scraping ENABLED (Camoufox browser launched)")
zoopla_enabled.set(1)
except TurnstileError:
log.warning("Zoopla Cloudflare Turnstile failed — disabling Zoopla")
zoopla_enabled.set(0)
except Exception as e:
log.warning("Zoopla browser launch failed: %s — disabling Zoopla", e)
zoopla_enabled.set(0)
# Build postcode coords if OpenRent/Zoopla is active and caller didn't provide them
if (or_client or zp_page) and pc_coords is None:
pc_coords = build_postcode_coords()
try:
@ -256,6 +284,8 @@ def run_scrape(
hk_dedup_count = 0 # home.co.uk skipped as cross-source duplicates
or_count = 0 # OpenRent properties this channel
or_dedup_count = 0 # OpenRent skipped as cross-source duplicates
zp_count = 0 # Zoopla properties this channel
zp_dedup_count = 0 # Zoopla skipped as cross-source duplicates
with status_lock:
status.channel = channel_name
@ -264,6 +294,7 @@ def run_scrape(
status.rm_properties = 0
status.hk_properties = 0
status.or_properties = 0
status.zp_properties = 0
channel_start = time.time()
prev_prop_milestone = 0 # last 10k milestone we logged
@ -412,6 +443,63 @@ def run_scrape(
with status_lock:
status.errors.append(msg)
# --- Zoopla ---
if zp_page and not zp_failed:
made_requests = True
try:
zp_props = zoopla_search_outcode(
zp_page,
outcode,
channel_name,
pc_index,
pc_coords,
)
for p in zp_props:
pid = p["id"]
key = _dedup_key(p)
if pid in all_properties or key in seen_dedup_keys:
zp_dedup_count += 1
cross_source_dedup_total.labels(
channel="buy" if channel_name == "BUY" else "rent",
).inc()
continue
all_properties[pid] = p
seen_dedup_keys.add(key)
zp_count += 1
if zp_props:
log.info(
"Zoopla %s: +%d properties", outcode, len(zp_props)
)
except TurnstileError:
log.warning(
"Zoopla Cloudflare challenge failed — attempting browser relaunch"
)
try:
zp_browser.close()
except Exception:
pass
try:
zp_browser, zp_page = launch_zoopla_browser()
log.info("Zoopla browser relaunched, continuing")
except Exception:
log.warning(
"Browser relaunch failed, disabling Zoopla for rest of scrape"
)
zp_page = None
zp_browser = None
zp_failed = True
zoopla_enabled.set(0)
with status_lock:
status.errors.append(
"Zoopla Cloudflare challenge failed and browser relaunch failed"
)
except Exception as e:
msg = f"Error scraping Zoopla {outcode}/{channel_name}: {e}"
log.error(msg)
scrape_errors_total.labels(source="zoopla").inc()
with status_lock:
status.errors.append(msg)
with status_lock:
if channel_name == "BUY":
status.properties_buy = len(all_properties)
@ -420,6 +508,7 @@ def run_scrape(
status.rm_properties = rm_count
status.hk_properties = hk_count
status.or_properties = or_count
status.zp_properties = zp_count
_sync_gauges()
# Log progress every 100 outcodes
@ -444,12 +533,13 @@ def run_scrape(
if current_milestone > prev_prop_milestone:
prev_prop_milestone = current_milestone
log.info(
"%s %dk properties (rm: %d, hk: %d, or: %d) at outcode %d/%d [%s]",
"%s %dk properties (rm: %d, hk: %d, or: %d, zp: %d) at outcode %d/%d [%s]",
channel_name,
current_milestone * 10,
rm_count,
hk_count,
or_count,
zp_count,
done,
len(shuffled),
_fmt_elapsed(elapsed),
@ -472,13 +562,14 @@ def run_scrape(
_sync_gauges()
log.info(
"=== %s channel complete: %d unique (rm: %d, hk: %d, or: %d, cross-dedup: %d) ===",
"=== %s channel complete: %d unique (rm: %d, hk: %d, or: %d, zp: %d, cross-dedup: %d) ===",
channel_name,
len(deduped),
rm_count,
hk_count,
or_count,
hk_dedup_count + or_dedup_count,
zp_count,
hk_dedup_count + or_dedup_count + zp_dedup_count,
)
with status_lock:
@ -525,3 +616,8 @@ def run_scrape(
hk_client.close()
if or_client:
or_client.close()
if zp_browser:
try:
zp_browser.close()
except Exception:
pass

View file

@ -25,7 +25,11 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
if fvd:
try:
dt = datetime.fromisoformat(fvd.replace("Z", "+00:00"))
listing_dates.append(dt.replace(tzinfo=None))
# Convert to UTC naive datetime for consistent storage
if dt.tzinfo is not None:
from datetime import timezone
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
listing_dates.append(dt)
except (ValueError, TypeError):
listing_dates.append(None)
else:

520
finder/zoopla.py Normal file
View file

@ -0,0 +1,520 @@
"""Zoopla (zoopla.co.uk) scraper — buy and rental properties.
Zoopla is behind Cloudflare Turnstile (managed interactive challenge), which
blocks all HTTP clients (curl_cffi, httpx) and even Playwright with stealth
patches. Only Camoufox (an anti-fingerprinting Firefox fork) passes reliably.
Zoopla uses Next.js App Router with React Server Components (RSC). Search
result data is server-rendered in an RSC stream, not available via
__NEXT_DATA__ or a JSON API. URL-based location slugs return 0 results;
the working flow requires typing into the autocomplete input, selecting a
suggestion, and clicking Search.
Architecture:
Unlike the other scrapers which use HTTP clients per outcode, Zoopla keeps
a single Camoufox browser alive for the entire scrape. For each outcode, it:
1. Clears and types the outcode into the search input
2. Selects the first autocomplete suggestion
3. Clicks Search
4. Extracts listing data from the rendered DOM
5. Handles pagination via ?pn=N parameter
The browser session replaces the cookie/client pattern used by other scrapers.
"""
import logging
import re
import time
from constants import DELAY_BETWEEN_PAGES, PROPERTY_TYPE_MAP, ZOOPLA_BASE
from metrics import zoopla_errors_total, zoopla_pages_scraped, zoopla_properties_scraped
from spatial import PostcodeSpatialIndex
log = logging.getLogger("zoopla")
class TurnstileError(Exception):
    """Signals that a Cloudflare Turnstile challenge could not be cleared."""
# Maximum search result pages to scrape per outcode (25 listings/page).
# Pagination stops once the next page number would exceed this cap.
MAX_PAGES_PER_OUTCODE = 10
# JavaScript evaluated in the page to extract listings from the rendered DOM.
# It finds all for-sale / new-homes / to-rent detail links, de-duplicates them
# by the numeric id in the href, walks up (at most 10 ancestors) to the first
# container whose text contains a pound sign plus "bed" or "sq ft", and parses
# price, beds, baths, receptions, floor area, address, and tenure from that
# card's text. The result is a list of objects with the keys: id, url, price,
# beds, baths, receptions, floor_area_sqft, address, tenure. Numeric fields are
# null when the corresponding pattern is not found; address and tenure default
# to the empty string.
_EXTRACT_LISTINGS_JS = r"""() => {
const links = Array.from(document.querySelectorAll(
'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"], a[href*="/to-rent/details/"]'
));
const seen = new Set();
const results = [];
for (const link of links) {
const href = link.href;
const match = href.match(/\/details\/(\d+)\//);
if (!match) continue;
const id = match[1];
if (seen.has(id)) continue;
seen.add(id);
// Walk up to the listing card container
let card = link;
for (let j = 0; j < 10; j++) {
card = card.parentElement;
if (!card) break;
const text = card.innerText || '';
if (text.includes('\u00a3') && (text.includes('bed') || text.includes('sq ft'))) {
break;
}
}
if (!card) continue;
const text = card.innerText || '';
const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
const priceMatch = text.match(/\u00a3([\d,]+)/);
const bedsMatch = text.match(/(\d+)\s*beds?/i);
const bathsMatch = text.match(/(\d+)\s*baths?/i);
const recMatch = text.match(/(\d+)\s*reception/i);
const areaMatch = text.match(/([\d,]+)\s*sq\s*ft/i);
let address = '';
for (const line of lines) {
if (/[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}/i.test(line) ||
(line.includes(',') && !line.includes('\u00a3') && !/^\d+ beds?/i.test(line))) {
address = line;
break;
}
}
let tenure = '';
if (/freehold/i.test(text)) tenure = 'Freehold';
else if (/leasehold/i.test(text)) tenure = 'Leasehold';
results.push({
id: id,
url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
beds: bedsMatch ? parseInt(bedsMatch[1]) : null,
baths: bathsMatch ? parseInt(bathsMatch[1]) : null,
receptions: recMatch ? parseInt(recMatch[1]) : null,
floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null,
address: address,
tenure: tenure,
});
}
return results;
}"""
# JavaScript to dismiss the Usercentrics cookie consent overlay (shadow DOM).
# It clicks the first button inside the overlay's shadow root whose text
# includes "Accept"; if no such button is found it removes the overlay element
# instead. Evaluates to true when the overlay was handled and false when no
# overlay element was present.
_DISMISS_COOKIES_JS = """() => {
const aside = document.querySelector('#usercentrics-cmp-ui');
if (aside && aside.shadowRoot) {
const btns = aside.shadowRoot.querySelectorAll('button');
for (const btn of btns) {
if (btn.innerText.includes('Accept')) { btn.click(); return true; }
}
}
if (aside) { aside.remove(); return true; }
return false;
}"""
# ---------------------------------------------------------------------------
# Browser lifecycle
# ---------------------------------------------------------------------------
def launch_browser():
    """Launch Camoufox, open the Zoopla homepage and clear the entry hurdles.

    Steps: verify Camoufox is already installed (never downloaded at runtime),
    start a headless browser, navigate to the homepage, wait for the Cloudflare
    Turnstile interstitial ("Just a moment" page title) to disappear, then
    dismiss the cookie consent overlay.

    Returns:
        (browser, page) tuple. The caller must close the browser when done.

    Raises:
        TurnstileError: if the Cloudflare challenge is still showing after
            roughly 60 seconds (20 polls, 3 seconds apart).
        Exception: any error raised by Camoufox/Playwright during launch or
            navigation is propagated after the browser has been shut down.

    NOTE(review): on the success path the Camoufox context manager is never
    exited; callers only call ``browser.close()``. Confirm whether the
    underlying Playwright driver should also be stopped at that point.
    """
    from camoufox.pkgman import camoufox_path

    # Verify camoufox is pre-installed — never download at runtime
    camoufox_path(download_if_missing=False)
    from camoufox.sync_api import Camoufox

    log.info("Launching Camoufox browser for Zoopla...")
    manager = Camoufox(headless=True)
    browser = manager.__enter__()
    try:
        page = browser.new_page()
        log.info("Navigating to Zoopla homepage...")
        page.goto(f"{ZOOPLA_BASE}/", wait_until="domcontentloaded", timeout=60000)

        # Wait for Cloudflare Turnstile to resolve.
        # Try clicking the Turnstile checkbox if present (helps in some cases).
        for _ in range(20):
            if "Just a moment" not in page.title():
                break
            # Attempt to click the Turnstile checkbox in the challenge iframe
            for frame in page.frames:
                if "challenges.cloudflare.com" in frame.url:
                    try:
                        iframe_el = page.query_selector('iframe[src*="challenges.cloudflare"]')
                        if iframe_el:
                            box = iframe_el.bounding_box()
                            if box:
                                page.mouse.click(box["x"] + 30, box["y"] + box["height"] / 2)
                    except Exception as exc:
                        # Best effort only: the challenge may still clear on its own
                        log.debug("Turnstile checkbox click failed: %s", exc)
                    break
            time.sleep(3)
        else:
            # The loop checks the title *before* each sleep, so look once more
            # after the final sleep before declaring failure.
            if "Just a moment" in page.title():
                raise TurnstileError("Cloudflare Turnstile did not resolve after 60s")

        log.info("Cloudflare passed — title: %s", page.title())
        time.sleep(2)

        # Dismiss cookie consent
        page.evaluate(_DISMISS_COOKIES_JS)
        time.sleep(1)
    except BaseException:
        # Any failure after launch must not leak the browser process: exit the
        # context manager we entered manually, then re-raise the original error.
        try:
            manager.__exit__(None, None, None)
        except Exception as exc:
            log.debug("Error while shutting down Camoufox after failed launch: %s", exc)
        raise
    return browser, page
def _ensure_not_challenged(page) -> None:
"""Check if current page is a Cloudflare challenge and wait/raise."""
if "Just a moment" not in page.title():
return
log.warning("Cloudflare challenge detected mid-session, waiting...")
for i in range(20):
time.sleep(3)
if "Just a moment" not in page.title():
log.info("Cloudflare challenge resolved")
return
raise TurnstileError("Cloudflare re-challenge did not resolve")
# ---------------------------------------------------------------------------
# Search navigation
# ---------------------------------------------------------------------------
def _navigate_search(page, outcode: str, channel: str) -> bool:
    """Drive the homepage search form for a single outcode.

    Loads the Zoopla homepage, switches to the Rent tab when ``channel`` is
    "RENT", types the outcode into the autosuggest box, picks the first
    suggestion and submits the search.

    Returns:
        True once the search has been submitted and the follow-up challenge
        check has passed; False when the search input or an autocomplete
        suggestion could not be found.

    Raises:
        TurnstileError: if a Cloudflare challenge does not clear.
    """
    # Always start from the homepage so no state carries over from the last search
    page.goto(f"{ZOOPLA_BASE}/", wait_until="domcontentloaded", timeout=30000)
    time.sleep(2)
    _ensure_not_challenged(page)

    # The consent overlay may reappear after a navigation
    page.evaluate(_DISMISS_COOKIES_JS)
    time.sleep(1)

    # Buy is left as whatever the homepage shows by default; only Rent is selected
    if channel == "RENT":
        tab = page.query_selector(
            'button:has-text("Rent"), [role="tab"]:has-text("Rent")'
        )
        if tab:
            tab.click()
            time.sleep(0.5)

    # Prefer the named autosuggest input, fall back to the first text input
    box = page.query_selector('input[name="autosuggest-input"]')
    if not box:
        box = page.query_selector('input[type="text"]')
    if not box:
        log.warning("Could not find search input on homepage")
        return False

    box.click()
    time.sleep(0.3)
    box.fill("")
    box.type(outcode, delay=60)
    time.sleep(2)

    suggestion = page.query_selector('[role="option"]')
    if not suggestion:
        log.debug("No autocomplete suggestions for outcode %s", outcode)
        return False
    suggestion.click()
    time.sleep(0.5)

    # Submit via the Search button when present, otherwise with the Enter key
    submit = page.query_selector('button:has-text("Search")')
    if submit:
        submit.click()
    else:
        box.press("Enter")

    # Give the results page time to render before checking for a challenge
    time.sleep(6)
    _ensure_not_challenged(page)
    return True
def _get_result_count(page) -> int:
"""Extract the total results count from the page body text."""
try:
body = page.inner_text("body")
match = re.search(r"([\d,]+)\s+results?", body)
if match:
return int(match.group(1).replace(",", ""))
except Exception:
pass
return 0
# ---------------------------------------------------------------------------
# Extraction and pagination
# ---------------------------------------------------------------------------
def _extract_listings(page) -> list[dict]:
    """Run the listing-extraction script against the current results page.

    Returns whatever the in-page script produces. If evaluation raises, the
    failure is logged, counted under the "extract_failed" error type, and an
    empty list is returned.
    """
    try:
        listings = page.evaluate(_EXTRACT_LISTINGS_JS)
    except Exception as exc:
        log.warning("Failed to extract listings from DOM: %s", exc)
        zoopla_errors_total.labels(type="extract_failed").inc()
        listings = []
    return listings
def _with_page_number(url: str, page_num: int) -> str:
    """Return ``url`` with its ``pn`` query parameter set to ``page_num``.

    Any existing ``pn=...`` pair is dropped and every other query pair is kept
    verbatim, so the result is well-formed wherever ``pn`` appeared in the
    original query. A URL fragment, if present, stays after the query.
    """
    from urllib.parse import urlsplit, urlunsplit

    parts = urlsplit(url)
    kept = [pair for pair in parts.query.split("&") if pair and not pair.startswith("pn=")]
    kept.append(f"pn={page_num}")
    return urlunsplit(parts._replace(query="&".join(kept)))


def _paginate(page, total_results: int, channel: str) -> list[dict]:
    """Collect listings from every page of the current search results.

    Page 1 is expected to be loaded already. Further pages are fetched by
    navigating to the results URL with the ``pn=N`` query parameter, until
    ``total_results`` listings have been collected, ``MAX_PAGES_PER_OUTCODE``
    is reached, a page yields nothing new, or navigation fails.

    Args:
        page: live browser page showing page 1 of the results.
        total_results: result count reported by the site for this search.
        channel: "BUY" selects the "buy" metric label; anything else "rent".

    Returns:
        Raw listing dicts, de-duplicated by their "id" within this search.

    Raises:
        TurnstileError: if a Cloudflare challenge appears and does not clear.
    """
    all_listings = _extract_listings(page)
    channel_label = "buy" if channel == "BUY" else "rent"
    zoopla_pages_scraped.labels(channel=channel_label).inc()
    if not all_listings or total_results <= len(all_listings):
        return all_listings

    seen_ids = {listing["id"] for listing in all_listings}
    # Captured once: every later page URL is derived from the page-1 URL
    current_url = page.url
    page_num = 2
    while len(all_listings) < total_results and page_num <= MAX_PAGES_PER_OUTCODE:
        time.sleep(DELAY_BETWEEN_PAGES)
        next_url = _with_page_number(current_url, page_num)
        try:
            page.goto(next_url, wait_until="domcontentloaded", timeout=30000)
            time.sleep(4)
            _ensure_not_challenged(page)
        except TurnstileError:
            # Let the caller decide whether to relaunch the browser
            raise
        except Exception as e:
            log.debug("Pagination navigation failed at page %d: %s", page_num, e)
            break

        page_listings = _extract_listings(page)
        if not page_listings:
            break

        # Deduplicate within this outcode
        new_count = 0
        for listing in page_listings:
            if listing["id"] not in seen_ids:
                seen_ids.add(listing["id"])
                all_listings.append(listing)
                new_count += 1
        zoopla_pages_scraped.labels(channel=channel_label).inc()
        if new_count == 0:
            break  # No new listings on this page
        page_num += 1
    return all_listings
# ---------------------------------------------------------------------------
# Property transformation
# ---------------------------------------------------------------------------
def _extract_postcode(text: str) -> str | None:
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
return match.group(1).upper().strip()
return None
def _extract_outcode(text: str) -> str | None:
"""Extract a UK outcode from address text like 'Whitechapel Road, London E1'."""
# Look for outcode at end of string or after last comma
match = re.search(r"\b([A-Z]{1,2}\d[A-Z0-9]?)\s*$", text.strip(), re.IGNORECASE)
if match:
return match.group(1).upper()
# Try after comma
parts = text.split(",")
if len(parts) > 1:
last = parts[-1].strip()
match = re.match(r"^([A-Z]{1,2}\d[A-Z0-9]?)$", last, re.IGNORECASE)
if match:
return match.group(1).upper()
return None
def _map_property_type(raw_type: str | None) -> str:
"""Map Zoopla property type text to canonical type."""
if not raw_type:
return "Other"
canonical = PROPERTY_TYPE_MAP.get(raw_type)
if canonical:
return canonical
lower = raw_type.lower()
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
return "Flats/Maisonettes"
if "detached" in lower and "semi" not in lower:
return "Detached"
if "semi" in lower:
return "Semi-Detached"
if "terrace" in lower or "mews" in lower:
return "Terraced"
if "house" in lower:
return "Detached"
return "Other"
def transform_property(
    raw: dict,
    channel: str,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
) -> dict | None:
    """Convert one raw Zoopla card into the standard output record.

    Search cards carry no coordinates, so the location is resolved from the
    address text: first by an exact postcode lookup in ``pc_coords``, then by
    falling back to the first ``pc_coords`` entry whose key starts with the
    address's outcode followed by a space.

    Args:
        raw: listing dict as produced by the in-page extraction script.
        channel: "BUY" gives an empty price frequency; anything else "monthly".
        pc_index: accepted for signature parity; not used by this function.
        pc_coords: postcode -> (lat, lng) lookup.

    Returns:
        The record dict, or None when the card has no price, no resolvable
        location, or coordinates outside the 49..56 / -7..2 bounding box.
    """
    price = raw.get("price")
    if not price:
        return None

    address = raw.get("address", "")

    # 1) Exact postcode lookup
    postcode = _extract_postcode(address)
    lat = lng = None
    if postcode:
        exact = pc_coords.get(postcode)
        if exact:
            lat, lng = exact

    # 2) Outcode-level fallback: take the first postcode within that outcode
    if lat is None:
        outcode = _extract_outcode(address)
        if outcode:
            prefix = outcode + " "
            hit = next(
                ((pcd, coords) for pcd, coords in pc_coords.items() if pcd.startswith(prefix)),
                None,
            )
            if hit is not None:
                postcode, (lat, lng) = hit

    if lat is None or lng is None or not postcode:
        return None

    # Validate coordinates are in England
    if not (49 <= lat <= 56 and -7 <= lng <= 2):
        return None

    beds = raw.get("beds") or 0
    baths = raw.get("baths") or 0
    reception_rooms = raw.get("receptions") or 0

    # Floor area arrives in square feet; the output schema stores square metres
    area_sqft = raw.get("floor_area_sqft")
    area_sqm = round(area_sqft * 0.092903, 1) if area_sqft else None

    url = raw.get("url", "")
    if url and not url.startswith("http"):
        url = ZOOPLA_BASE + url

    record = {
        "id": f"zp_{raw.get('id', '')}",
        "Bedrooms": beds,
        "Bathrooms": baths,
        "Number of bedrooms & living rooms": beds + reception_rooms,
        "lon": lng,
        "lat": lat,
        "Postcode": postcode,
        "Address per Property Register": address,
        "Leasehold/Freehold": raw.get("tenure") or None,
        "Property type": "Other",  # Not reliably extractable from Zoopla search cards
        "Property sub-type": "",
        "price": int(price),
        "price_frequency": "" if channel == "BUY" else "monthly",
        "Price qualifier": "",
        "Total floor area (sqm)": area_sqm,
        "Listing URL": url,
        "Listing features": [],
        "first_visible_date": "",
    }
    return record
# ---------------------------------------------------------------------------
# Top-level search function (called by scraper.py)
# ---------------------------------------------------------------------------
def search_outcode(
    page,
    outcode: str,
    channel: str,
    pc_index: PostcodeSpatialIndex,
    pc_coords: dict[str, tuple[float, float]],
) -> list[dict]:
    """Scrape Zoopla listings for a single outcode.

    Uses the live page returned by ``launch_browser`` to run the homepage
    search flow, reads the reported result count, walks the result pages and
    converts every raw card into the standard output schema. Cards that cannot
    be transformed are skipped.

    Returns:
        Transformed property dicts; an empty list when navigation fails, the
        reported result count is 0, or no cards are extracted.

    Raises:
        TurnstileError: if Cloudflare blocks the session mid-search.
    """
    navigated = _navigate_search(page, outcode, channel)
    if not navigated:
        return []

    expected = _get_result_count(page)
    if expected == 0:
        return []

    cards = _paginate(page, expected, channel)
    if not cards:
        return []

    metric_channel = "buy" if channel == "BUY" else "rent"
    results = []
    for card in cards:
        record = transform_property(card, channel, pc_index, pc_coords)
        if not record:
            continue
        results.append(record)
        zoopla_properties_scraped.labels(channel=metric_channel).inc()
    return results

View file

@ -395,6 +395,7 @@ export default function App() {
onUnsaveProperty={user ? savedProperties.deleteProperty : undefined}
isPropertySaved={user ? savedProperties.isPropertySaved : undefined}
getSavedPropertyId={user ? savedProperties.getSavedPropertyId : undefined}
deferTutorial={showLicenseSuccess}
/>
)}
{showAuthModal && (

View file

@ -2,7 +2,7 @@ import { useState, useCallback, useEffect, useRef } from 'react';
import type { AuthUser } from '../../hooks/useAuth';
import type { SavedSearch } from '../../hooks/useSavedSearches';
import type { SavedProperty, SavedPropertyData } from '../../hooks/useSavedProperties';
import { apiUrl, authHeaders, assertOk, shortenUrl } from '../../lib/api';
import { apiUrl, authHeaders, assertOk, shortenUrl, prewarmScreenshot } from '../../lib/api';
import { copyToClipboard } from '../../lib/clipboard';
import { formatRelativeTime, formatNumber } from '../../lib/format';
import { summarizeParams } from '../../lib/url-state';
@ -172,6 +172,7 @@ function SavedSearchesTab({
const handleShare = useCallback(
async (params: string, id: string) => {
prewarmScreenshot(params);
setSharingId(id);
try {
const shortUrl = await shortenUrl(params);
@ -213,7 +214,7 @@ function SavedSearchesTab({
{searches.map((search) => (
<div
key={search.id}
className="bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden"
className="flex flex-col bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden"
>
{search.screenshotUrl ? (
<img
@ -227,7 +228,7 @@ function SavedSearchesTab({
</div>
)}
<div className="p-4">
<div className="p-4 flex flex-col flex-1">
<h3 className="font-medium text-navy-950 dark:text-warm-100 truncate mb-1">
{search.name}
</h3>
@ -238,14 +239,14 @@ function SavedSearchesTab({
{summarizeParams(search.params)}
</p>
<div className="mb-3">
<div className="mb-3 flex-1">
<NotesInput
value={search.notes}
onSave={(notes) => onUpdateNotes(search.id, notes)}
/>
</div>
<div className="flex gap-2">
<div className="flex gap-2 mt-auto">
<button
onClick={() => onOpen(search.params)}
className="flex-1 px-3 py-1.5 text-sm font-medium rounded bg-teal-600 text-white hover:bg-teal-700"
@ -342,7 +343,7 @@ function SavedPropertiesTab({
return (
<div
key={prop.id}
className="bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden p-4"
className="flex flex-col bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg overflow-hidden p-4"
>
<div className="mb-1">
<h3 className="font-medium text-navy-950 dark:text-warm-100 leading-tight">
@ -360,35 +361,37 @@ function SavedPropertiesTab({
{formatRelativeTime(prop.created)}
</p>
<div className="mb-3">
<div className="mb-3 flex-1">
<NotesInput value={prop.notes} onSave={(notes) => onUpdateNotes(prop.id, notes)} />
</div>
<div className="flex gap-2">
<button
onClick={() => onOpen(prop.postcode)}
className="flex-1 px-3 py-1.5 text-sm font-medium rounded bg-teal-600 text-white hover:bg-teal-700"
>
Open postcode
</button>
<button
onClick={() => setDeleteConfirmId(prop.id)}
className="px-3 py-1.5 text-sm rounded border border-warm-200 dark:border-warm-700 text-warm-500 dark:text-warm-400 hover:text-warm-700 dark:hover:text-warm-300"
title="Delete"
>
<TrashIcon className="w-4 h-4" />
</button>
<div className="mt-auto">
<div className="flex gap-2">
<button
onClick={() => onOpen(prop.postcode)}
className="flex-1 px-3 py-1.5 text-sm font-medium rounded bg-teal-600 text-white hover:bg-teal-700"
>
Open postcode
</button>
<button
onClick={() => setDeleteConfirmId(prop.id)}
className="px-3 py-1.5 text-sm rounded border border-warm-200 dark:border-warm-700 text-warm-500 dark:text-warm-400 hover:text-warm-700 dark:hover:text-warm-300"
title="Delete"
>
<TrashIcon className="w-4 h-4" />
</button>
</div>
{prop.data.listingUrl && (
<a
href={prop.data.listingUrl}
target="_blank"
rel="noopener noreferrer"
className="mt-2 block text-center px-3 py-1.5 text-sm rounded border border-warm-200 dark:border-warm-700 text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300"
>
View listing &rarr;
</a>
)}
</div>
{prop.data.listingUrl && (
<a
href={prop.data.listingUrl}
target="_blank"
rel="noopener noreferrer"
className="mt-2 block text-center px-3 py-1.5 text-sm rounded border border-warm-200 dark:border-warm-700 text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300"
>
View listing &rarr;
</a>
)}
</div>
);
})}

View file

@ -94,10 +94,14 @@ export default function InvitePage({
const isDark = theme === 'dark';
// Signal screenshot readiness once loading completes
// Signal screenshot readiness once loading completes and a frame has painted
useEffect(() => {
if (screenshotMode && !loading) {
window.__screenshot_ready = true;
requestAnimationFrame(() => {
requestAnimationFrame(() => {
window.__screenshot_ready = true;
});
});
}
}, [screenshotMode, loading]);
@ -313,7 +317,7 @@ export default function InvitePage({
<button
onClick={handleRedeem}
disabled={redeeming}
className="w-full px-6 py-3 bg-coral-500 text-white rounded-lg font-semibold hover:bg-coral-600 transition-colors text-lg shadow-lg shadow-coral-500/25 disabled:opacity-50 disabled:cursor-wait flex items-center justify-center gap-2"
className="w-full px-6 py-3 bg-teal-600 text-white rounded-lg font-semibold hover:bg-teal-700 transition-colors text-lg shadow-lg shadow-teal-600/25 disabled:opacity-50 disabled:cursor-wait flex items-center justify-center gap-2"
>
{redeeming && <SpinnerIcon className="w-5 h-5 animate-spin" />}
{isAdminInvite

View file

@ -10,23 +10,18 @@ import { groupFeaturesByCategory } from '../../lib/features';
import { FeatureInfoPopup } from '../ui/FeatureInfoPopup';
import { FeatureActions } from '../ui/FeatureIcons';
import { FeatureLabel } from '../ui/FeatureLabel';
import { CarIcon, BicycleIcon, WalkingIcon, TransitIcon, PlusIcon } from '../ui/icons';
import type { ComponentType } from 'react';
import { PlusIcon, InfoIcon } from '../ui/icons';
import { IconButton } from '../ui/IconButton';
import { TravelTimeInfoPopup } from '../ui/TravelTimeInfoPopup';
import {
TRANSPORT_MODES,
MODE_LABELS,
MODE_DESCRIPTIONS,
MODE_ICONS,
type TransportMode,
type TravelTimeEntry,
} from '../../hooks/useTravelTime';
const MODE_ICONS: Record<TransportMode, ComponentType<{ className?: string }>> = {
car: CarIcon,
bicycle: BicycleIcon,
walking: WalkingIcon,
transit: TransitIcon,
};
interface FeatureBrowserProps {
availableFeatures: FeatureMeta[];
allFeatures: FeatureMeta[];
@ -58,6 +53,7 @@ export default function FeatureBrowser({
}: FeatureBrowserProps) {
const [search, setSearch] = useState('');
const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);
const [travelInfoMode, setTravelInfoMode] = useState<TransportMode | null>(null);
const [expandedGroups, toggleGroup] = useCollapsibleGroups();
const availableTravelModes = useTravelModes();
@ -175,6 +171,12 @@ export default function FeatureBrowser({
</div>
</div>
<div className="flex items-center gap-0.5 shrink-0">
<IconButton
onClick={() => setTravelInfoMode(mode)}
title="Feature info"
>
<InfoIcon className="w-3.5 h-3.5" />
</IconButton>
<button
onClick={() => onAddTravelTimeEntry(mode)}
title={`Add ${MODE_LABELS[mode]} travel time`}
@ -241,6 +243,9 @@ export default function FeatureBrowser({
onNavigateToSource={onNavigateToSource}
/>
)}
{travelInfoMode && (
<TravelTimeInfoPopup mode={travelInfoMode} onClose={() => setTravelInfoMode(null)} />
)}
</>
);
}

View file

@ -1,11 +1,11 @@
import { memo, useState, useMemo, useRef, useCallback, useEffect } from 'react';
import { Slider } from '../ui/Slider';
import { LightbulbIcon } from '../ui/icons';
import { ChevronIcon, LightbulbIcon } from '../ui/icons';
import { PillToggle } from '../ui/PillToggle';
import { PillGroup } from '../ui/PillGroup';
import type { FeatureMeta, FeatureFilters } from '../../types';
import { formatFilterValue, buildPercentileScale } from '../../lib/format';
import { formatFilterValue, parseInputValue, buildPercentileScale } from '../../lib/format';
import type { PercentileScale } from '../../lib/format';
import InfoPopup from '../ui/InfoPopup';
import { FeatureInfoPopup } from '../ui/FeatureInfoPopup';
@ -23,6 +23,73 @@ import {
type ListingType = 'historical' | 'buy' | 'rent';
/**
 * Slider end-point label that turns into a small text input when activated.
 *
 * Read mode renders `formatted` in a dotted-underlined, button-like span that is
 * positioned by `style`. Clicking it (or pressing Enter/Space while it has focus)
 * opens the editor, pre-filled with the rounded numeric `value` and fully selected.
 *
 * Edit mode commits on Enter or blur and cancels on Escape. The typed text is parsed
 * with `parseInputValue` using `prefix`/`suffix`; `onCommit` is called only when
 *   - the text differs from what the editor was opened with, and
 *   - parsing yields a non-null number.
 *
 * @param value     Current numeric value; used to seed the editor.
 * @param formatted Text shown in read mode.
 * @param onCommit  Receives the parsed number when an edit is accepted.
 * @param prefix    Optional unit prefix forwarded to `parseInputValue`.
 * @param suffix    Optional unit suffix forwarded to `parseInputValue`.
 * @param className Extra classes appended to the read-mode span.
 * @param style     Inline style applied to both the span and the input.
 */
function EditableLabel({
  value,
  formatted,
  onCommit,
  prefix,
  suffix,
  className,
  style,
}: {
  value: number;
  formatted: string;
  onCommit: (v: number) => void;
  prefix?: string;
  suffix?: string;
  className?: string;
  style?: React.CSSProperties;
}) {
  const [editing, setEditing] = useState(false);
  const [text, setText] = useState('');
  const inputRef = useRef<HTMLInputElement>(null);
  // Text the editor was opened with. Opening and leaving the editor without typing
  // must not commit the rounded seed, otherwise a fractional value would be changed
  // just by clicking the label.
  const seedTextRef = useRef('');
  // True while an edit session is in progress. Enter/Escape unmount the focused
  // input, and a blur may still be delivered afterwards; this flag makes sure each
  // session is finished exactly once (no double commit, no commit after a cancel).
  const sessionOpenRef = useRef(false);

  const startEdit = () => {
    const seed = String(Math.round(value));
    seedTextRef.current = seed;
    sessionOpenRef.current = true;
    setText(seed);
    setEditing(true);
  };

  const endEdit = (commitChange: boolean) => {
    if (!sessionOpenRef.current) return;
    sessionOpenRef.current = false;
    if (commitChange && text !== seedTextRef.current) {
      const parsed = parseInputValue(text, { prefix, suffix });
      if (parsed != null) onCommit(parsed);
    }
    setEditing(false);
  };

  // Focus and select the whole value as soon as the input is mounted.
  useEffect(() => {
    if (editing) {
      inputRef.current?.focus();
      inputRef.current?.select();
    }
  }, [editing]);

  if (editing) {
    return (
      <input
        ref={inputRef}
        value={text}
        onChange={(e) => setText(e.target.value)}
        onKeyDown={(e) => {
          if (e.key === 'Enter') endEdit(true);
          else if (e.key === 'Escape') endEdit(false);
        }}
        onBlur={() => endEdit(true)}
        className="absolute -translate-x-1/2 w-16 text-[10px] text-center rounded border border-warm-300 dark:border-warm-600 bg-white dark:bg-warm-800 text-warm-700 dark:text-warm-200 px-0.5 focus:outline-none focus:ring-1 focus:ring-teal-400"
        style={style}
      />
    );
  }

  return (
    <span
      role="button"
      tabIndex={0}
      className={`absolute -translate-x-1/2 cursor-pointer hover:text-teal-600 dark:hover:text-teal-400 border-b border-dotted border-warm-400 dark:border-warm-500 ${className ?? ''}`}
      style={style}
      onClick={startEdit}
      onKeyDown={(e) => {
        // Keyboard activation, mirroring what a native button does.
        if (e.key === 'Enter' || e.key === ' ') {
          e.preventDefault(); // stop Space from scrolling the panel
          startEdit();
        }
      }}
    >
      {formatted}
    </span>
  );
}
function SliderLabels({
min,
max,
@ -31,6 +98,8 @@ function SliderLabels({
isAtMin,
isAtMax,
raw,
feature,
onValueChange,
}: {
min: number;
max: number;
@ -39,18 +108,47 @@ function SliderLabels({
isAtMin?: boolean;
isAtMax?: boolean;
raw?: boolean;
feature?: FeatureMeta;
onValueChange?: (v: [number, number]) => void;
}) {
const range = max - min || 1;
const leftPct = ((value[0] - min) / range) * 100;
const rightPct = ((value[1] - min) / range) * 100;
const labels = displayValues || value;
const minLabel = isAtMin ? 'min' : formatFilterValue(labels[0], raw);
const maxLabel = isAtMax ? 'max' : formatFilterValue(labels[1], raw);
if (feature && onValueChange) {
return (
<div className="relative h-4 mt-2 mx-2.5 text-[10px] text-warm-500 dark:text-warm-400 leading-tight">
<EditableLabel
value={labels[0]}
formatted={minLabel}
onCommit={(v) => onValueChange([v, labels[1]])}
prefix={feature.prefix}
suffix={feature.suffix}
style={{ left: `${leftPct}%` }}
/>
<EditableLabel
value={labels[1]}
formatted={maxLabel}
onCommit={(v) => onValueChange([labels[0], v])}
prefix={feature.prefix}
suffix={feature.suffix}
style={{ left: `${rightPct}%` }}
/>
</div>
);
}
return (
<div className="relative h-4 mt-2 mx-2.5 text-[10px] text-warm-500 dark:text-warm-400 leading-tight">
<span className="absolute -translate-x-1/2" style={{ left: `${leftPct}%` }}>
{isAtMin ? 'min' : formatFilterValue(labels[0], raw)}
{minLabel}
</span>
<span className="absolute -translate-x-1/2" style={{ left: `${rightPct}%` }}>
{isAtMax ? 'max' : formatFilterValue(labels[1], raw)}
{maxLabel}
</span>
</div>
);
@ -246,6 +344,7 @@ export default memo(function Filters({
const scrollRef = useRef<HTMLDivElement>(null);
const [showPhilosophy, setShowPhilosophy] = useState(false);
const [activeInfoFeature, setActiveInfoFeature] = useState<FeatureMeta | null>(null);
const [addFilterCollapsed, setAddFilterCollapsed] = useState(false);
const activeEntryCount = travelTimeEntries.length;
const pendingScrollRef = useRef<string | null>(null);
@ -292,10 +391,10 @@ export default memo(function Filters({
ref={containerRef}
className="flex flex-col bg-white dark:bg-navy-950 overflow-y-auto md:overflow-hidden h-full touch-pan-y"
>
<div className="shrink-0 md:shrink md:min-h-0 flex flex-col md:basis-[40%]">
<div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700">
<div className={`shrink-0 md:shrink md:min-h-0 flex flex-col ${addFilterCollapsed ? '' : 'md:basis-[40%]'}`}>
<div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700 bg-teal-50 dark:bg-teal-900/30">
<div className="flex items-center gap-2">
<span className="text-sm font-semibold text-navy-950 dark:text-warm-100">
<span className="text-sm font-semibold text-teal-700 dark:text-teal-400">
Active Filters
</span>
{badgeCount > 0 && (
@ -427,16 +526,18 @@ export default memo(function Filters({
const scale = percentileScales.get(feature.name);
const dataMin = hist?.min ?? feature.min!;
const dataMax = hist?.max ?? feature.max!;
const isAtMin = displayValue[0] <= dataMin;
const isAtMax = displayValue[1] >= dataMax;
const clampMin = displayValue[0] <= dataMin;
const clampMax = displayValue[1] >= dataMax;
const isAtMin = displayValue[0] === dataMin;
const isAtMax = displayValue[1] === dataMax;
const sliderValue: [number, number] = scale
? [
isAtMin ? 0 : Math.round(scale.toPercentile(displayValue[0])),
isAtMax ? 100 : Math.round(scale.toPercentile(displayValue[1])),
clampMin ? 0 : Math.round(scale.toPercentile(displayValue[0])),
clampMax ? 100 : Math.round(scale.toPercentile(displayValue[1])),
]
: [
isAtMin ? feature.min! : displayValue[0],
isAtMax ? feature.max! : displayValue[1],
clampMin ? feature.min! : displayValue[0],
clampMax ? feature.max! : displayValue[1],
];
return (
@ -494,6 +595,8 @@ export default memo(function Filters({
isAtMin={isAtMin}
isAtMax={isAtMax}
raw={feature.raw}
feature={feature}
onValueChange={(v) => onFilterChange(feature.name, v)}
/>
</div>
</div>
@ -503,26 +606,32 @@ export default memo(function Filters({
</div>
</div>
<div className="shrink-0 md:shrink md:min-h-0 hidden md:flex flex-col md:basis-[60%] border-t border-warm-200 dark:border-warm-700">
<div className="shrink-0 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
<span className="text-sm font-semibold text-navy-950 dark:text-warm-100">Add Filter</span>
</div>
<div className="md:min-h-0 md:flex-1 flex flex-col">
<FeatureBrowser
availableFeatures={availableFeatures}
allFeatures={features}
pinnedFeature={pinnedFeature}
onAddFilter={handleAddAndScroll}
onTogglePin={onTogglePin}
onNavigateToSource={onNavigateToSource}
openInfoFeature={openInfoFeature}
onClearOpenInfoFeature={onClearOpenInfoFeature}
travelTimeEntries={travelTimeEntries}
onAddTravelTimeEntry={handleAddTravelTimeAndScroll}
isLicensed={isLicensed}
onUpgradeClick={onUpgradeClick}
/>
</div>
<div className={`shrink-0 md:shrink md:min-h-0 flex flex-col border-t border-warm-200 dark:border-warm-700 ${addFilterCollapsed ? '' : 'md:basis-[60%]'}`}>
<button
onClick={() => setAddFilterCollapsed((v) => !v)}
className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700 bg-teal-50 dark:bg-teal-900/30 cursor-pointer hover:bg-teal-100 dark:hover:bg-teal-900/50"
>
<span className="text-sm font-semibold text-teal-700 dark:text-teal-400">Add Filter</span>
<ChevronIcon direction={addFilterCollapsed ? 'down' : 'up'} className="w-4 h-4 text-warm-400 dark:text-warm-500" />
</button>
{!addFilterCollapsed && (
<div className="md:min-h-0 md:flex-1 flex flex-col">
<FeatureBrowser
availableFeatures={availableFeatures}
allFeatures={features}
pinnedFeature={pinnedFeature}
onAddFilter={handleAddAndScroll}
onTogglePin={onTogglePin}
onNavigateToSource={onNavigateToSource}
openInfoFeature={openInfoFeature}
onClearOpenInfoFeature={onClearOpenInfoFeature}
travelTimeEntries={travelTimeEntries}
onAddTravelTimeEntry={handleAddTravelTimeAndScroll}
isLicensed={isLicensed}
onUpgradeClick={onUpgradeClick}
/>
</div>
)}
</div>
{showPhilosophy && (

View file

@ -111,7 +111,7 @@ export default memo(function HoverCard({
)}
{/* Hint */}
<div className="text-[10px] text-warm-400 dark:text-warm-400 mt-2 text-center">
<div className="text-[10px] text-warm-400 dark:text-warm-500 mt-2 text-center">
Click for details
</div>
</div>

View file

@ -241,7 +241,7 @@ export default function JourneyInstructions({
<span className="text-xs font-medium text-warm-700 dark:text-warm-300">
To {j.label || j.slug}
</span>
{displayLegs && displayLegs.length > 0 && (
{!j.loading && totalMin > 0 && (
<span className="text-xs font-semibold text-teal-700 dark:text-teal-400">
{totalMin} min
</span>
@ -269,6 +269,26 @@ export default function JourneyInstructions({
</svg>
</a>
</div>
) : j.minutes != null ? (
<div>
<div className="flex items-center gap-1.5 py-0.5">
<WalkingIcon className="w-3.5 h-3.5 text-warm-500 dark:text-warm-400 shrink-0" />
<span className="text-xs text-warm-600 dark:text-warm-300">
Walk · {j.minutes} min
</span>
</div>
<a
href={googleMapsUrl(postcode, j.label || j.slug)}
target="_blank"
rel="noopener noreferrer"
className="mt-2 flex items-center justify-center gap-1.5 w-full text-[11px] font-medium text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 bg-white dark:bg-warm-900 border border-warm-200 dark:border-warm-700 rounded-md py-1.5 transition-colors"
>
View on Google Maps
<svg className="w-3 h-3" viewBox="0 0 12 12" fill="none" stroke="currentColor" strokeWidth="1.5">
<path d="M4.5 1.5H2a.5.5 0 00-.5.5v8a.5.5 0 00.5.5h8a.5.5 0 00.5-.5V7.5M7.5 1.5H10.5V4.5M10.5 1.5L5.5 6.5" strokeLinecap="round" strokeLinejoin="round" />
</svg>
</a>
</div>
) : (
<span className="text-xs text-warm-500 dark:text-warm-400">
No journey data available

View file

@ -72,6 +72,7 @@ interface MapPageProps {
onUnsaveProperty?: (id: string) => void;
isPropertySaved?: (address?: string, postcode?: string) => boolean;
getSavedPropertyId?: (address?: string, postcode?: string) => string | undefined;
deferTutorial?: boolean;
}
export default function MapPage({
@ -99,6 +100,7 @@ export default function MapPage({
onUnsaveProperty,
isPropertySaved,
getSavedPropertyId,
deferTutorial = false,
}: MapPageProps) {
const [selectedPOICategories, setSelectedPOICategories] =
useState<Set<string>>(initialPOICategories);
@ -153,6 +155,14 @@ export default function MapPage({
const handleAiFilterSubmit = useCallback(
async (query: string) => {
// Derive current listing type from Listing status filter
const listingVal = filters['Listing status'] as string[] | undefined;
const listingType = listingVal?.includes('For sale')
? 'buy'
: listingVal?.includes('For rent')
? 'rent'
: 'historical';
// Build context from current filters for conversational refinement
const context = {
filters,
@ -165,7 +175,11 @@ export default function MapPage({
};
const hasContext = Object.keys(context.filters).length > 0 || context.travelTime.length > 0;
const result = await aiFilters.fetchAiFilters(query, hasContext ? context : undefined);
const result = await aiFilters.fetchAiFilters(
query,
hasContext ? context : undefined,
listingType
);
if (!result) return;
handleSetFilters(result.filters);
// Always sync travel time entries — clear stale ones when AI returns none
@ -354,7 +368,7 @@ export default function MapPage({
selection.areaStats?.central_postcode,
]);
const tutorial = useTutorial(initialLoading, isMobile);
const tutorial = useTutorial(initialLoading, isMobile, deferTutorial);
const [exporting, setExporting] = useState(false);
const handleExport = useCallback(() => {
@ -418,7 +432,14 @@ export default function MapPage({
? mapData.postcodeData.length > 0
: mapData.data.length > 0;
if (hasData) {
window.__screenshot_ready = true;
// Wait for deck.gl to actually paint: in interleaved MapboxOverlay mode,
// hexagons render during MapLibre's rAF cycle. Double-rAF ensures at
// least one full paint has completed before we signal readiness.
requestAnimationFrame(() => {
requestAnimationFrame(() => {
window.__screenshot_ready = true;
});
});
}
}
}, [

View file

@ -84,7 +84,7 @@ export default function POIPane({
const selectedCount = selectedCategories.size;
return (
<div className="flex flex-col h-full bg-white dark:bg-navy-950 shadow-lg overflow-hidden">
<div className="flex flex-col h-full bg-white dark:bg-warm-900 shadow-lg overflow-hidden">
<div className="flex-shrink-0 px-3 pt-3 pb-2">
<div className="flex items-center gap-2">
<span className="text-xs font-semibold text-warm-500 dark:text-warm-400 uppercase tracking-wide">

View file

@ -4,24 +4,13 @@ import { IconButton } from '../ui/IconButton';
import { PillToggle } from '../ui/PillToggle';
import { DestinationDropdown } from '../ui/DestinationDropdown';
import InfoPopup from '../ui/InfoPopup';
import { TravelTimeInfoPopup } from '../ui/TravelTimeInfoPopup';
import { CloseIcon } from '../ui/icons/CloseIcon';
import { EyeIcon } from '../ui/icons/EyeIcon';
import { InfoIcon } from '../ui/icons/InfoIcon';
import { CarIcon } from '../ui/icons/CarIcon';
import { BicycleIcon } from '../ui/icons/BicycleIcon';
import { WalkingIcon } from '../ui/icons/WalkingIcon';
import { TransitIcon } from '../ui/icons/TransitIcon';
import { formatFilterValue } from '../../lib/format';
import { useTravelDestinations } from '../../hooks/useTravelDestinations';
import { MODE_LABELS, type TransportMode } from '../../hooks/useTravelTime';
import type { ComponentType } from 'react';
const MODE_ICONS: Record<TransportMode, ComponentType<{ className?: string }>> = {
car: CarIcon,
bicycle: BicycleIcon,
walking: WalkingIcon,
transit: TransitIcon,
};
import { MODE_LABELS, MODE_ICONS, type TransportMode } from '../../hooks/useTravelTime';
interface TravelTimeCardProps {
mode: TransportMode;
@ -118,21 +107,7 @@ export function TravelTimeCard({
</div>
)}
{showInfo && (
<InfoPopup title={`Travel Time (${MODE_LABELS[mode]})`} onClose={() => setShowInfo(false)}>
<p className="text-sm text-warm-700 dark:text-warm-300 leading-relaxed">
Shows how long it takes to reach the selected destination from each area
{mode === 'transit'
? ' by public transport (bus, rail, tube). Times are computed across a typical weekday morning window.'
: mode === 'car'
? ' by car, based on typical road speeds and the road network.'
: mode === 'bicycle'
? ' by bicycle, using cycle-friendly routes.'
: ' on foot, using pedestrian paths and pavements.'}{' '}
Use the slider to filter areas within your preferred commute time.
</p>
</InfoPopup>
)}
{showInfo && <TravelTimeInfoPopup mode={mode} onClose={() => setShowInfo(false)} />}
{showBestInfo && (
<InfoPopup title="Best case travel time" onClose={() => setShowBestInfo(false)}>

View file

@ -87,7 +87,7 @@ export default function AuthModal({
if (e.target === e.currentTarget) onClose();
}}
>
<div className="absolute inset-0 bg-black/50 dark:bg-black/70" onMouseDown={onClose} />
<div className="absolute inset-0 bg-black/50 dark:bg-black/70" />
<div className="relative w-full max-w-sm mx-4 bg-white dark:bg-warm-900 rounded-lg shadow-xl border border-warm-200 dark:border-warm-700">
{/* Header */}
<div className="flex items-center justify-between px-5 pt-5 pb-3">

View file

@ -1,6 +1,6 @@
import { useState, useCallback, useEffect } from 'react';
import type { AuthUser } from '../../hooks/useAuth';
import { shortenUrl } from '../../lib/api';
import { shortenUrl, prewarmScreenshot } from '../../lib/api';
import { copyToClipboard } from '../../lib/clipboard';
import { DownloadIcon } from './icons/DownloadIcon';
import { BookmarkIcon } from './icons/BookmarkIcon';
@ -96,6 +96,7 @@ export default function Header({
doCopy(window.location.href);
return;
}
prewarmScreenshot(params);
setSharing(true);
try {
const shortUrl = await shortenUrl(params);
@ -243,6 +244,7 @@ export default function Header({
theme={theme}
onToggleTheme={onToggleTheme}
onLogout={onLogout}
onNavigate={onPageChange}
/>
) : (
<>

View file

@ -23,7 +23,7 @@ export default function InfoPopup({ title, children, onClose, sourceLink }: Info
<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30 p-4">
<div
ref={popupRef}
className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full max-h-full overflow-y-auto p-5"
className="bg-white dark:bg-warm-800 border border-warm-200 dark:border-warm-700 rounded-lg shadow-xl max-w-md w-full max-h-full overflow-y-auto p-5"
>
<div className="flex items-start justify-between mb-3">
<h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">{title}</h3>

View file

@ -0,0 +1,27 @@
import InfoPopup from './InfoPopup';
import { MODE_LABELS, type TransportMode } from '../../hooks/useTravelTime';
/**
 * Mode-specific sentence fragment inserted into the popup text directly after
 * "...from each area". Every entry starts with a space and ends with a full stop,
 * so it splices into the surrounding sentence without extra punctuation.
 */
const MODE_INFO: Record<TransportMode, string> = {
transit:
' by public transport (bus, rail, tube). Times are computed across a typical weekday morning window.',
car: ' by car, based on typical road speeds and the road network.',
bicycle: ' by bicycle, using cycle-friendly routes.',
walking: ' on foot, using pedestrian paths and pavements.',
};
/**
 * Info popup explaining what a travel-time filter shows for one transport mode.
 *
 * The title carries the mode's label from `MODE_LABELS`; the body sentence is
 * completed with the mode's fragment from `MODE_INFO`.
 *
 * @param props.mode    Transport mode being described.
 * @param props.onClose Forwarded to `InfoPopup` as its `onClose` handler.
 */
export function TravelTimeInfoPopup(props: { mode: TransportMode; onClose: () => void }) {
  const { mode, onClose } = props;
  const heading = `Travel Time (${MODE_LABELS[mode]})`;
  const modeSentence = MODE_INFO[mode];

  return (
    <InfoPopup title={heading} onClose={onClose}>
      <p className="text-sm text-warm-700 dark:text-warm-300 leading-relaxed">
        Shows how long it takes to reach the selected destination from each area
        {modeSentence} Use the slider to filter areas within your preferred commute time.
      </p>
    </InfoPopup>
  );
}

View file

@ -1,5 +1,7 @@
import { useState, useRef, useEffect } from 'react';
import type { AuthUser } from '../../hooks/useAuth';
import type { Page } from './Header';
import { PAGE_PATHS } from './Header';
import { SunIcon } from './icons/SunIcon';
import { MoonIcon } from './icons/MoonIcon';
@ -8,11 +10,13 @@ export default function UserMenu({
theme,
onToggleTheme,
onLogout,
onNavigate,
}: {
user: AuthUser;
theme: 'light' | 'dark';
onToggleTheme: () => void;
onLogout: () => void;
onNavigate: (page: Page) => void;
}) {
const [open, setOpen] = useState(false);
const menuRef = useRef<HTMLDivElement>(null);
@ -72,8 +76,13 @@ export default function UserMenu({
Theme: {theme === 'light' ? 'Light' : 'Dark'}
</button>
<a
href="/account"
onClick={() => setOpen(false)}
href={PAGE_PATHS.account}
onClick={(e) => {
if (e.metaKey || e.ctrlKey || e.shiftKey || e.button !== 0) return;
e.preventDefault();
setOpen(false);
onNavigate('account');
}}
className="block w-full text-left px-3 py-2 text-sm text-warm-700 dark:text-warm-300 hover:bg-warm-50 dark:hover:bg-warm-700 rounded"
>
Account

View file

@ -17,6 +17,8 @@ export interface AiFiltersResult {
notes: string;
/** Human-readable summary of what was set */
summary: string;
/** The listing mode used (historical/buy/rent) */
listingType: string;
}
export type AiFilterErrorType = 'auth' | 'limit' | 'error';
@ -28,7 +30,11 @@ export interface AiFiltersContext {
}
interface UseAiFiltersResult {
fetchAiFilters: (query: string, context?: AiFiltersContext) => Promise<AiFiltersResult | null>;
fetchAiFilters: (
query: string,
context?: AiFiltersContext,
listingType?: string
) => Promise<AiFiltersResult | null>;
loading: boolean;
error: string | null;
errorType: AiFilterErrorType | null;
@ -41,6 +47,8 @@ function buildSummary(filters: FeatureFilters, travelTimeFilters: AiTravelTimeFi
const parts: string[] = [];
for (const [name, value] of Object.entries(filters)) {
// Skip Listing status — shown via the mode selector UI
if (name === 'Listing status') continue;
if (Array.isArray(value) && value.length === 2 && typeof value[0] === 'number') {
parts.push(name);
} else if (Array.isArray(value)) {
@ -67,7 +75,11 @@ export function useAiFilters(): UseAiFiltersResult {
const abortRef = useRef<AbortController | null>(null);
const fetchAiFilters = useCallback(
async (query: string, context?: AiFiltersContext): Promise<AiFiltersResult | null> => {
async (
query: string,
context?: AiFiltersContext,
listingType?: string
): Promise<AiFiltersResult | null> => {
abortRef.current?.abort();
const controller = new AbortController();
abortRef.current = controller;
@ -81,6 +93,7 @@ export function useAiFilters(): UseAiFiltersResult {
try {
const url = apiUrl('ai-filters');
const bodyObj: Record<string, unknown> = { query };
if (listingType) bodyObj.listing_type = listingType;
if (context) {
bodyObj.context = {
filters: context.filters,
@ -130,6 +143,7 @@ export function useAiFilters(): UseAiFiltersResult {
travelTimeFilters,
notes: json.notes || '',
summary: summaryText,
listingType: json.listing_type || 'historical',
};
setNotes(result.notes || null);
setSummary(summaryText);

View file

@ -95,7 +95,7 @@ export function useDeckLayers({
useEffect(() => {
if (!hasSelection) return;
setMarchTime(0);
const id = setInterval(() => setMarchTime((t) => t + 0.3), 50);
const id = setInterval(() => setMarchTime((t) => (t + 0.3) % 10000), 50);
return () => clearInterval(id);
}, [hasSelection]);

View file

@ -1,4 +1,6 @@
import { useState, useCallback, useMemo } from 'react';
import type { ComponentType } from 'react';
import { CarIcon, BicycleIcon, WalkingIcon, TransitIcon } from '../components/ui/icons';
export type TransportMode = 'car' | 'bicycle' | 'walking' | 'transit';
@ -18,6 +20,13 @@ export const MODE_DESCRIPTIONS: Record<TransportMode, string> = {
transit: 'Journey time by train, tube, and bus',
};
/**
 * Icon component for each transport mode. Every component accepts an optional
 * `className`. Exported from this module so components can import one shared
 * table instead of declaring their own copy.
 */
export const MODE_ICONS: Record<TransportMode, ComponentType<{ className?: string }>> = {
car: CarIcon,
bicycle: BicycleIcon,
walking: WalkingIcon,
transit: TransitIcon,
};
export interface TravelTimeEntry {
mode: TransportMode;
slug: string;

View file

@ -59,13 +59,13 @@ const STEPS: Step[] = [
},
];
export function useTutorial(initialLoading: boolean, isMobile: boolean) {
export function useTutorial(initialLoading: boolean, isMobile: boolean, blocked = false) {
const [run, setRun] = useState(() => {
if (isMobile) return false;
return !localStorage.getItem(STORAGE_KEY);
});
const shouldRun = run && !initialLoading && !isMobile;
const shouldRun = run && !initialLoading && !isMobile && !blocked;
const handleCallback = useCallback((data: CallBackProps) => {
const { status, action, type } = data;

View file

@ -59,6 +59,12 @@ export async function fetchWithRetry<T>(
}
}
/**
 * Kick off a screenshot request so the OG-image cache is warm before a share link
 * is opened. Fire-and-forget: the response is never read and failures are ignored.
 *
 * @param params Query string of the current view; sent after an `og=1` flag.
 */
export function prewarmScreenshot(params: string): void {
  const query = new URLSearchParams(`og=1&${params}`);
  const pending = fetch(apiUrl('screenshot', query), authHeaders());
  // Best-effort only: swallow rejections so they never surface as unhandled.
  pending.catch(() => {});
}
export async function shortenUrl(params: string): Promise<string> {
const res = await fetch(apiUrl('shorten'), {
method: 'POST',

View file

@ -1,7 +1,18 @@
/** Copy text to clipboard with execCommand fallback for older browsers. */
export function copyToClipboard(text: string, onSuccess: () => void): void {
if (navigator.clipboard?.writeText) {
navigator.clipboard.writeText(text).then(onSuccess);
navigator.clipboard.writeText(text).then(onSuccess).catch(() => {
// Fallback if clipboard permission denied
const ta = document.createElement('textarea');
ta.value = text;
ta.style.position = 'fixed';
ta.style.opacity = '0';
document.body.appendChild(ta);
ta.select();
document.execCommand('copy');
document.body.removeChild(ta);
onSuccess();
});
} else {
const ta = document.createElement('textarea');
ta.value = text;

View file

@ -35,7 +35,7 @@ export const ZOOM_TO_RESOLUTION_THRESHOLDS = [
{ maxZoom: 13, resolution: 9 },
] as const;
export const POSTCODE_ZOOM_THRESHOLD = 16;
export const POSTCODE_ZOOM_THRESHOLD = 15;
export const FEATURE_GRADIENT: { t: number; color: [number, number, number] }[] = [
{ t: 0, color: [46, 204, 113] },
@ -183,8 +183,8 @@ export const STACKED_ENUM_GROUPS: Record<
},
{
label: 'Leasehold/Freehold',
feature: 'Leashold/Freehold',
components: ['Leashold/Freehold'],
feature: 'Leasehold/Freehold',
components: ['Leasehold/Freehold'],
valueOrder: ['Freehold', 'Leasehold'],
valueColors: ['#3b82f6', '#f59e0b'],
},

View file

@ -49,24 +49,57 @@ const RIGHTMOVE_PRICES = [
3000000, 4000000, 5000000, 7500000, 10000000, 15000000, 20000000,
];
function nearestRadius(target: number, allowed: number[]): number {
return allowed.reduce((best, r) => (Math.abs(r - target) < Math.abs(best - target) ? r : best));
}
// Rightmove allowed monthly rent values (pcm).
// Keep in ascending order: snapToAllowed scans the list from one end and returns
// the first match, so the ordering determines which value a price snaps to.
const RIGHTMOVE_RENTS = [
250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2500, 3000,
3500, 4000, 5000, 7500, 10000, 15000, 25000,
];
/** Snap minPrice down and maxPrice up so Rightmove doesn't ignore them */
function snapRightmovePrice(value: number, direction: 'floor' | 'ceil'): number {
// OnTheMarket allowed buy prices.
// Keep in ascending order: snapToAllowed scans the list from one end and returns
// the first match, so the ordering determines which value a price snaps to.
const OTM_PRICES = [
50000, 60000, 70000, 80000, 90000, 100000, 110000, 120000, 125000, 130000, 140000, 150000,
160000, 170000, 175000, 180000, 190000, 200000, 210000, 220000, 230000, 240000, 250000, 275000,
300000, 325000, 350000, 375000, 400000, 425000, 450000, 475000, 500000, 550000, 600000, 650000,
700000, 750000, 800000, 900000, 1000000, 1250000, 1500000, 2000000, 2500000, 3000000, 5000000,
7500000, 10000000, 15000000,
];
// OnTheMarket allowed monthly rent values (pcm).
// Keep in ascending order: snapToAllowed scans the list from one end and returns
// the first match, so the ordering determines which value a rent snaps to.
const OTM_RENTS = [
100, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950, 1000,
1100, 1200, 1250, 1300, 1400, 1500, 1750, 2000, 2500, 3000, 3500, 4000, 5000, 7500, 10000,
25000,
];
// Zoopla allowed buy prices.
// Keep in ascending order: snapToAllowed scans the list from one end and returns
// the first match, so the ordering determines which value a price snaps to.
const ZOOPLA_PRICES = [
10000, 25000, 50000, 75000, 100000, 125000, 150000, 175000, 200000, 225000, 250000, 275000,
300000, 325000, 350000, 375000, 400000, 425000, 450000, 475000, 500000, 550000, 600000, 650000,
700000, 800000, 900000, 1000000, 1250000, 1500000, 1750000, 2000000, 2500000, 3000000, 4000000,
5000000, 7500000, 10000000, 15000000,
];
// Zoopla allowed monthly rent values (pcm).
// Keep in ascending order: snapToAllowed scans the list from one end and returns
// the first match, so the ordering determines which value a rent snaps to.
const ZOOPLA_RENTS = [
100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1250, 1500, 1750, 2000, 2500, 3000, 3500,
4000, 5000, 7500, 10000, 25000,
];
function snapToAllowed(value: number, allowed: number[], direction: 'floor' | 'ceil'): number {
if (direction === 'floor') {
// Largest supported value <= target
for (let i = RIGHTMOVE_PRICES.length - 1; i >= 0; i--) {
if (RIGHTMOVE_PRICES[i] <= value) return RIGHTMOVE_PRICES[i];
for (let i = allowed.length - 1; i >= 0; i--) {
if (allowed[i] <= value) return allowed[i];
}
return RIGHTMOVE_PRICES[0];
return allowed[0];
}
// Smallest supported value >= target
for (const p of RIGHTMOVE_PRICES) {
for (const p of allowed) {
if (p >= value) return p;
}
return RIGHTMOVE_PRICES[RIGHTMOVE_PRICES.length - 1];
return allowed[allowed.length - 1];
}
/**
 * Pick the entry of `allowed` whose distance to `target` is smallest.
 * On a tie the entry that appears first in `allowed` wins, because a later
 * candidate only replaces the current one when it is strictly closer.
 *
 * @param target  Desired radius.
 * @param allowed Radii to choose from; must be non-empty (reduce has no seed).
 * @returns The closest allowed radius.
 */
function nearestRadius(target: number, allowed: number[]): number {
  const distanceTo = (radius: number): number => Math.abs(radius - target);
  return allowed.reduce((closest, candidate) => {
    if (distanceTo(candidate) < distanceTo(closest)) {
      return candidate;
    }
    return closest;
  });
}
interface SearchUrlOptions {
@ -90,7 +123,17 @@ export function buildPropertySearchUrls({
const radiusMiles = isPostcode ? 0.25 : (H3_RADIUS_MILES[resolution] ?? 1);
const priceFilter = filters['Last known price'];
const listingStatus = filters['Listing status'];
const isRent =
Array.isArray(listingStatus) &&
typeof listingStatus[0] === 'string' &&
(listingStatus as string[]).includes('For rent');
// Check price filters in priority order: asking price (current listings) > estimated > last known
// For rent mode, check asking rent first
const priceFilter = isRent
? filters['Asking rent (monthly)']
: (filters['Asking price'] ?? filters['Estimated current price'] ?? filters['Last known price']);
const minPrice =
Array.isArray(priceFilter) && typeof priceFilter[0] === 'number' ? priceFilter[0] : undefined;
const maxPrice =
@ -131,15 +174,16 @@ export function buildPropertySearchUrls({
// Rightmove — requires locationIdentifier from typeahead API
let rightmove: string | null = null;
if (rightmoveLocationId) {
const rmPrices = isRent ? RIGHTMOVE_RENTS : RIGHTMOVE_PRICES;
const rmParams = new URLSearchParams();
rmParams.set('searchLocation', postcode);
rmParams.set('useLocationIdentifier', 'true');
rmParams.set('locationIdentifier', rightmoveLocationId);
rmParams.set('radius', String(nearestRadius(radiusMiles, RIGHTMOVE_RADII)));
if (minPrice !== undefined)
rmParams.set('minPrice', String(snapRightmovePrice(minPrice, 'floor')));
rmParams.set('minPrice', String(snapToAllowed(minPrice, rmPrices, 'floor')));
if (maxPrice !== undefined)
rmParams.set('maxPrice', String(snapRightmovePrice(maxPrice, 'ceil')));
rmParams.set('maxPrice', String(snapToAllowed(maxPrice, rmPrices, 'ceil')));
if (minBedrooms !== undefined) rmParams.set('minBedrooms', String(Math.floor(minBedrooms)));
if (maxBedrooms !== undefined) rmParams.set('maxBedrooms', String(Math.ceil(maxBedrooms)));
if (minBathrooms !== undefined) rmParams.set('minBathrooms', String(Math.floor(minBathrooms)));
@ -155,20 +199,24 @@ export function buildPropertySearchUrls({
];
if (rmTypes.length > 0) rmParams.set('propertyTypes', rmTypes.join(','));
}
if (selectedTenures.length > 0) {
if (!isRent && selectedTenures.length > 0) {
const rmTenures = selectedTenures.map((t) => (t === 'Freehold' ? 'FREEHOLD' : 'LEASEHOLD'));
rmParams.set('tenureTypes', rmTenures.join(','));
}
rmParams.set('_includeSSTC', 'on');
rightmove = `https://www.rightmove.co.uk/property-for-sale/find.html?${rmParams.toString()}`;
if (!isRent) rmParams.set('_includeSSTC', 'on');
const rmPath = isRent ? 'property-to-rent' : 'property-for-sale';
rightmove = `https://www.rightmove.co.uk/${rmPath}/find.html?${rmParams.toString()}`;
}
// OnTheMarket — postcode slug in URL path (e.g. "SW1A 1AA" → "sw1a-1aa")
const otmSlug = postcode.toLowerCase().replace(/\s+/g, '-');
const otmPrices = isRent ? OTM_RENTS : OTM_PRICES;
const otmParams = new URLSearchParams();
otmParams.set('radius', String(nearestRadius(radiusMiles, OTM_RADII)));
if (minPrice !== undefined) otmParams.set('min-price', String(Math.round(minPrice)));
if (maxPrice !== undefined) otmParams.set('max-price', String(Math.round(maxPrice)));
if (minPrice !== undefined)
otmParams.set('min-price', String(snapToAllowed(minPrice, otmPrices, 'floor')));
if (maxPrice !== undefined)
otmParams.set('max-price', String(snapToAllowed(maxPrice, otmPrices, 'ceil')));
if (selectedTypes.length > 0) {
const otmTypes = [
...new Set(selectedTypes.map((t) => PROPERTY_TYPE_MAP[t]?.onthemarket).filter(Boolean)),
@ -178,15 +226,20 @@ export function buildPropertySearchUrls({
}
}
otmParams.set('view', 'map-list');
const onthemarket = `https://www.onthemarket.com/for-sale/property/${otmSlug}/?${otmParams.toString()}`;
const otmPath = isRent ? 'to-rent' : 'for-sale';
const onthemarket = `https://www.onthemarket.com/${otmPath}/property/${otmSlug}/?${otmParams.toString()}`;
// Zoopla
const zPrices = isRent ? ZOOPLA_RENTS : ZOOPLA_PRICES;
const zParams = new URLSearchParams();
zParams.set('q', postcode);
zParams.set('search_source', 'for-sale');
const zSearchSource = isRent ? 'to-rent' : 'for-sale';
zParams.set('search_source', zSearchSource);
zParams.set('radius', String(nearestRadius(radiusMiles, ZOOPLA_RADII)));
if (minPrice !== undefined) zParams.set('price_min', String(Math.round(minPrice)));
if (maxPrice !== undefined) zParams.set('price_max', String(Math.round(maxPrice)));
if (minPrice !== undefined)
zParams.set('price_min', String(snapToAllowed(minPrice, zPrices, 'floor')));
if (maxPrice !== undefined)
zParams.set('price_max', String(snapToAllowed(maxPrice, zPrices, 'ceil')));
if (selectedTypes.length > 0) {
const zTypes = [
...new Set(selectedTypes.map((t) => PROPERTY_TYPE_MAP[t]?.zoopla).filter(Boolean)),
@ -195,14 +248,9 @@ export function buildPropertySearchUrls({
zParams.append('property_sub_type', zt!);
}
}
const zoopla = `https://www.zoopla.co.uk/for-sale/property/?${zParams.toString()}`;
const zoopla = `https://www.zoopla.co.uk/${zSearchSource}/property/?${zParams.toString()}`;
// OpenRent — rent mode only
const listingStatus = filters['Listing status'];
const isRent =
Array.isArray(listingStatus) &&
typeof listingStatus[0] === 'string' &&
(listingStatus as string[]).includes('For rent');
let openrent: string | null = null;
if (isRent) {
const postcodeNoSpaces = postcode.replace(/\s+/g, '');

View file

@ -23,6 +23,26 @@ export function formatFilterValue(value: number, raw?: boolean): string {
return value.toFixed(2);
}
/**
 * Parse a user-typed value like "250k", "1.2M", "£300000", "50 sqm" back to a number.
 *
 * @param text - Raw text typed by the user.
 * @param opts - Optional formatting hints:
 *   - `prefix`: literal text stripped from the start of the input if present (e.g. "£").
 *   - `suffix`: literal text stripped from the end of the input if present; it is trimmed
 *     first, so " sqm" matches both "50 sqm" and "50sqm".
 *   - `step`: when set, the result is rounded to the nearest multiple of this value.
 * @returns The parsed number, or `null` when the text is not a plain decimal number with an
 *   optional `k` (×1,000) or `m` (×1,000,000) multiplier.
 */
export function parseInputValue(
  text: string,
  opts?: { prefix?: string; suffix?: string; step?: number }
): number | null {
  let s = text.trim();

  // Prefix and suffix are matched as literal text, never as regex source, so values such as
  // "US$" or "(est.)" cannot be misread as a pattern.
  if (opts?.prefix && s.startsWith(opts.prefix)) s = s.slice(opts.prefix.length);
  const suffix = opts?.suffix?.trim();
  if (suffix && s.endsWith(suffix)) s = s.slice(0, s.length - suffix.length);

  // Drop thousands separators before matching the numeric shape.
  s = s.trim().replace(/,/g, '');
  const m = s.match(/^(-?\d+\.?\d*)\s*([kKmM]?)$/);
  if (!m) return null;

  let val = parseFloat(m[1]);
  if (isNaN(val)) return null;

  const unit = m[2].toLowerCase();
  if (unit === 'k') val *= 1_000;
  else if (unit === 'm') val *= 1_000_000;

  if (opts?.step) {
    // Multiplying back by a fractional step leaks binary floating-point noise
    // (7 * 0.1 === 0.7000000000000001); 15 significant digits removes it without
    // affecting realistic prices or percentages.
    val = Number((Math.round(val / opts.step) * opts.step).toPrecision(15));
  }
  return val;
}
export function formatDuration(d: string): string {
if (d === 'F') return 'Freehold';
if (d === 'L') return 'Leasehold';

View file

@ -186,11 +186,11 @@ def _build(
lsoa_pop = pl.scan_parquet(lsoa_population_path)
wide = wide.join(lsoa_pop, on="lsoa21", how="left")
wide = wide.with_columns(
(pl.col("serious_crime_avg_yr") / pl.col("population") * 1000)
.round(1)
pl.when(pl.col("population") > 0)
.then((pl.col("serious_crime_avg_yr") / pl.col("population") * 1000).round(1))
.alias("serious_crime_per_1k"),
(pl.col("minor_crime_avg_yr") / pl.col("population") * 1000)
.round(1)
pl.when(pl.col("population") > 0)
.then((pl.col("minor_crime_avg_yr") / pl.col("population") * 1000).round(1))
.alias("minor_crime_per_1k"),
).drop("population")

View file

@ -37,4 +37,4 @@ def extract_zip(zip_path: Path, extract_dir: Path) -> None:
"""Extract a ZIP archive into the given directory."""
extract_dir.mkdir(parents=True, exist_ok=True)
with zipfile.ZipFile(zip_path, "r") as zf:
zf.extractall(extract_dir)
zf.extractall(extract_dir, filter="data")

View file

@ -84,7 +84,7 @@ def fuzzy_join_on_postcode(
right_match["_right_postcode"],
right_match["_right_address"],
):
if postcode is not None:
if address is not None and postcode is not None:
right_by_postcode.setdefault(postcode, []).append((idx, address))
# Group left side by postcode

View file

@ -5,7 +5,7 @@ import { NetworkCache } from './network-cache.js';
const VIEWPORT = { width: 1200, height: 630 };
const NAVIGATION_TIMEOUT = 15_000;
const READY_TIMEOUT = 15_000;
const RENDER_BUFFER_MS = 200;
const RENDER_BUFFER_MS = 500;
const POOL_SIZE = 3;
let browser: Browser | null = null;
@ -262,9 +262,10 @@ export async function takeScreenshot(url: string, authHeader?: string): Promise<
const t2 = performance.now();
console.log(` Ready: ${(t2 - t1).toFixed(0)}ms`);
// Brief buffer for SwiftShader to finish rendering the WebGL frame.
// Reduced from 500ms → 200ms since tiles now load from the in-memory
// cache and don't need network round-trips.
// Buffer for SwiftShader to finish rendering the WebGL frame after
// __screenshot_ready fires. The frontend uses double-rAF before signaling,
// so one paint cycle has already completed — this is extra safety for
// compositor staging and any residual tile/layer rendering.
await page.waitForTimeout(RENDER_BUFFER_MS);
// JPEG at quality 85: ~3-5x faster encoding than PNG with negligible

View file

@ -1,15 +1,21 @@
#!/usr/bin/env -S uv run --project ../finder
"""Zoopla scraping experiment — Playwright with stealth + network interception.
"""Zoopla scraping experiment — working prototype using Camoufox.
Zoopla uses Next.js App Router with React Server Components. The listing data
is NOT in __NEXT_DATA__ or the initial HTML it's fetched client-side after
hydration. This means we need a real browser that:
1. Passes Cloudflare's bot detection
2. Executes JavaScript to trigger the client-side data fetch
3. Intercepts the network response OR scrapes the rendered DOM
Key findings:
- Zoopla uses Cloudflare Turnstile (managed interactive challenge)
- Playwright headless Chromium + stealth patches CANNOT beat it
- Camoufox (anti-fingerprinting Firefox fork) PASSES Cloudflare
- Zoopla uses Next.js App Router with React Server Components (RSC)
- Listing data is NOT in __NEXT_DATA__ — it's server-rendered in the RSC stream
- URL-based location slugs (e.g. /properties/london/) return 0 results
- Must use the search autocomplete (GraphQL: getGeoSuggestion) to resolve
a location, then submit the form to get results
- GraphQL endpoint: api-graphql-lambda.prod.zoopla.co.uk/graphql
- Listings loaded via getTopLeadListingIds + getRareFindLeadListingIds ops
Usage:
uv run --project finder scripts/zoopla_experiment.py [OUTCODE]
uv run --project finder scripts/zoopla_experiment.py [LOCATION]
uv run --project finder scripts/zoopla_experiment.py "Tower Hamlets"
"""
import json
@ -25,294 +31,250 @@ logging.basicConfig(
)
log = logging.getLogger("zoopla-exp")
ZOOPLA_BASE = "https://www.zoopla.co.uk"
CHANNELS = {
"BUY": "for-sale",
"RENT": "to-rent",
}
def scrape_zoopla(location: str = "London", channel: str = "BUY"):
from camoufox.sync_api import Camoufox
tab_label = "Buy" if channel == "BUY" else "Rent"
log.info("Scraping Zoopla: location=%s channel=%s", location, channel)
def run_playwright_stealth(outcode: str, channel: str = "BUY"):
"""Use Playwright with stealth patches to scrape Zoopla.
with Camoufox(headless=True) as browser:
page = browser.new_page()
Strategy:
1. Launch stealth browser to bypass Cloudflare
2. Navigate to search page
3. Wait for listings to render (client-side hydration)
4. Try two extraction methods:
a. Intercept network requests for API data (cleanest)
b. Parse the rendered DOM (fallback)
"""
from playwright.sync_api import sync_playwright
from playwright_stealth import Stealth
# Intercept GraphQL responses
graphql_responses = []
url_segment = CHANNELS[channel]
search_url = f"{ZOOPLA_BASE}/{url_segment}/properties/{outcode.lower()}/"
log.info("Target: %s", search_url)
intercepted_data = []
def handle_response(response):
"""Capture any API responses that look like listing data."""
url = response.url
# Look for API/data endpoints
if any(kw in url for kw in ["/api/", "graphql", "search", "listing", "property"]):
try:
if "application/json" in (response.headers.get("content-type", "")):
def on_resp(response):
url = response.url
ct = response.headers.get("content-type", "")
if "json" in ct and "graphql" in url:
try:
body = response.json()
intercepted_data.append({"url": url, "data": body})
log.info(" [intercepted] %s (%s)", url[:100], type(body).__name__)
req = response.request.post_data or ""
graphql_responses.append({"body": body, "req": req})
except Exception:
pass
page.on("response", on_resp)
# Step 1: Load homepage and pass Cloudflare
log.info("Loading Zoopla homepage...")
page.goto("https://www.zoopla.co.uk/", wait_until="domcontentloaded", timeout=60000)
for i in range(20):
if "Just a moment" not in page.title():
break
time.sleep(3)
else:
log.error("Cloudflare did not resolve after 60s")
return []
log.info("Homepage loaded: %s", page.title())
time.sleep(3)
# Step 2: Dismiss cookie consent (shadow DOM)
page.evaluate("""() => {
const aside = document.querySelector('#usercentrics-cmp-ui');
if (aside && aside.shadowRoot) {
const btns = aside.shadowRoot.querySelectorAll('button');
for (const btn of btns) {
if (btn.innerText.includes('Accept')) { btn.click(); return; }
}
}
aside?.remove();
}""")
time.sleep(2)
# Step 3: Select Buy/Rent tab if needed
if channel == "RENT":
rent_tab = page.query_selector('button:has-text("Rent")') or page.query_selector(f'[role="tab"]:has-text("{tab_label}")')
if rent_tab:
rent_tab.click()
time.sleep(1)
# Step 4: Type location into search and select autocomplete suggestion
log.info("Searching for '%s'...", location)
search_input = (
page.query_selector('input[name="autosuggest-input"]')
or page.query_selector('input[type="text"]')
)
if not search_input:
log.error("Could not find search input")
return []
search_input.click()
time.sleep(0.5)
search_input.fill("") # Clear any existing text
search_input.type(location, delay=80)
time.sleep(3)
# Select first autocomplete suggestion
first_option = page.query_selector('[role="option"]')
if first_option:
suggestion_text = first_option.inner_text()
log.info("Selecting suggestion: %s", suggestion_text)
first_option.click()
time.sleep(1)
else:
log.warning("No autocomplete suggestions appeared")
# Step 5: Submit search
search_btn = page.query_selector('button:has-text("Search")')
if search_btn:
search_btn.click()
else:
search_input.press("Enter")
log.info("Waiting for results...")
time.sleep(10)
final_url = page.url
final_title = page.title()
log.info("URL: %s", final_url)
log.info("Title: %s", final_title)
# Step 6: Extract listings from rendered DOM
listings = page.evaluate(r"""() => {
const links = Array.from(document.querySelectorAll(
'a[href*="/for-sale/details/"], a[href*="/new-homes/details/"], a[href*="/to-rent/details/"]'
));
const seen = new Set();
const results = [];
for (const link of links) {
const href = link.href;
const match = href.match(/\/details\/(\d+)\//);
if (!match) continue;
const id = match[1];
if (seen.has(id)) continue;
seen.add(id);
// Walk up to find the listing card container
let card = link;
for (let j = 0; j < 10; j++) {
card = card.parentElement;
if (!card) break;
const text = card.innerText || '';
// A listing card should have a price and at least beds or area
if (text.includes('£') && (text.includes('bed') || text.includes('sq ft'))) {
break;
}
}
if (!card) continue;
const text = card.innerText || '';
const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
const priceMatch = text.match(/£([\d,]+)/);
const bedsMatch = text.match(/(\d+)\s*beds?/i);
const bathsMatch = text.match(/(\d+)\s*baths?/i);
const recMatch = text.match(/(\d+)\s*reception/i);
const areaMatch = text.match(/([\d,]+)\s*sq\s*ft/i);
// Try to find address usually a line with a postcode or comma-separated location
let address = '';
for (const line of lines) {
if (/[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}/i.test(line) ||
(line.includes(',') && !line.includes('£') && !line.match(/^\d+ beds?/i))) {
address = line;
break;
}
}
// Tenure
let tenure = '';
if (/freehold/i.test(text)) tenure = 'Freehold';
else if (/leasehold/i.test(text)) tenure = 'Leasehold';
results.push({
id: id,
url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
beds: bedsMatch ? parseInt(bedsMatch[1]) : null,
baths: bathsMatch ? parseInt(bathsMatch[1]) : null,
receptions: recMatch ? parseInt(recMatch[1]) : null,
floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null,
address: address,
tenure: tenure,
text_preview: lines.slice(0, 10).join(' | '),
});
}
return results;
}""")
log.info("Extracted %d unique listings from page 1", len(listings))
# Step 7: Check for results count and pagination
body_text = page.inner_text("body")
count_match = re.search(r"([\d,]+)\s+results?", body_text)
total_results = int(count_match.group(1).replace(",", "")) if count_match else len(listings)
log.info("Total results: %d", total_results)
# Step 8: Log GraphQL operations we saw
log.info("GraphQL operations intercepted:")
for gql in graphql_responses:
try:
req = json.loads(gql["req"])
op = req.get("operationName", "?")
log.info(" - %s", op)
except Exception:
pass
with sync_playwright() as p:
# Launch with stealth-friendly args
browser = p.chromium.launch(
headless=True,
args=[
"--disable-blink-features=AutomationControlled",
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-web-security",
"--lang=en-GB",
],
)
context = browser.new_context(
locale="en-GB",
timezone_id="Europe/London",
viewport={"width": 1920, "height": 1080},
user_agent=(
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
),
)
page = context.new_page()
# Apply stealth patches (Linux platform, Chrome UA)
stealth = Stealth(
navigator_platform_override="Linux x86_64",
navigator_languages_override=("en-GB", "en"),
)
stealth.apply_stealth_sync(page)
# Listen for responses to intercept API data
page.on("response", handle_response)
# Navigate
log.info("Navigating to %s ...", search_url)
try:
page.goto(search_url, wait_until="domcontentloaded", timeout=60000)
except Exception as e:
log.error("Navigation failed: %s", e)
browser.close()
return
# Wait for Cloudflare to resolve
log.info("Waiting for Cloudflare challenge to resolve ...")
for attempt in range(20):
content = page.content()
title = page.title()
if "Just a moment" in content and "challenge" in content.lower():
log.info(" Cloudflare challenge still active (%d/20) title=%s", attempt + 1, title)
time.sleep(3)
else:
log.info(" Challenge resolved! title=%s", title)
break
else:
log.error("Cloudflare challenge did not resolve")
# Dump page content for debugging
print("\n=== Cloudflare challenge page ===")
print(page.content()[:3000])
browser.close()
return
# Wait for actual content to render
log.info("Waiting for listing content to render ...")
try:
# Try waiting for property cards to appear
page.wait_for_selector(
'[data-testid="search-result"], [data-testid="regular-listings"], '
'.listing-results, .css-kdnlof, [class*="ListingCard"], '
'[class*="listing"], [class*="PropertyCard"]',
timeout=15000,
)
log.info("Listing elements found in DOM!")
except Exception:
log.warning("No listing elements found by selector. Trying to wait for prices...")
try:
page.wait_for_function(
"document.querySelectorAll('a[href*=\"/for-sale/details/\"]').length > 0",
timeout=15000,
)
log.info("Listing links found in DOM!")
except Exception:
log.warning("No listing links either. Page may still be loading or we're blocked.")
# Give hydration a moment
time.sleep(3)
# --- Extraction Method A: Check intercepted network data ---
if intercepted_data:
print(f"\n=== Intercepted {len(intercepted_data)} API responses ===")
for item in intercepted_data:
print(f"\nURL: {item['url'][:150]}")
data = item["data"]
if isinstance(data, dict):
print(f"Keys: {list(data.keys())[:15]}")
# Look for listings inside
for k, v in data.items():
if isinstance(v, list) and len(v) > 2 and isinstance(v[0], dict):
print(f" {k}: list of {len(v)} items, [0] keys={list(v[0].keys())[:10]}")
elif isinstance(data, list) and data:
print(f"Array of {len(data)} items")
if isinstance(data[0], dict):
print(f" [0] keys: {list(data[0].keys())[:15]}")
print(json.dumps(data, indent=2, default=str, ensure_ascii=False)[:2000])
# --- Extraction Method B: Parse rendered DOM ---
log.info("Extracting from rendered DOM ...")
# Get full page content after hydration
content = page.content()
# Find listing URLs
listing_urls = re.findall(r'href="(/for-sale/details/\d+/[^"]*)"', content)
log.info("Found %d listing detail links", len(listing_urls))
# Find prices
prices = re.findall(r'£([\d,]+)', content)
log.info("Found %d price strings", len(prices))
if prices:
log.info("Prices: %s", prices[:10])
# Try to extract structured listing data from the page
listings = page.evaluate("""() => {
// Try to find listing cards via various selectors
const selectors = [
'[data-testid="search-result"]',
'[data-testid="regular-listings"] > div',
'a[href*="/for-sale/details/"]',
'[class*="ListingCard"]',
'[class*="listing-result"]',
];
for (const sel of selectors) {
const elements = document.querySelectorAll(sel);
if (elements.length > 2) {
return {
selector: sel,
count: elements.length,
// Get text and href from first 3
samples: Array.from(elements).slice(0, 3).map(el => ({
text: el.innerText?.substring(0, 300),
href: el.href || el.querySelector('a')?.href || '',
html: el.outerHTML?.substring(0, 500),
}))
};
}
}
// Fallback: find all links to listing detail pages
const links = Array.from(document.querySelectorAll('a[href*="/details/"]'));
if (links.length > 0) {
return {
selector: 'a[href*="/details/"]',
count: links.length,
samples: links.slice(0, 5).map(el => ({
text: el.innerText?.substring(0, 300),
href: el.href,
parentText: el.closest('div, li, article')?.innerText?.substring(0, 500) || '',
}))
};
}
// Last resort: get page structure
return {
selector: 'none',
count: 0,
bodyText: document.body?.innerText?.substring(0, 2000),
title: document.title,
};
}""")
print(f"\n=== DOM Extraction Results ===")
print(json.dumps(listings, indent=2, ensure_ascii=False)[:5000])
# Also extract cookies for potential reuse
cookies = context.cookies()
zoopla_cookies = {c["name"]: c["value"] for c in cookies if ".zoopla.co.uk" in c.get("domain", "")}
# Step 9: Extract cookies for potential curl_cffi reuse
cookies = page.context.cookies()
session_cookies = {
c["name"]: c["value"]
for c in cookies
if "zoopla" in c.get("domain", "") or "cf" in c.get("name", "").lower()
}
ua = page.evaluate("navigator.userAgent")
print(f"\n=== Session Info ===")
print(f"Cookies ({len(zoopla_cookies)}): {list(zoopla_cookies.keys())}")
print(f"User-Agent: {ua}")
if zoopla_cookies:
# Save cookies for reuse
print(f"\n=== Reusable cookie env vars ===")
for name, value in zoopla_cookies.items():
print(f" {name}={value[:50]}...")
# --- Try a detail page if we found any listing URLs ---
if listing_urls:
detail_path = listing_urls[0]
detail_url = f"{ZOOPLA_BASE}{detail_path}"
log.info("--- Fetching detail page: %s ---", detail_url)
time.sleep(2)
page.goto(detail_url, wait_until="domcontentloaded", timeout=30000)
time.sleep(5) # Let it hydrate
detail = page.evaluate("""() => {
const result = {};
// Price
const priceEl = document.querySelector('[data-testid="price"]')
|| document.querySelector('[class*="price"]');
result.price = priceEl?.innerText || '';
// Address
const addrEl = document.querySelector('[data-testid="address-label"]')
|| document.querySelector('h1') || document.querySelector('address');
result.address = addrEl?.innerText || '';
// Key features
const features = Array.from(document.querySelectorAll('[data-testid="listing_feature"] li, [class*="feature"] li'));
result.features = features.map(f => f.innerText).slice(0, 15);
// Bedrooms/bathrooms from icons or text
const specs = document.querySelectorAll('[data-testid="beds-label"], [data-testid="baths-label"], [class*="bed"], [class*="bath"]');
result.specs = Array.from(specs).map(s => s.innerText).slice(0, 5);
// Description
const desc = document.querySelector('[data-testid="listing_description"], [class*="description"]');
result.description = desc?.innerText?.substring(0, 500) || '';
// Agent
const agent = document.querySelector('[data-testid="agent-details"], [class*="agent"]');
result.agent = agent?.innerText?.substring(0, 200) || '';
// Full page text summary
result.pageTitle = document.title;
result.bodyPreview = document.body?.innerText?.substring(0, 1000);
return result;
}""")
print(f"\n=== Detail Page Data ===")
print(json.dumps(detail, indent=2, ensure_ascii=False)[:3000])
browser.close()
return {
"url": final_url,
"title": final_title,
"total_results": total_results,
"listings": listings,
"cookies": session_cookies,
"user_agent": ua,
}
def main():
outcode = sys.argv[1] if len(sys.argv) > 1 else "E1"
channel = "BUY"
log.info("=== Zoopla Scraping Experiment (Playwright Stealth) ===")
log.info("Outcode: %s, Channel: %s", outcode, channel)
run_playwright_stealth(outcode, channel)
log.info("=== Done ===")
location = sys.argv[1] if len(sys.argv) > 1 else "London"
result = scrape_zoopla(location, channel="BUY")
if not result:
log.error("Scraping failed")
sys.exit(1)
listings = result["listings"]
print(f"\n{'='*60}")
print(f" Zoopla: {result['title']}")
print(f" URL: {result['url']}")
print(f" Total: {result['total_results']} results, {len(listings)} extracted")
print(f"{'='*60}\n")
for i, listing in enumerate(listings):
print(f"--- Listing {i+1}: {listing['url']} ---")
display = {k: v for k, v in listing.items() if k != "text_preview" and v}
print(json.dumps(display, indent=2, ensure_ascii=False))
print()
# Summary stats
prices = [l["price"] for l in listings if l["price"]]
beds = [l["beds"] for l in listings if l["beds"]]
if prices:
print(f"Price range: £{min(prices):,} - £{max(prices):,}")
print(f"Median: £{sorted(prices)[len(prices)//2]:,}")
if beds:
print(f"Bedrooms: {min(beds)}-{max(beds)}")
# Cookie info for reuse
print(f"\nSession cookies ({len(result['cookies'])} cookies)")
print(f"User-Agent: {result['user_agent']}")
if __name__ == "__main__":

View file

@ -23,7 +23,7 @@ pub struct POIData {
/// Byte offset into `id_buffer` where each row's ID starts.
id_offsets: Vec<u32>,
/// Length in bytes of each row's ID.
id_lengths: Vec<u8>,
id_lengths: Vec<u16>,
pub group: InternedColumn,
pub category: InternedColumn,
pub name: Vec<String>,
@ -101,7 +101,7 @@ impl POIData {
let mut id_lengths = Vec::with_capacity(row_count);
for s in &id_raw {
let offset = id_buffer.len() as u32;
let length = s.len().min(u8::MAX as usize) as u8;
let length = s.len().min(u16::MAX as usize) as u16;
id_offsets.push(offset);
id_lengths.push(length);
id_buffer.push_str(&s[..length as usize]);

View file

@ -128,6 +128,7 @@ impl PostcodeData {
// Compute centroid across all vertices from all rings
let total_vertices: usize = rings.iter().map(|ring| ring.len()).sum();
let centroid = if total_vertices == 0 {
tracing::warn!(postcode = %postcode, "Postcode polygon has zero vertices, defaulting centroid to (0,0)");
(0.0, 0.0)
} else {
let mut sum_lat: f32 = 0.0;

View file

@ -68,9 +68,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
features: &[
FeatureConfig {
name: "Last known price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 10000.0,
description: "Most recent sale price from the Land Registry",
@ -79,15 +79,15 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "",
raw: false,
absolute: true,
absolute: false,
modes: &["historical"],
linked: "",
},
FeatureConfig {
name: "Estimated current price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 10000.0,
description: "Inflation-adjusted estimate of the current property value",
@ -96,7 +96,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "",
raw: false,
absolute: true,
absolute: false,
modes: &["historical"],
linked: "Asking price",
},
@ -252,9 +252,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
},
FeatureConfig {
name: "Asking price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 10000.0,
description: "Listed asking price for properties currently for sale",
@ -263,15 +263,15 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "",
raw: false,
absolute: true,
absolute: false,
modes: &["buy"],
linked: "Estimated current price",
},
FeatureConfig {
name: "Asking rent (monthly)",
bounds: Bounds::Fixed {
min: 0.0,
max: 10_000.0,
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 50.0,
description: "Listed monthly rent for properties currently for rent",
@ -280,7 +280,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
prefix: "£",
suffix: "/mo",
raw: false,
absolute: true,
absolute: false,
modes: &["rent"],
linked: "Estimated monthly rent",
},
@ -870,7 +870,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as South Asian",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Indian, Pakistani, Bangladeshi, or any other Asian background.",
source: "ethnicity",
@ -887,7 +887,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as East Asian",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Chinese.",
source: "ethnicity",
@ -904,7 +904,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as Black",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Black, Black British, Caribbean, or African.",
source: "ethnicity",
@ -921,7 +921,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as Mixed or Multiple ethnic groups",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Mixed or Multiple ethnic groups (White and Black Caribbean, White and Black African, White and Asian, or any other Mixed or Multiple background).",
source: "ethnicity",
@ -938,7 +938,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
min: 0.0,
max: 100.0,
},
step: 1.0,
step: 0.1,
description: "Percentage of population identifying as Other ethnic group",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Other ethnic group (Arab or any other ethnic group not covered by the main categories).",
source: "ethnicity",

View file

@ -365,6 +365,7 @@ async fn main() -> anyhow::Result<()> {
info!("Precomputed AI filters system prompt");
let token_cache = Arc::new(auth::TokenCache::new());
let superuser_token_cache = Arc::new(pocketbase::SuperuserTokenCache::new());
let app_state = AppState {
data: property_data,
@ -392,6 +393,7 @@ async fn main() -> anyhow::Result<()> {
gemini_model: cli.gemini_model,
travel_time_store,
token_cache,
superuser_token_cache,
ai_filters_system_prompt,
google_maps_api_key: cli.google_maps_api_key,
stripe_secret_key: cli.stripe_secret_key,

View file

@ -65,6 +65,14 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
format!("{}/api/screenshot?og=1&{}", state.public_url, query_string)
};
let og_url = if query_string.is_empty() {
format!("{}{}", state.public_url, path)
} else {
format!("{}{}?{}", state.public_url, path, query_string)
};
let og_logo = format!("{}/favicon.svg", state.public_url);
let (og_title, og_description) = if is_invite {
(
"You\u{2019}re invited to Perfect Postcode",
@ -81,6 +89,8 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
r#"<meta property="og:title" content="{og_title}" />
<meta property="og:description" content="{og_description}" />
<meta property="og:type" content="website" />
<meta property="og:url" content="{og_url}" />
<meta property="og:logo" content="{og_logo}" />
<meta property="og:image" content="{og_image_url}" />
<meta property="og:image:width" content="1200" />
<meta property="og:image:height" content="630" />

View file

@ -54,16 +54,21 @@ pub fn parse_filters(
// Check if this is an enum feature
if let Some(values) = enum_values.get(&feat_idx) {
// Enum filter: convert string values to u16 indices
let allowed: FxHashSet<u16> = rest
.split('|')
.filter_map(|value| {
let value = value.trim();
values
.iter()
.position(|existing| existing == value)
.map(|position| position as u16)
})
.collect();
let mut allowed: FxHashSet<u16> = FxHashSet::default();
for value in rest.split('|') {
let value = value.trim();
match values.iter().position(|existing| existing == value) {
Some(position) => {
allowed.insert(position as u16);
}
None => {
return Err(format!(
"Unknown value '{}' for enum feature '{}'. Valid values: {:?}",
value, name, values
));
}
}
}
enums.push(ParsedEnumFilter { feat_idx, allowed });
} else {
// Numeric filter: parse min:max and encode to u16
@ -369,20 +374,16 @@ mod tests {
}
#[test]
fn parse_enum_with_unknown_value() {
fn parse_enum_with_unknown_value_errors() {
let tq = test_quant(4, 2);
let (_numeric, enums) = parse_filters(
let result = parse_filters(
Some("Type:Detached|Unknown|Flats/Maisonettes"),
&extended_feature_map(),
&extended_enum_values(),
&tq.as_ref(),
)
.unwrap();
assert_eq!(enums.len(), 1);
assert!(enums[0].allowed.contains(&0)); // Detached
assert!(enums[0].allowed.contains(&3)); // Flats/Maisonettes
assert_eq!(enums[0].allowed.len(), 2);
);
assert!(result.is_err());
assert!(result.unwrap_err().contains("Unknown value 'Unknown'"));
}
#[test]

View file

@ -1,13 +1,62 @@
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use metrics::gauge;
use parking_lot::RwLock;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::state::AppState;
/// Cache TTL for the superuser token. PocketBase superuser JWTs are valid for
/// ~14 days by default, so 10 minutes is very conservative while eliminating
/// nearly all redundant auth requests (metrics poller, newsletter, invites, etc.).
const SUPERUSER_TOKEN_TTL_SECS: u64 = 600;
/// In-memory cache for the PocketBase superuser token.
///
/// Stores the most recently obtained token together with the `Instant` at which it
/// was stored. The slot is `None` until a token has been written.
pub struct SuperuserTokenCache {
    /// `(token, stored_at)` pair behind a read/write lock; `None` means nothing is cached.
    token: RwLock<Option<(String, Instant)>>,
}

impl SuperuserTokenCache {
    /// Create an empty cache with no token stored.
    pub fn new() -> Self {
        Self {
            token: RwLock::new(None),
        }
    }
}

impl Default for SuperuserTokenCache {
    /// Equivalent to [`SuperuserTokenCache::new`]: an empty cache.
    fn default() -> Self {
        Self::new()
    }
}
/// Return a PocketBase superuser token, reusing the cached one when possible.
///
/// A cached token is handed back while it has been stored for fewer than
/// `SUPERUSER_TOKEN_TTL_SECS` seconds. Otherwise `auth_superuser` is called with the
/// URL and admin credentials held in `state`, and the new token replaces the cache entry.
///
/// # Errors
///
/// Propagates any error returned by `auth_superuser`; the cache is not written in that case.
pub async fn get_superuser_token(state: &AppState) -> anyhow::Result<String> {
    let cache = &state.superuser_token_cache.token;

    // Fast path under the read lock. The guard lives only inside this block, so it is
    // released before the `.await` further down.
    let still_valid = {
        let guard = cache.read();
        match guard.as_ref() {
            Some((token, stored_at))
                if stored_at.elapsed().as_secs() < SUPERUSER_TOKEN_TTL_SECS =>
            {
                Some(token.clone())
            }
            _ => None,
        }
    };
    if let Some(token) = still_valid {
        return Ok(token);
    }

    // Nothing usable in the cache: authenticate against PocketBase.
    let base_url = state.pocketbase_url.trim_end_matches('/');
    let fresh = auth_superuser(
        &state.http_client,
        base_url,
        &state.pocketbase_admin_email,
        &state.pocketbase_admin_password,
    )
    .await?;

    // Remember the new token together with the moment it was stored.
    *cache.write() = Some((fresh.clone(), Instant::now()));

    Ok(fresh)
}
#[derive(Deserialize)]
struct AuthResponse {
token: String,
@ -775,21 +824,14 @@ pub fn start_metrics_poller(shared: Arc<crate::state::SharedState>) {
}
async fn poll_pocketbase_counts(state: &AppState) {
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(state).await {
Ok(tk) => tk,
Err(err) => {
warn!("PocketBase metrics poll auth failed: {err}");
return;
}
};
let pb_url = state.pocketbase_url.trim_end_matches('/');
// Simple collection counts
for (collection, metric_name) in [

View file

@ -12,7 +12,7 @@ use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::consts::{AI_FILTERS_MAX_TOKENS, AI_FILTERS_TEMPERATURE, AI_FILTERS_WEEKLY_TOKEN_LIMIT};
use crate::data::slugify;
use crate::pocketbase::auth_superuser;
use crate::pocketbase::get_superuser_token;
use crate::routes::{FeatureInfo, FeaturesResponse};
use crate::state::{AppState, SharedState};
use crate::utils::gemini_chat;
@ -37,6 +37,8 @@ pub struct AiFiltersRequest {
query: String,
/// Current filters for conversational refinement (e.g. "make it cheaper")
context: Option<AiFiltersContext>,
/// Current listing mode (historical/buy/rent). Defaults to "historical".
listing_type: Option<String>,
}
#[derive(Serialize)]
@ -58,6 +60,8 @@ pub struct AiFiltersResponse {
/// What the LLM couldn't map to existing filters (empty if everything matched)
#[serde(skip_serializing_if = "String::is_empty")]
notes: String,
/// The listing mode used for this response (historical/buy/rent)
listing_type: String,
}
/// Strip markdown code fences (```json ... ``` or ``` ... ```) from LLM output.
@ -268,6 +272,37 @@ pub fn build_system_prompt(
modes_list,
));
// Listing modes section
parts.push(
"\n--- LISTING MODES ---\n\
There are three listing modes that control which property data is shown:\n\
- \"historical\": Historical sales from Land Registry (default). Uses features like \
\"Last known price\", \"Estimated current price\", \"Price per sqm\".\n\
- \"buy\": Properties currently listed for sale. Uses features like \"Asking price\", \
\"Asking price per sqm\".\n\
- \"rent\": Properties currently listed for rent. Uses features like \
\"Asking rent (monthly)\".\n\
\n\
When the user mentions buying, purchasing, for-sale properties, or asking prices, \
set listing_type to \"buy\".\n\
When the user mentions renting, letting, rental properties, or monthly rent, \
set listing_type to \"rent\".\n\
When the user doesn't specify or mentions historical prices/past sales, \
omit listing_type to keep the current mode.\n\
\n\
Features marked with [mode] below are only available in that mode. \
Features without a mode annotation work in all modes. \
ONLY use features valid for the chosen listing_type.\n\
If the user mentions price and the mode is \"buy\", use \"Asking price\" (not \"Last known price\").\n\
If the user mentions rent/price and the mode is \"rent\", use \"Asking rent (monthly)\".\n\
\n\
Feature equivalences across modes:\n\
- \"Estimated current price\" (historical) ↔ \"Asking price\" (buy)\n\
- \"Est. price per sqm\" (historical) ↔ \"Asking price per sqm\" (buy)\n\
- \"Estimated monthly rent\" (historical) ↔ \"Asking rent (monthly)\" (rent)"
.to_string(),
);
// Feature catalogue
parts.push("\n--- AVAILABLE FEATURES ---\n".to_string());
for group in &features.groups {
@ -285,11 +320,17 @@ pub fn build_system_prompt(
description,
prefix,
suffix,
modes,
..
} => {
let mode_str = if modes.is_empty() {
String::new()
} else {
format!(" [{}]", modes.join("/"))
};
parts.push(format!(
"- \"{}\" (numeric, {}{:.0}{} to {}{:.0}{}): {}",
name, prefix, min, suffix, prefix, max, suffix, description
"- \"{}\"{} (numeric, {}{:.0}{} to {}{:.0}{}): {}",
name, mode_str, prefix, min, suffix, prefix, max, suffix, description
));
}
FeatureInfo::Enum {
@ -298,6 +339,10 @@ pub fn build_system_prompt(
description,
..
} => {
// Skip Listing status — handled via listing_type field
if name == "Listing status" {
continue;
}
parts.push(format!(
"- \"{}\" (enum, values: [{}]): {}",
name,
@ -381,10 +426,37 @@ pub fn build_system_prompt(
.to_string(),
);
// Examples showing listing mode switching
parts.push(
"\nUser: \"2 bed flat to rent under £1500/month\"\n\
Output: {\"listing_type\": \"rent\", \
\"numeric_filters\": [{\"name\": \"Asking rent (monthly)\", \"bound\": \"max\", \"value\": 1500}], \
\"enum_filters\": [{\"name\": \"Property type\", \"values\": [\"Flats/Maisonettes\"]}], \
\"travel_time_filters\": [], \
\"notes\": \"\"}"
.to_string(),
);
parts.push(
"\nUser: \"3 bed house to buy under 500k with good schools\"\n\
Output: {\"listing_type\": \"buy\", \
\"numeric_filters\": [{\"name\": \"Asking price\", \"bound\": \"max\", \"value\": 500000}, \
{\"name\": \"Good+ primary schools within 5km\", \"bound\": \"min\", \"value\": 5}], \
\"enum_filters\": [{\"name\": \"Property type\", \
\"values\": [\"Detached\", \"Semi-Detached\", \"Terraced\"]}], \
\"travel_time_filters\": [], \
\"notes\": \"\"}"
.to_string(),
);
// Output format reminder
parts.push(
"\n--- OUTPUT FORMAT ---\n\
{\"numeric_filters\": [...], \"enum_filters\": [...], \"travel_time_filters\": [{\"mode\": \"...\", \"slug\": \"...\", \"label\": \"...\", \"bound\": \"min\"|\"max\", \"value\": N}, ...], \"notes\": \"...\"}\n\
{\"listing_type\": \"buy\"|\"rent\" (OPTIONAL — only when switching mode), \
\"numeric_filters\": [...], \"enum_filters\": [...], \
\"travel_time_filters\": [{\"mode\": \"...\", \"slug\": \"...\", \"label\": \"...\", \
\"bound\": \"min\"|\"max\", \"value\": N}, ...], \"notes\": \"...\"}\n\
- listing_type: include only when the user explicitly wants to buy or rent. Omit to keep current mode.\n\
- travel_time_filters: use ONLY slugs returned by search_destinations. If a place isn't found, mention it in notes.\n\
Respond with ONLY the JSON object. No explanation."
.to_string(),
@ -409,19 +481,12 @@ async fn fetch_ai_usage(
state: &AppState,
user_id: &str,
) -> Result<(u64, u64), (StatusCode, String)> {
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
.map_err(|err| {
let token = get_superuser_token(state).await.map_err(|err| {
warn!("Failed to auth superuser for AI usage check: {err}");
(StatusCode::BAD_GATEWAY, "Internal error".into())
})?;
let pb_url = state.pocketbase_url.trim_end_matches('/');
let url = format!("{pb_url}/api/collections/users/records/{user_id}");
let resp = state
.http_client
@ -460,15 +525,7 @@ async fn fetch_ai_usage(
/// Update the user's AI token usage in PocketBase.
/// Best-effort — logs warnings on failure but does not propagate errors.
async fn update_ai_usage(state: &AppState, user_id: &str, tokens_used: u64, week: u64) {
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(state).await {
Ok(tk) => tk,
Err(err) => {
warn!("Failed to auth superuser for AI usage update: {err}");
@ -476,6 +533,7 @@ async fn update_ai_usage(state: &AppState, user_id: &str, tokens_used: u64, week
}
};
let pb_url = state.pocketbase_url.trim_end_matches('/');
let url = format!("{pb_url}/api/collections/users/records/{user_id}");
let res = state
.http_client
@ -533,9 +591,17 @@ pub async fn post_ai_filters(
let tools = build_tool_declarations(&state);
// Build user message with optional context for conversational refinement
// Resolve current listing mode from request
let current_mode = req.listing_type.as_deref().unwrap_or("historical");
let current_mode = match current_mode {
"historical" | "buy" | "rent" => current_mode,
_ => "historical",
};
// Build user message with listing mode and optional context for conversational refinement
let user_text = if let Some(ref ctx) = req.context {
let mut msg = String::new();
msg.push_str(&format!("Current listing mode: {}\n", current_mode));
msg.push_str("Currently active filters:\n");
msg.push_str(&serde_json::to_string(&ctx.filters).unwrap_or_default());
if !ctx.travel_time.is_empty() {
@ -553,7 +619,10 @@ pub async fn post_ai_filters(
msg.push_str(&format!("\nUser request: {}", req.query));
msg
} else {
req.query.clone()
format!(
"Current listing mode: {}\nUser request: {}",
current_mode, req.query
)
};
let mut contents = vec![json!({
@ -679,7 +748,17 @@ pub async fn post_ai_filters(
}
};
let filters = validate_and_convert(&raw, &state.features_response);
// Resolve listing_type: LLM output > request > "historical"
let listing_type = raw
.get("listing_type")
.and_then(|val| val.as_str())
.unwrap_or(current_mode);
let listing_type = match listing_type {
"historical" | "buy" | "rent" => listing_type,
_ => current_mode,
};
let mut filters = validate_and_convert(&raw, &state.features_response, listing_type);
let travel_time_filters = validate_travel_time_filters(&raw, &state);
let notes = raw
.get("notes")
@ -687,6 +766,16 @@ pub async fn post_ai_filters(
.unwrap_or("")
.to_string();
// Auto-inject Listing status filter for the chosen mode
let listing_value = match listing_type {
"buy" => "For sale",
"rent" => "For rent",
_ => "Historical sale",
};
if let Value::Object(ref mut map) = filters {
map.insert("Listing status".to_string(), json!([listing_value]));
}
// Update usage with total accumulated tokens
let new_total = tokens_used + total_tokens_accumulated;
update_ai_usage(&state, &user.id, new_total, current_week).await;
@ -698,6 +787,7 @@ pub async fn post_ai_filters(
filters,
travel_time_filters,
notes,
listing_type: listing_type.to_string(),
}));
}
@ -787,10 +877,10 @@ fn validate_travel_time_filters(raw: &Value, state: &AppState) -> Vec<TravelTime
/// ```json
/// { "Last known price": [0, 300000], "Leasehold/Freehold": ["Freehold"] }
/// ```
fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type: &str) -> Value {
let mut result = serde_json::Map::new();
// Build lookup maps from feature metadata
// Build lookup maps from feature metadata, filtering by listing mode
let mut numeric_features: rustc_hash::FxHashMap<&str, (f32, f32)> =
rustc_hash::FxHashMap::default();
let mut enum_features: rustc_hash::FxHashMap<&str, &[String]> =
@ -799,11 +889,23 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse) -> Value {
for group in &features.groups {
for feature in &group.features {
match feature {
FeatureInfo::Numeric { name, min, max, .. } => {
numeric_features.insert(name, (*min, *max));
FeatureInfo::Numeric {
name,
min,
max,
modes,
..
} => {
// Only include features valid for the chosen listing mode
if modes.is_empty() || modes.contains(&listing_type) {
numeric_features.insert(name, (*min, *max));
}
}
FeatureInfo::Enum { name, values, .. } => {
enum_features.insert(name, values);
// Skip Listing status — handled via auto-injection
if name != "Listing status" {
enum_features.insert(name, values);
}
}
}
}

View file

@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::pocketbase::auth_superuser;
use crate::pocketbase::get_superuser_token;
use crate::state::{AppState, SharedState};
use super::pricing::{count_licensed_users, price_for_count};
@ -88,6 +88,8 @@ pub async fn post_checkout(
state.stripe_referral_coupon_id.clone(),
));
info!(code = %code, "Applying referral coupon to checkout");
} else {
warn!(code = %code, "Referral code validation failed, proceeding without discount");
}
}
@ -131,15 +133,9 @@ pub async fn post_checkout(
/// Grant a license by updating the user's subscription to "licensed" in PocketBase.
async fn grant_license(state: &AppState, user_id: &str) -> anyhow::Result<()> {
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await?;
let token = get_superuser_token(state).await?;
let pb_url = state.pocketbase_url.trim_end_matches('/');
let url = format!("{pb_url}/api/collections/users/records/{user_id}");
let resp = state
.http_client

View file

@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::pocketbase::auth_superuser;
use crate::pocketbase::get_superuser_token;
use crate::state::SharedState;
#[derive(Serialize)]
@ -118,14 +118,7 @@ pub async fn post_invites(
let code = generate_invite_code();
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("Failed to auth as PocketBase superuser: {err}");
@ -202,14 +195,7 @@ pub async fn get_invite(
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("Failed to auth as PocketBase superuser: {err}");
@ -325,14 +311,7 @@ pub async fn post_redeem_invite(
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("Failed to auth as PocketBase superuser: {err}");
@ -500,14 +479,7 @@ pub async fn get_invites(
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("Failed to auth as PocketBase superuser: {err}");

View file

@ -8,7 +8,7 @@ use serde::Deserialize;
use tracing::warn;
use crate::auth::OptionalUser;
use crate::pocketbase::auth_superuser;
use crate::pocketbase::get_superuser_token;
use crate::state::SharedState;
#[derive(Deserialize)]
@ -27,16 +27,7 @@ pub async fn patch_newsletter(
None => return StatusCode::UNAUTHORIZED.into_response(),
};
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("Failed to authenticate as PocketBase superuser: {err}");
@ -44,6 +35,7 @@ pub async fn patch_newsletter(
}
};
let pb_url = state.pocketbase_url.trim_end_matches('/');
let url = format!("{pb_url}/api/collections/users/records/{}", user.id);
let res = state
.http_client

View file

@ -281,7 +281,7 @@ pub async fn get_postcodes(
histogram!("postcodes_response_count").record(features.len() as f64);
let truncated = features.len() > MAX_CELLS_PER_REQUEST;
let truncated = features.len() >= MAX_CELLS_PER_REQUEST;
let t_total = t0.elapsed();
info!(
postcodes_before_filter,

View file

@ -7,7 +7,7 @@ use axum::Json;
use serde::Serialize;
use tracing::warn;
use crate::pocketbase::auth_superuser;
use crate::pocketbase::get_superuser_token;
use crate::state::{AppState, SharedState};
/// Pricing tiers: (cumulative user cap, price in pence).
@ -45,15 +45,9 @@ pub fn price_for_count(count: u64) -> u64 {
/// Count users with subscription="licensed" in PocketBase.
pub async fn count_licensed_users(state: &AppState) -> anyhow::Result<u64> {
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await?;
let token = get_superuser_token(state).await?;
let pb_url = state.pocketbase_url.trim_end_matches('/');
let filter = "subscription=\"licensed\"";
let url = format!(
"{pb_url}/api/collections/users/records?filter={}&perPage=1",

View file

@ -147,6 +147,7 @@ fn rebuild_data(shared: &SharedState, start: Instant) -> anyhow::Result<(usize,
poi_category_groups: Arc::clone(&old.poi_category_groups),
travel_time_store: Arc::clone(&old.travel_time_store),
token_cache: Arc::clone(&old.token_cache),
superuser_token_cache: Arc::clone(&old.superuser_token_cache),
// Config (cheap clone)
screenshot_url: old.screenshot_url.clone(),

View file

@ -8,7 +8,7 @@ use rand::Rng;
use serde::{Deserialize, Serialize};
use tracing::warn;
use crate::pocketbase::auth_superuser;
use crate::pocketbase::get_superuser_token;
use crate::state::SharedState;
const CODE_LEN: usize = 8;
@ -42,14 +42,7 @@ pub async fn post_shorten(State(shared): State<Arc<SharedState>>, Json(req): Jso
let state = shared.load_state();
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("PocketBase superuser auth failed: {err}");
@ -102,14 +95,7 @@ pub async fn get_short_url(State(shared): State<Arc<SharedState>>, Path(code): P
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("PocketBase superuser auth failed: {err}");

View file

@ -8,7 +8,7 @@ use hmac::{Hmac, Mac};
use sha2::Sha256;
use tracing::{info, warn};
use crate::pocketbase::auth_superuser;
use crate::pocketbase::get_superuser_token;
use crate::state::SharedState;
type HmacSha256 = Hmac<Sha256>;
@ -31,6 +31,19 @@ fn verify_signature(payload: &[u8], sig_header: &str, secret: &str) -> bool {
_ => return false,
};
// Reject webhooks older than 5 minutes to prevent replay attacks
if let Ok(ts_secs) = ts.parse::<i64>() {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs() as i64;
if (now - ts_secs).abs() > 300 {
return false;
}
} else {
return false;
}
// Compute expected signature: HMAC-SHA256(secret, "TIMESTAMP.PAYLOAD")
let signed_payload = format!("{ts}.{}", String::from_utf8_lossy(payload));
let mut mac = match HmacSha256::new_from_slice(secret.as_bytes()) {
@ -94,15 +107,7 @@ pub async fn post_stripe_webhook(
}
// Update user subscription to "licensed" via PocketBase superuser auth
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(
&state.http_client,
pb_url,
&state.pocketbase_admin_email,
&state.pocketbase_admin_password,
)
.await
{
let token = match get_superuser_token(&state).await {
Ok(t) => t,
Err(err) => {
warn!("Failed to auth as PocketBase superuser in webhook: {err}");
@ -110,6 +115,7 @@ pub async fn post_stripe_webhook(
}
};
let pb_url = state.pocketbase_url.trim_end_matches('/');
let url = format!("{pb_url}/api/collections/users/records/{user_id}");
let res = state
.http_client

View file

@ -9,6 +9,7 @@ use crate::auth::TokenCache;
use crate::data::{
POICategoryGroup, POIData, PlaceData, PostcodeData, PropertyData, TravelTimeStore,
};
use crate::pocketbase::SuperuserTokenCache;
use crate::routes::FeaturesResponse;
use crate::utils::GridIndex;
@ -44,6 +45,8 @@ pub struct AppState {
pub travel_time_store: Arc<TravelTimeStore>,
/// Token validation cache (60s TTL)
pub token_cache: Arc<TokenCache>,
/// Cached PocketBase superuser token (10min TTL) to avoid rate-limiting
pub superuser_token_cache: Arc<SuperuserTokenCache>,
// --- Config (cheap to clone) ---
/// URL of the screenshot service (e.g. http://screenshot:8002)