More

2026-02-15 09:48:30 +00:00 · 2026-02-15 09:48:30 +00:00 · 03445188ea
commit 03445188ea
parent 128b3191e7
54 changed files with 596953 additions and 3577 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -1,128 +0,0 @@
-name: CI
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  lint-python:
-    name: Lint Python
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
-      - name: Set up Python
-        run: uv python install 3.12
-
-      - name: Install dependencies
-        run: uv sync --dev
-
-      - name: Run ruff check
-        run: uv run ruff check .
-
-      - name: Run ruff format check
-        run: uv run ruff format --check .
-
-  lint-frontend:
-    name: Lint Frontend
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: frontend
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-          cache: "npm"
-          cache-dependency-path: frontend/package-lock.json
-
-      - name: Install dependencies
-        run: npm ci
-
-      - name: Run ESLint
-        run: npm run lint
-
-      - name: Run Prettier check
-        run: npm run format:check
-
-      - name: Run TypeScript check
-        run: npm run typecheck
-
-  build-frontend:
-    name: Build Frontend
-    runs-on: ubuntu-latest
-    needs: [lint-frontend]
-    defaults:
-      run:
-        working-directory: frontend
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: "20"
-          cache: "npm"
-          cache-dependency-path: frontend/package-lock.json
-
-      - name: Install dependencies
-        run: npm ci
-
-      - name: Build
-        run: npm run build
-
-  lint-rust:
-    name: Lint Rust
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: server-rs
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-        with:
-          components: clippy, rustfmt
-
-      - name: Cache cargo
-        uses: Swatinem/rust-cache@v2
-        with:
-          workspaces: server-rs
-
-      - name: Run clippy
-        run: cargo clippy -- -D warnings
-
-      - name: Check formatting
-        run: cargo fmt --check
-
-  test-rust:
-    name: Test Rust
-    runs-on: ubuntu-latest
-    needs: [lint-rust]
-    defaults:
-      run:
-        working-directory: server-rs
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-
-      - name: Cache cargo
-        uses: Swatinem/rust-cache@v2
-        with:
-          workspaces: server-rs
-
-      - name: Run tests
-        run: cargo test
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -1,49 +0,0 @@
-name: Docker
-
-on:
-  push:
-    branches: [main]
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE_NAME: ${{ github.repository }}
-
-jobs:
-  build-and-push:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Log in to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Extract metadata
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
-          tags: |
-            type=raw,value=latest
-            type=sha,prefix=sha-,format=short
-
-      - name: Build and push
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@ -1,28 +0,0 @@
-name: Lint
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  lint:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-
-      - name: Set up Python
-        run: uv python install
-
-      - name: Install dependencies
-        run: uv sync
-
-      - name: Check linting
-        run: uv run ruff check .
-
-      - name: Check formatting
-        run: uv run ruff format --check .
--- a/notebooks/bank_postcode_boundaries.ipynb
+++ b/notebooks/bank_postcode_boundaries.ipynb
--- a/notebooks/price_model_evaluation.ipynb
+++ b/notebooks/price_model_evaluation.ipynb
--- a/analyses/rightmove_buy.ipynb
+++ b/analyses/rightmove_buy.ipynb
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -31,7 +31,7 @@ services:
      OLLAMA_URL: http://host.docker.internal:11434
      OLLAMA_MODEL: gpt-oss:20b
      PUBLIC_URL: https://perfectpostcodes.schmelczer.dev
-      R5_URL: http://r5:8003
+
      # Secret: supplied from the environment / .env file at deploy time; never commit the literal key.
      GOOGLE_MAPS_API_KEY: "${GOOGLE_MAPS_API_KEY:?GOOGLE_MAPS_API_KEY must be set}"
    depends_on:
      pocketbase:
@ -141,27 +141,6 @@ services:
        condition: service_healthy
    restart: unless-stopped

-  r5:
-    init: true
-    build: ./r5-java
-    ports:
-      - "8004:8003"
-    networks:
-      - dev-network
-    volumes:
-      - r5-network:/data/network
-      - ./property-data/transit:/data/transit:ro
-      - ./property-data/transit/raw:/data/transit-raw:ro
-    environment:
-      DATA_DIR: /data/transit
-      OSM_DIR: /data/transit-raw
-      NETWORK_CACHE_DIR: /data/network
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8003/health"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 300s

 volumes:
  pb-data:
@ -169,7 +148,6 @@ volumes:
  cargo-target:
  frontend-node-modules:
  screenshot-cache:
-  r5-network:
  gluetun-cache-v2:
  gluetun-auth:

--- a/finder/Dockerfile
+++ b/finder/Dockerfile
@ -6,6 +6,6 @@ WORKDIR /app
 COPY pyproject.toml ./
 RUN uv pip install --system -r pyproject.toml

-COPY main.py ./
+COPY *.py ./

 CMD ["python3", "main.py"]
--- a/finder/constants.py
+++ b/finder/constants.py
@ -0,0 +1,56 @@
+import os
+from pathlib import Path
+
+# Path to the ArcGIS postcode parquet; the ARCGIS_PATH environment variable overrides the default.
+ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
+# NOTE(review): consumers of DATA_DIR are not visible in this file — confirm what is stored here.
+DATA_DIR = Path("/app/data")
+# Presumably the step by which the search result index advances per page — confirm in the search code.
+PAGE_SIZE = 24
+# Pacing between requests. NOTE(review): presumably seconds passed to time.sleep — confirm at the call sites.
+DELAY_BETWEEN_PAGES = 1.0
+DELAY_BETWEEN_OUTCODES = 2.0
+# http_client.fetch_with_retry makes at most MAX_RETRIES attempts and sleeps
+# RETRY_BASE_DELAY * 2**attempt seconds (plus up to 1 s of jitter) between them.
+MAX_RETRIES = 3
+RETRY_BASE_DELAY = 2.0
+GRID_CELL_SIZE = 0.01  # degrees for postcode spatial index
+# NOTE(review): presumably a random seed; its consumer is not visible here — confirm.
+SEED = 42
+
+# Rightmove endpoints.
+TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
+SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
+RIGHTMOVE_BASE = "https://www.rightmove.co.uk"
+
+# Maps a listing sub-type string (presumably Rightmove's propertySubType — confirm) onto one of the
+# canonical buckets used here: Detached, Semi-Detached, Terraced, Flat or Other.
+PROPERTY_TYPE_MAP = {
+    "Detached": "Detached",
+    "Semi-Detached": "Semi-Detached",
+    "Terraced": "Terraced",
+    "End of Terrace": "Terraced",
+    "Mid Terrace": "Terraced",
+    "Flat": "Flat",
+    "Maisonette": "Flat",
+    "Studio": "Flat",
+    "Apartment": "Flat",
+    "Penthouse": "Flat",
+    "Ground Flat": "Flat",
+    "Detached Bungalow": "Detached",
+    "Semi-Detached Bungalow": "Semi-Detached",
+    "Town House": "Terraced",
+    "Link Detached": "Detached",
+    "Link Detached House": "Detached",
+    "Bungalow": "Other",
+    "Cottage": "Other",
+    "Park Home": "Other",
+    "Land": "Other",
+    "Farm / Barn": "Other",
+    "House": "Detached",
+    "Not Specified": "Other",
+    "Chalet": "Other",
+    "Barn Conversion": "Other",
+    "Coach House": "Other",
+    "Character Property": "Other",
+    "Cluster House": "Other",
+    "Retirement Property": "Flat",
+    "Plot": "Other",
+    "Garages": "Other",
+    "Mews": "Terraced",
+}
+
+# One entry per listing channel (sale and rental). NOTE(review): presumably sent as search query
+# parameters; the meaning of the sortType codes is not visible here — confirm.
+CHANNELS = [
+    {"channel": "BUY", "transactionType": "BUY", "sortType": "2"},
+    {"channel": "RENT", "transactionType": "LETTING", "sortType": "6"},
+]
--- a/finder/http_client.py
+++ b/finder/http_client.py
@ -0,0 +1,126 @@
+import logging
+import random
+import threading
+import time
+
+import httpx
+from fake_useragent import UserAgent
+
+from constants import MAX_RETRIES, RETRY_BASE_DELAY
+from metrics import http_errors_total, http_requests_total, ip_rotations_total
+
+# Module logger, registered under the "rightmove" logger name.
+log = logging.getLogger("rightmove")
+
+# User-Agent source configured (per the constructor arguments) for Chrome/Edge on Windows/Mac OS X
+# with a minimum version of 120; make_client() reads `_ua.random` for every client it builds.
+_ua = UserAgent(browsers=["Chrome", "Edge"], os=["Windows", "Mac OS X"], min_version=120.0)
+
+
+def _endpoint_label(url: str) -> str:
+    """Classify a request URL into a coarse metrics label.
+
+    Returns "typeahead" or "search" when that substring occurs in ``url``
+    (tested in that order), and "other" when neither does.
+    """
+    for label in ("typeahead", "search"):
+        if label in url:
+            return label
+    return "other"
+
+
+def _status_label(code: int) -> str:
+    """Turn an HTTP status code into a metrics label; every code >= 500 collapses into "5xx"."""
+    return "5xx" if code >= 500 else str(code)
+
+# Gluetun control API — runs on port 8000 inside the gluetun container.
+# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
+GLUETUN_API = "http://127.0.0.1:8000"
+# Held for the whole body of rotate_ip(), so only one rotation talks to gluetun at a time.
+_ip_rotate_lock = threading.Lock()
+
+
+def rotate_ip() -> bool:
+    """Restart the gluetun VPN connection and wait for the public IP to change.
+
+    The VPN is stopped and started again through gluetun's control API, then
+    the public-IP endpoint is polled (up to 30 times, 2 s apart) until it
+    reports an address different from the one seen before the restart.
+    Calls are serialised by ``_ip_rotate_lock``. Every failure path, and the
+    success path, is counted in ``ip_rotations_total`` under
+    ``result="failure"`` / ``result="success"``.
+
+    Returns:
+        True if a new public IP was observed; False if the control API
+        rejected the stop/start request, the poll timed out, or an unexpected
+        exception occurred.
+    """
+    with _ip_rotate_lock:
+        log.info("Rotating VPN IP via gluetun...")
+        try:
+            with httpx.Client(timeout=10) as ctl:
+                # Remember the current IP so a change can be detected afterwards.
+                # If it cannot be read, "unknown" is used and any later IP counts as a change.
+                old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
+                old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown"
+                log.info("Current IP: %s", old_ip)
+
+                # Stop the VPN, pause briefly, then start it again.
+                # NOTE(review): whether the restart lands on a different server depends on gluetun's configuration.
+                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"})
+                if resp.status_code != 200:
+                    log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
+                    ip_rotations_total.labels(result="failure").inc()
+                    return False
+                time.sleep(2)
+
+                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"})
+                if resp.status_code != 200:
+                    log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
+                    ip_rotations_total.labels(result="failure").inc()
+                    return False
+
+            # Wait for reconnection
+            for _ in range(30):
+                time.sleep(2)
+                try:
+                    with httpx.Client(timeout=10) as ctl:
+                        new_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
+                        if new_ip_resp.status_code == 200:
+                            new_ip = new_ip_resp.json().get("public_ip", "")
+                            if new_ip and new_ip != old_ip:
+                                log.info("IP rotated: %s → %s", old_ip, new_ip)
+                                ip_rotations_total.labels(result="success").inc()
+                                return True
+                except Exception:
+                    # Deliberate best-effort: errors while polling are ignored and the poll continues.
+                    pass  # VPN still reconnecting
+
+            log.warning("IP rotation timed out (may still be same IP)")
+            ip_rotations_total.labels(result="failure").inc()
+            return False
+
+        except Exception as e:
+            log.error("IP rotation failed: %s", e)
+            ip_rotations_total.labels(result="failure").inc()
+            return False
+
+
+def make_client() -> httpx.Client:
+    """Build an httpx client for JSON requests: timeout of 30, redirects followed, User-Agent taken from ``_ua.random``."""
+    request_headers = {"User-Agent": _ua.random, "Accept": "application/json"}
+    return httpx.Client(timeout=30, headers=request_headers, follow_redirects=True)
+
+
+def fetch_with_retry(
+    client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
+) -> dict | None:
+    """GET ``url`` and return the decoded JSON body, retrying transient failures.
+
+    Up to MAX_RETRIES attempts are made. HTTP 429/500/502/503/504, connection
+    errors and every httpx timeout (connect, read, write, pool) are retried
+    with exponential backoff plus up to one second of jitter. On a 403 (while
+    ``on_403`` is true) the VPN IP is rotated and the request is re-issued with
+    a fresh retry budget and 403 handling disabled, so rotation happens at
+    most once per top-level call.
+
+    Args:
+        client: httpx client used to send the request.
+        url: Absolute URL to fetch.
+        params: Optional query parameters.
+        on_403: Whether a 403 response may trigger an IP rotation.
+
+    Returns:
+        The parsed JSON on success. None on a non-retryable status, a failed
+        IP rotation, a 200 response whose body is not valid JSON, or once all
+        attempts are exhausted.
+    """
+    endpoint = _endpoint_label(url)
+    for attempt in range(MAX_RETRIES):
+        try:
+            resp = client.get(url, params=params)
+            http_requests_total.labels(status=_status_label(resp.status_code), endpoint=endpoint).inc()
+            if resp.status_code == 200:
+                try:
+                    return resp.json()
+                except ValueError:
+                    # A 200 whose body is not JSON would otherwise raise out of this function,
+                    # which is documented to return None on failure.
+                    http_errors_total.labels(type="invalid_json").inc()
+                    log.error("Invalid JSON in 200 response from %s", url)
+                    return None
+            if resp.status_code == 403 and on_403:
+                log.warning("HTTP 403 — IP likely blocked, rotating...")
+                if rotate_ip():
+                    # Retry once with new IP (but don't recurse on 403 again)
+                    return fetch_with_retry(client, url, params, on_403=False)
+                log.error("IP rotation failed, giving up on %s", url)
+                return None
+            if resp.status_code in (429, 500, 502, 503, 504):
+                delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
+                log.warning("HTTP %d from %s, retry %d/%d in %.1fs", resp.status_code, url, attempt + 1, MAX_RETRIES, delay)
+                time.sleep(delay)
+                continue
+            log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
+            return None
+        except (httpx.ConnectError, httpx.TimeoutException) as e:
+            # TimeoutException is the base of ConnectTimeout/ReadTimeout/WriteTimeout/PoolTimeout;
+            # ConnectTimeout is not a ConnectError, so it must be covered here to be retried.
+            http_errors_total.labels(type=type(e).__name__).inc()
+            delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
+            log.warning("%s from %s, retry %d/%d in %.1fs", type(e).__name__, url, attempt + 1, MAX_RETRIES, delay)
+            time.sleep(delay)
+    http_errors_total.labels(type="retry_exhausted").inc()
+    log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
+    return None
--- a/finder/main.py
+++ b/finder/main.py
@ -1,17 +1,21 @@
 import logging
-import math
-import os
-import random
-import re
 import threading
 import time
-from collections import defaultdict
-from dataclasses import dataclass, field
 from pathlib import Path

-import httpx
-import polars as pl
-from flask import Flask, jsonify, send_from_directory
+from flask import Flask, Response, jsonify, send_from_directory
+from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
+
+from constants import DATA_DIR
+from rightmove import outcode_cache
+from scraper import (
+    _sync_gauges,
+    build_postcode_index,
+    load_outcodes,
+    run_scrape,
+    status,
+    status_lock,
+)

 # ---------------------------------------------------------------------------
 # Logging
@ -33,615 +37,6 @@ log.setLevel(logging.DEBUG)
 logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("httpcore").setLevel(logging.WARNING)

-# ---------------------------------------------------------------------------
-# Constants
-# ---------------------------------------------------------------------------
-
-ARCGIS_PATH = os.environ.get("ARCGIS_PATH", "/data/arcgis_data.parquet")
-DATA_DIR = Path("/app/data")
-PAGE_SIZE = 24
-MAX_PAGES_PER_OUTCODE = 42  # 24*42 = 1008, safety cap per outcode
-DELAY_BETWEEN_PAGES = 1.0
-DELAY_BETWEEN_OUTCODES = 2.0
-MAX_RETRIES = 3
-RETRY_BASE_DELAY = 2.0
-GRID_CELL_SIZE = 0.01  # degrees for postcode spatial index
-SEED = 42
-
-TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
-SEARCH_URL = "https://www.rightmove.co.uk/api/property-search/listing/search"
-RIGHTMOVE_BASE = "https://www.rightmove.co.uk"
-
-PROPERTY_TYPE_MAP = {
-    "Detached": "Detached",
-    "Semi-Detached": "Semi-Detached",
-    "Terraced": "Terraced",
-    "End of Terrace": "Terraced",
-    "Mid Terrace": "Terraced",
-    "Flat": "Flat",
-    "Maisonette": "Flat",
-    "Studio": "Flat",
-    "Apartment": "Flat",
-    "Penthouse": "Flat",
-    "Ground Flat": "Flat",
-    "Detached Bungalow": "Detached",
-    "Semi-Detached Bungalow": "Semi-Detached",
-    "Town House": "Terraced",
-    "Link Detached": "Detached",
-    "Link Detached House": "Detached",
-    "Bungalow": "Other",
-    "Cottage": "Other",
-    "Park Home": "Other",
-    "Land": "Other",
-    "Farm / Barn": "Other",
-    "House": "Detached",
-    "Not Specified": "Other",
-    "Chalet": "Other",
-    "Barn Conversion": "Other",
-    "Coach House": "Other",
-    "Character Property": "Other",
-    "Cluster House": "Other",
-    "Retirement Property": "Flat",
-    "Plot": "Other",
-    "Garages": "Other",
-    "Mews": "Terraced",
-}
-
-CHANNELS = [
-    {"channel": "BUY", "transactionType": "BUY", "sortType": "2"},
-    {"channel": "RENT", "transactionType": "LETTING", "sortType": "6"},
-]
-
-# ---------------------------------------------------------------------------
-# Postcode spatial index
-# ---------------------------------------------------------------------------
-
-
-class PostcodeSpatialIndex:
-    """Grid-based spatial index over arcgis postcodes for nearest-lookup."""
-
-    def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
-        self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(list)
-        for lat, lng, pcd in zip(lats, lngs, postcodes):
-            gx = int(math.floor(lng / GRID_CELL_SIZE))
-            gy = int(math.floor(lat / GRID_CELL_SIZE))
-            self.grid[(gx, gy)].append((lat, lng, pcd))
-        log.info("Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats))
-
-    def nearest(self, lat: float, lng: float) -> str | None:
-        gx = int(math.floor(lng / GRID_CELL_SIZE))
-        gy = int(math.floor(lat / GRID_CELL_SIZE))
-        best_dist = float("inf")
-        best_pcd = None
-        for dx in range(-1, 2):
-            for dy in range(-1, 2):
-                for plat, plng, pcd in self.grid.get((gx + dx, gy + dy), []):
-                    d = (plat - lat) ** 2 + (plng - lng) ** 2
-                    if d < best_dist:
-                        best_dist = d
-                        best_pcd = pcd
-        return best_pcd
-
-
-# ---------------------------------------------------------------------------
-# Scrape status
-# ---------------------------------------------------------------------------
-
-
-@dataclass
-class ScrapeStatus:
-    state: str = "idle"  # idle | running | done | error
-    channel: str = ""
-    outcode: str = ""
-    outcodes_done: int = 0
-    outcodes_total: int = 0
-    properties_buy: int = 0
-    properties_rent: int = 0
-    errors: list[str] = field(default_factory=list)
-    started_at: float = 0.0
-    finished_at: float = 0.0
-
-
-status = ScrapeStatus()
-status_lock = threading.Lock()
-debug_data: dict = {"last_response": None, "outcode_cache": {}}
-
-# ---------------------------------------------------------------------------
-# HTTP helpers
-# ---------------------------------------------------------------------------
-
-USER_AGENT = (
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
-)
-
-# Gluetun control API — runs on port 8000 inside the gluetun container.
-# Since finder uses network_mode: service:gluetun, localhost IS gluetun.
-GLUETUN_API = "http://127.0.0.1:8000"
-_ip_rotate_lock = threading.Lock()
-
-
-def rotate_ip() -> bool:
-    """Ask gluetun to reconnect to a different VPN server, getting a new IP.
-    Returns True if the IP changed successfully."""
-    with _ip_rotate_lock:
-        log.info("Rotating VPN IP via gluetun...")
-        try:
-            # Get current IP
-            with httpx.Client(timeout=10) as ctl:
-                old_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
-                old_ip = old_ip_resp.json().get("public_ip", "unknown") if old_ip_resp.status_code == 200 else "unknown"
-                log.info("Current IP: %s", old_ip)
-
-                # Trigger server change — PUT with empty JSON body picks a random server
-                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "stopped"})
-                if resp.status_code != 200:
-                    log.error("Failed to stop VPN: %d %s", resp.status_code, resp.text)
-                    return False
-                time.sleep(2)
-
-                resp = ctl.put(f"{GLUETUN_API}/v1/vpn/status", json={"status": "running"})
-                if resp.status_code != 200:
-                    log.error("Failed to start VPN: %d %s", resp.status_code, resp.text)
-                    return False
-
-            # Wait for reconnection
-            for _ in range(30):
-                time.sleep(2)
-                try:
-                    with httpx.Client(timeout=10) as ctl:
-                        new_ip_resp = ctl.get(f"{GLUETUN_API}/v1/publicip/ip")
-                        if new_ip_resp.status_code == 200:
-                            new_ip = new_ip_resp.json().get("public_ip", "")
-                            if new_ip and new_ip != old_ip:
-                                log.info("IP rotated: %s → %s", old_ip, new_ip)
-                                return True
-                except Exception:
-                    pass  # VPN still reconnecting
-
-            log.warning("IP rotation timed out (may still be same IP)")
-            return False
-
-        except Exception as e:
-            log.error("IP rotation failed: %s", e)
-            return False
-
-
-def make_client() -> httpx.Client:
-    return httpx.Client(
-        timeout=30,
-        headers={"User-Agent": USER_AGENT, "Accept": "application/json"},
-        follow_redirects=True,
-    )
-
-
-def fetch_with_retry(
-    client: httpx.Client, url: str, params: dict | None = None, on_403: bool = True
-) -> dict | None:
-    """GET JSON with retries on 429/5xx/connection errors. Returns None on permanent failure.
-    On 403, triggers IP rotation and retries once."""
-    for attempt in range(MAX_RETRIES):
-        try:
-            resp = client.get(url, params=params)
-            if resp.status_code == 200:
-                return resp.json()
-            if resp.status_code == 403 and on_403:
-                log.warning("HTTP 403 — IP likely blocked, rotating...")
-                if rotate_ip():
-                    # Retry once with new IP (but don't recurse on 403 again)
-                    return fetch_with_retry(client, url, params, on_403=False)
-                log.error("IP rotation failed, giving up on %s", url)
-                return None
-            if resp.status_code in (429, 500, 502, 503, 504):
-                delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
-                log.warning("HTTP %d from %s, retry %d/%d in %.1fs", resp.status_code, url, attempt + 1, MAX_RETRIES, delay)
-                time.sleep(delay)
-                continue
-            log.error("HTTP %d from %s (non-retryable)", resp.status_code, url)
-            return None
-        except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
-            delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
-            log.warning("%s from %s, retry %d/%d in %.1fs", type(e).__name__, url, attempt + 1, MAX_RETRIES, delay)
-            time.sleep(delay)
-    log.error("All %d retries exhausted for %s", MAX_RETRIES, url)
-    return None
-
-
-# ---------------------------------------------------------------------------
-# Rightmove API
-# ---------------------------------------------------------------------------
-
-
-def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
-    """Look up Rightmove's internal ID for an outcode via typeahead API."""
-    if outcode in debug_data["outcode_cache"]:
-        return debug_data["outcode_cache"][outcode]
-
-    data = fetch_with_retry(client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"})
-    if not data:
-        return None
-
-    for match in data.get("matches", []):
-        if match.get("type") == "OUTCODE" and match.get("displayName") == outcode:
-            rid = str(match["id"])
-            debug_data["outcode_cache"][outcode] = rid
-            return rid
-
-    log.debug("Outcode %s not found in typeahead results", outcode)
-    return None
-
-
-def search_outcode(
-    client: httpx.Client,
-    outcode_id: str,
-    outcode: str,
-    channel_cfg: dict,
-    pc_index: PostcodeSpatialIndex,
-) -> list[dict]:
-    """Paginate through search results for one outcode+channel. Returns transformed properties."""
-    properties = []
-    index = 0
-
-    for page in range(MAX_PAGES_PER_OUTCODE):
-        params = {
-            "useLocationIdentifier": "true",
-            "locationIdentifier": f"OUTCODE^{outcode_id}",
-            "index": str(index),
-            "sortType": channel_cfg["sortType"],
-            "channel": channel_cfg["channel"],
-            "transactionType": channel_cfg["transactionType"],
-        }
-
-        data = fetch_with_retry(client, SEARCH_URL, params)
-        if not data:
-            log.warning("Failed to fetch page %d for %s/%s", page, outcode, channel_cfg["channel"])
-            break
-
-        debug_data["last_response"] = data
-
-        raw_props = data.get("properties", [])
-        if not raw_props:
-            break
-
-        for prop in raw_props:
-            transformed = transform_property(prop, outcode, pc_index)
-            if transformed:
-                properties.append(transformed)
-
-        # Check if there are more pages
-        result_count_str = data.get("resultCount", "0")
-        result_count = int(result_count_str.replace(",", ""))
-        index += PAGE_SIZE
-
-        if index >= result_count:
-            break
-
-        if page < MAX_PAGES_PER_OUTCODE - 1:
-            time.sleep(DELAY_BETWEEN_PAGES)
-
-    return properties
-
-
-# ---------------------------------------------------------------------------
-# Property transformation
-# ---------------------------------------------------------------------------
-
-
-def parse_display_size(display_size: str | None) -> float | None:
-    """Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm."""
-    if not display_size:
-        return None
-    # Try sq. ft. first
-    m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE)
-    if m:
-        sqft = float(m.group(1).replace(",", ""))
-        return round(sqft * 0.092903, 1)
-    # Try sq. m.
-    m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE)
-    if m:
-        return round(float(m.group(1).replace(",", "")), 1)
-    return None
-
-
-def map_property_type(sub_type: str | None) -> str:
-    """Map propertySubType to canonical type."""
-    if not sub_type:
-        return "Other"
-    canonical = PROPERTY_TYPE_MAP.get(sub_type)
-    if canonical:
-        return canonical
-    log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
-    return "Other"
-
-
-def extract_tenure(tenure_obj: dict | None) -> str | None:
-    """Extract tenure string from tenure object."""
-    if not tenure_obj:
-        return None
-    tt = tenure_obj.get("tenureType", "")
-    if tt == "FREEHOLD":
-        return "Freehold"
-    if tt == "LEASEHOLD":
-        return "Leasehold"
-    return None
-
-
-def fix_coords(lat: float, lng: float) -> tuple[float, float]:
-    """Swap lat/lng if they look reversed. England: lat ~49–56, lng ~-7–2."""
-    if 49 <= lat <= 56 and -7 <= lng <= 2:
-        return lat, lng
-    if 49 <= lng <= 56 and -7 <= lat <= 2:
-        log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat)
-        return lng, lat
-    log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng)
-    return lat, lng
-
-
-def normalize_price(amount: int, frequency: str) -> int:
-    """Normalize price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
-    if frequency == "weekly":
-        return round(amount * 52 / 12)
-    if frequency == "yearly":
-        return round(amount / 12)
-    return amount
-
-
-def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None:
-    """Transform a raw Rightmove property dict into our output schema."""
-    loc = prop.get("location")
-    if not loc:
-        return None
-    raw_lat = loc.get("latitude")
-    raw_lng = loc.get("longitude")
-    if raw_lat is None or raw_lng is None:
-        return None
-
-    lat, lng = fix_coords(raw_lat, raw_lng)
-
-    price_obj = prop.get("price", {})
-    amount = price_obj.get("amount")
-    if amount is None:
-        return None
-    frequency = price_obj.get("frequency", "")
-    price = normalize_price(int(amount), frequency)
-
-    display_prices = price_obj.get("displayPrices", [])
-    price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
-
-    sub_type = prop.get("propertySubType", "")
-    bedrooms = prop.get("bedrooms", 0) or 0
-    bathrooms = prop.get("bathrooms", 0) or 0
-
-    key_features = [kf.get("description", "") for kf in prop.get("keyFeatures", []) if kf.get("description")]
-
-    listing_update = prop.get("listingUpdate", {})
-    update_date = listing_update.get("listingUpdateDate", "")
-
-    postcode = pc_index.nearest(lat, lng)
-
-    return {
-        "id": prop.get("id"),
-        "bedrooms": bedrooms,
-        "bathrooms": bathrooms,
-        "total_rooms": bedrooms + bathrooms,
-        "longitude": lng,
-        "latitude": lat,
-        "postcode": postcode,
-        "address": prop.get("displayAddress", ""),
-        "tenure": extract_tenure(prop.get("tenure")),
-        "property_type": map_property_type(sub_type),
-        "property_sub_type": sub_type or "Unknown",
-        "price": price,
-        "price_frequency": frequency,
-        "price_qualifier": price_qualifier,
-        "floorspace_sqm": parse_display_size(prop.get("displaySize")),
-        "url": RIGHTMOVE_BASE + prop.get("propertyUrl", ""),
-        "features": key_features,
-        "first_visible_date": prop.get("firstVisibleDate", ""),
-        "update_date": update_date,
-        "outcode": outcode,
-        "house_share": sub_type == "House Share",
-    }
-
-
-# ---------------------------------------------------------------------------
-# Parquet writing
-# ---------------------------------------------------------------------------
-
-
-def write_parquet(properties: list[dict], path: Path) -> None:
-    """Write properties list to parquet using Polars."""
-    if not properties:
-        log.warning("No properties to write to %s", path)
-        return
-
-    df = pl.DataFrame(
-        {
-            "id": [p["id"] for p in properties],
-            "bedrooms": [p["bedrooms"] for p in properties],
-            "bathrooms": [p["bathrooms"] for p in properties],
-            "total_rooms": [p["total_rooms"] for p in properties],
-            "longitude": [p["longitude"] for p in properties],
-            "latitude": [p["latitude"] for p in properties],
-            "postcode": [p["postcode"] for p in properties],
-            "address": [p["address"] for p in properties],
-            "tenure": [p["tenure"] for p in properties],
-            "property_type": [p["property_type"] for p in properties],
-            "property_sub_type": [p["property_sub_type"] for p in properties],
-            "price": [p["price"] for p in properties],
-            "price_frequency": [p["price_frequency"] for p in properties],
-            "price_qualifier": [p["price_qualifier"] for p in properties],
-            "floorspace_sqm": [p["floorspace_sqm"] for p in properties],
-            "url": [p["url"] for p in properties],
-            "features": [p["features"] for p in properties],
-            "first_visible_date": [p["first_visible_date"] for p in properties],
-            "update_date": [p["update_date"] for p in properties],
-            "outcode": [p["outcode"] for p in properties],
-            "house_share": [p["house_share"] for p in properties],
-        },
-        schema={
-            "id": pl.Int64,
-            "bedrooms": pl.Int32,
-            "bathrooms": pl.Int32,
-            "total_rooms": pl.Int32,
-            "longitude": pl.Float64,
-            "latitude": pl.Float64,
-            "postcode": pl.Utf8,
-            "address": pl.Utf8,
-            "tenure": pl.Utf8,
-            "property_type": pl.Utf8,
-            "property_sub_type": pl.Utf8,
-            "price": pl.Int64,
-            "price_frequency": pl.Utf8,
-            "price_qualifier": pl.Utf8,
-            "floorspace_sqm": pl.Float64,
-            "url": pl.Utf8,
-            "features": pl.List(pl.Utf8),
-            "first_visible_date": pl.Utf8,
-            "update_date": pl.Utf8,
-            "outcode": pl.Utf8,
-            "house_share": pl.Boolean,
-        },
-    )
-
-    df.write_parquet(path)
-    log.info("Wrote %d properties to %s", len(df), path)
-
-
-# ---------------------------------------------------------------------------
-# Scrape orchestration
-# ---------------------------------------------------------------------------
-
-
-def load_outcodes() -> list[str]:
-    """Load England-only outcodes from arcgis parquet."""
-    log.info("Loading outcodes from %s", ARCGIS_PATH)
-    df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
-    england = df.filter(pl.col("ctry") == "E92000001")
-    log.info("England postcodes: %d", len(england))
-
-    outcodes = (
-        england.select(pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode"))
-        .drop_nulls()
-        .get_column("outcode")
-        .unique()
-        .sort()
-        .to_list()
-    )
-    log.info("Unique England outcodes: %d", len(outcodes))
-    return outcodes
-
-
-def build_postcode_index() -> PostcodeSpatialIndex:
-    """Build spatial index from arcgis England postcodes."""
-    log.info("Building postcode spatial index from %s", ARCGIS_PATH)
-    df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
-    england = df.filter(pl.col("ctry") == "E92000001").drop_nulls(subset=["lat", "long"])
-    return PostcodeSpatialIndex(
-        england.get_column("lat").to_list(),
-        england.get_column("long").to_list(),
-        england.get_column("pcd").to_list(),
-    )
-
-
-def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
-    """Main scrape loop — runs in background thread."""
-    global status
-    with status_lock:
-        status.state = "running"
-        status.started_at = time.time()
-        status.errors = []
-        status.properties_buy = 0
-        status.properties_rent = 0
-
-    # Shuffle for geographic diversity
-    shuffled = list(outcodes)
-    random.seed(SEED)
-    random.shuffle(shuffled)
-
-    client = make_client()
-
-    try:
-        for channel_cfg in CHANNELS:
-            channel_name = channel_cfg["channel"]
-            file_suffix = "buy" if channel_name == "BUY" else "rent"
-            all_properties: dict[int, dict] = {}  # dedup by id
-
-            with status_lock:
-                status.channel = channel_name
-                status.outcodes_done = 0
-                status.outcodes_total = len(shuffled)
-
-            log.info("=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled))
-
-            for i, outcode in enumerate(shuffled):
-                with status_lock:
-                    status.outcode = outcode
-                    status.outcodes_done = i
-
-                log.debug("Outcode %s (%d/%d) — %d properties so far",
-                          outcode, i + 1, len(shuffled), len(all_properties))
-
-                try:
-                    outcode_id = resolve_outcode_id(client, outcode)
-                    if not outcode_id:
-                        log.debug("No Rightmove ID for outcode %s, skipping", outcode)
-                        continue
-
-                    props = search_outcode(client, outcode_id, outcode, channel_cfg, pc_index)
-                    for p in props:
-                        pid = p["id"]
-                        if pid not in all_properties:
-                            all_properties[pid] = p
-
-                    with status_lock:
-                        if channel_name == "BUY":
-                            status.properties_buy = len(all_properties)
-                        else:
-                            status.properties_rent = len(all_properties)
-
-                    log.info("Outcode %s: got %d properties (total: %d)", outcode, len(props), len(all_properties))
-
-                except Exception as e:
-                    msg = f"Error scraping {outcode}/{channel_name}: {e}"
-                    log.error(msg)
-                    with status_lock:
-                        status.errors.append(msg)
-
-                if i < len(shuffled) - 1:
-                    time.sleep(DELAY_BETWEEN_OUTCODES)
-
-            # Write parquet
-            deduped = list(all_properties.values())
-            output_path = DATA_DIR / f"rightmove_{file_suffix}.parquet"
-            write_parquet(deduped, output_path)
-
-            with status_lock:
-                if channel_name == "BUY":
-                    status.properties_buy = len(deduped)
-                else:
-                    status.properties_rent = len(deduped)
-                status.outcodes_done = len(shuffled)
-
-            log.info("=== %s channel complete: %d unique properties ===", channel_name, len(deduped))
-
-        with status_lock:
-            status.state = "done"
-            status.finished_at = time.time()
-            elapsed = status.finished_at - status.started_at
-            log.info("Scrape complete in %.0fs — buy: %d, rent: %d",
-                     elapsed, status.properties_buy, status.properties_rent)
-
-    except Exception as e:
-        log.exception("Fatal scrape error")
-        with status_lock:
-            status.state = "error"
-            status.errors.append(f"Fatal: {e}")
-            status.finished_at = time.time()
-    finally:
-        client.close()
-
-
 # ---------------------------------------------------------------------------
 # Startup: load data
 # ---------------------------------------------------------------------------
@ -693,12 +88,18 @@ def get_status():
@app.route("/debug")
 def get_debug():
    return jsonify({
-        "last_response": debug_data["last_response"],
-        "outcode_cache_size": len(debug_data["outcode_cache"]),
-        "outcode_cache_sample": dict(list(debug_data["outcode_cache"].items())[:20]),
+        "outcode_cache_size": len(outcode_cache),
+        "outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
    })


+@app.route("/metrics")
+def metrics():
+    """Serve the Prometheus exposition payload.
+
+    The gauges are refreshed from the scrape status while holding
+    ``status_lock``; the registry is rendered after the lock is released.
+    """
+    with status_lock:
+        _sync_gauges()
+    payload = generate_latest()
+    return Response(payload, mimetype=CONTENT_TYPE_LATEST)
+
+
@app.route("/data/<filename>")
 def serve_data(filename):
    if not filename.endswith(".parquet"):
--- a/finder/metrics.py
+++ b/finder/metrics.py
@ -0,0 +1,59 @@
+from prometheus_client import Counter, Gauge
+
+# ---------------------------------------------------------------------------
+# Gauges — current scrape state, updated after each outcode
+# ---------------------------------------------------------------------------
+
+# One series per state label; _sync_gauges() in scraper.py sets the series
+# matching the current state to 1 and the other three to 0.
+scrape_state = Gauge(
+    "scrape_state",
+    "Current scrape state as a labeled gauge (1 = active)",
+    ["state"],
+)
+
+# Mirrors ScrapeStatus.outcodes_done (reset to 0 when a channel starts).
+scrape_outcodes_done = Gauge(
+    "scrape_outcodes_done",
+    "Outcodes processed in current channel",
+)
+
+# Mirrors ScrapeStatus.outcodes_total.
+# NOTE(review): the `_total` suffix is conventionally used for counters in
+# Prometheus naming; kept as is because renaming changes the exported name.
+scrape_outcodes_total = Gauge(
+    "scrape_outcodes_total",
+    "Total outcodes in current channel",
+)
+
+# Labeled by channel; _sync_gauges() writes the "buy" and "rent" series from
+# ScrapeStatus.properties_buy / properties_rent.
+# NOTE(review): same `_total`-on-a-gauge naming caveat as above.
+scrape_properties_total = Gauge(
+    "scrape_properties_total",
+    "Properties found so far",
+    ["channel"],
+)
+
+# Set by _sync_gauges() to (finished_at or now) - started_at, or 0 before the
+# first scrape has started.
+scrape_elapsed_seconds = Gauge(
+    "scrape_elapsed_seconds",
+    "Seconds since scrape started",
+)
+
+# ---------------------------------------------------------------------------
+# Counters — monotonically increasing
+# ---------------------------------------------------------------------------
+
+# Labeled by response status and endpoint.
+# NOTE(review): incremented outside this module (presumably the HTTP client) —
+# confirm the label values used there.
+http_requests_total = Counter(
+    "http_requests_total",
+    "HTTP requests made by the scraper",
+    ["status", "endpoint"],
+)
+
+# Labeled by error type; incremented outside this module — TODO confirm where.
+http_errors_total = Counter(
+    "http_errors_total",
+    "HTTP connection/timeout errors",
+    ["type"],
+)
+
+# Labeled by rotation result; incremented outside this module — TODO confirm.
+ip_rotations_total = Counter(
+    "ip_rotations_total",
+    "VPN IP rotation attempts",
+    ["result"],
+)
+
+# Incremented once per outcode whose scrape raised, in run_scrape()'s
+# per-outcode exception handler.
+scrape_errors_total = Counter(
+    "scrape_errors_total",
+    "Per-outcode scrape errors",
+)
--- a/finder/pyproject.toml
+++ b/finder/pyproject.toml
@ -6,4 +6,6 @@ dependencies = [
    "flask",
    "httpx",
    "polars",
+    "fake-useragent>=2.2.0",
+    "prometheus-client",
 ]
--- a/finder/rightmove.py
+++ b/finder/rightmove.py
@ -0,0 +1,86 @@
+import logging
+import time
+
+import httpx
+
+from constants import (
+    PAGE_SIZE,
+    DELAY_BETWEEN_PAGES,
+    SEARCH_URL,
+    TYPEAHEAD_URL,
+)
+from http_client import fetch_with_retry
+from spatial import PostcodeSpatialIndex
+from transform import transform_property
+
+log = logging.getLogger("rightmove")
+
+# Outcode ID cache (Rightmove typeahead → internal ID)
+outcode_cache: dict[str, str] = {}
+
+
+def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
+    """Return Rightmove's internal ID for ``outcode``, or None if unavailable.
+
+    Served from ``outcode_cache`` when possible. Otherwise the typeahead
+    endpoint is queried and the first match whose type is ``OUTCODE`` and
+    whose display name equals ``outcode`` is cached and returned. Failed
+    fetches and misses are not cached.
+    """
+    try:
+        return outcode_cache[outcode]
+    except KeyError:
+        pass
+
+    query = {"query": outcode, "limit": "10", "exclude": "STREET"}
+    payload = fetch_with_retry(client, TYPEAHEAD_URL, query)
+    if not payload:
+        return None
+
+    hit = next(
+        (
+            candidate
+            for candidate in payload.get("matches", [])
+            if candidate.get("type") == "OUTCODE" and candidate.get("displayName") == outcode
+        ),
+        None,
+    )
+    if hit is None:
+        log.debug("Outcode %s not found in typeahead results", outcode)
+        return None
+
+    rightmove_id = str(hit["id"])
+    outcode_cache[outcode] = rightmove_id
+    return rightmove_id
+
+
+def search_outcode(
+    client: httpx.Client,
+    outcode_id: str,
+    outcode: str,
+    channel_cfg: dict,
+    pc_index: PostcodeSpatialIndex,
+) -> list[dict]:
+    """Paginate through search results for one outcode+channel.
+
+    Args:
+        client: HTTP client passed through to ``fetch_with_retry``.
+        outcode_id: Rightmove's internal ID for the outcode.
+        outcode: The outcode string, recorded on each transformed property.
+        channel_cfg: Mapping providing ``sortType``, ``channel`` and
+            ``transactionType`` request parameters.
+        pc_index: Spatial index handed to ``transform_property``.
+
+    Returns:
+        The transformed properties collected so far. Pagination stops on a
+        failed fetch, an empty page, or once ``index`` reaches ``resultCount``.
+    """
+    properties = []
+    index = 0
+
+    while True:
+        params = {
+            "useLocationIdentifier": "true",
+            "locationIdentifier": f"OUTCODE^{outcode_id}",
+            "index": str(index),
+            "sortType": channel_cfg["sortType"],
+            "channel": channel_cfg["channel"],
+            "transactionType": channel_cfg["transactionType"],
+        }
+
+        data = fetch_with_retry(client, SEARCH_URL, params)
+        if not data:
+            log.warning("Failed to fetch index %d for %s/%s", index, outcode, channel_cfg["channel"])
+            break
+
+        raw_props = data.get("properties", [])
+        if not raw_props:
+            break
+
+        for prop in raw_props:
+            transformed = transform_property(prop, outcode, pc_index)
+            if transformed:
+                properties.append(transformed)
+
+        # Check if there are more pages. The count is handled as a string with
+        # thousands separators (e.g. "1,234"); going through str() and the
+        # `or "0"` fallback keeps a numeric or null value from raising here
+        # and discarding the pages already collected for this outcode.
+        result_count = int(str(data.get("resultCount") or "0").replace(",", ""))
+        index += PAGE_SIZE
+
+        if index >= result_count:
+            break
+
+        time.sleep(DELAY_BETWEEN_PAGES)
+
+    return properties
--- a/finder/scraper.py
+++ b/finder/scraper.py
@ -0,0 +1,191 @@
+import logging
+import random
+import threading
+import time
+from dataclasses import dataclass, field
+
+import polars as pl
+
+from constants import ARCGIS_PATH, CHANNELS, DATA_DIR, DELAY_BETWEEN_OUTCODES, SEED
+from http_client import make_client
+from metrics import (
+    scrape_elapsed_seconds,
+    scrape_errors_total,
+    scrape_outcodes_done,
+    scrape_outcodes_total,
+    scrape_properties_total,
+    scrape_state,
+)
+from rightmove import resolve_outcode_id, search_outcode
+from spatial import PostcodeSpatialIndex
+from storage import write_parquet
+
+log = logging.getLogger("rightmove")
+
+
+@dataclass
+class ScrapeStatus:
+    """Mutable snapshot of scrape progress, guarded by ``status_lock`` in this module."""
+
+    state: str = "idle"  # idle | running | done | error
+    # Channel currently being scraped (set from CHANNELS[...]["channel"]).
+    channel: str = ""
+    # Outcode currently being processed.
+    outcode: str = ""
+    # Progress counters within the current channel.
+    outcodes_done: int = 0
+    outcodes_total: int = 0
+    # Number of unique property ids collected per channel.
+    properties_buy: int = 0
+    properties_rent: int = 0
+    # Human-readable error messages accumulated during the run.
+    errors: list[str] = field(default_factory=list)
+    # time.time() timestamps; 0.0 means "not set".
+    started_at: float = 0.0
+    finished_at: float = 0.0
+
+
+status = ScrapeStatus()
+status_lock = threading.Lock()
+
+
+def _sync_gauges() -> None:
+    """Mirror the module-level ``status`` into the Prometheus gauges.
+
+    Callers are expected to hold ``status_lock`` while this runs.
+    """
+    current = status.state
+    for name in ("idle", "running", "done", "error"):
+        scrape_state.labels(state=name).set(int(current == name))
+
+    scrape_outcodes_done.set(status.outcodes_done)
+    scrape_outcodes_total.set(status.outcodes_total)
+
+    per_channel = (("buy", status.properties_buy), ("rent", status.properties_rent))
+    for channel, count in per_channel:
+        scrape_properties_total.labels(channel=channel).set(count)
+
+    # Elapsed time stays at 0 until a scrape has started; while running it is
+    # measured against the current clock, afterwards against finished_at.
+    elapsed = 0
+    if status.started_at:
+        stop = status.finished_at or time.time()
+        elapsed = stop - status.started_at
+    scrape_elapsed_seconds.set(elapsed)
+
+
+def load_outcodes() -> list[str]:
+    """Load England-only outcodes from arcgis parquet.
+
+    Returns:
+        Sorted, de-duplicated outcodes extracted from the ``pcd`` column of
+        rows whose ``ctry`` equals ``E92000001``.
+    """
+    log.info("Loading outcodes from %s", ARCGIS_PATH)
+    # Only the postcode and country columns are used here; the coordinates
+    # are needed by build_postcode_index(), not by this function.
+    df = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry"])
+    england = df.filter(pl.col("ctry") == "E92000001")
+    log.info("England postcodes: %d", len(england))
+
+    outcodes = (
+        england.select(pl.col("pcd").str.extract(r"^([A-Z]{1,2}\d[A-Z0-9]?)", 1).alias("outcode"))
+        .drop_nulls()
+        .get_column("outcode")
+        .unique()
+        .sort()
+        .to_list()
+    )
+    log.info("Unique England outcodes: %d", len(outcodes))
+    return outcodes
+
+
+def build_postcode_index() -> PostcodeSpatialIndex:
+    """Construct the postcode spatial index from the arcgis parquet.
+
+    Only rows with ``ctry == "E92000001"`` and non-null ``lat``/``long`` are
+    indexed.
+    """
+    log.info("Building postcode spatial index from %s", ARCGIS_PATH)
+    frame = pl.read_parquet(ARCGIS_PATH, columns=["pcd", "ctry", "lat", "long"])
+    in_england = pl.col("ctry") == "E92000001"
+    england = frame.filter(in_england).drop_nulls(subset=["lat", "long"])
+    lats, lngs, postcodes = (
+        england.get_column(column).to_list() for column in ("lat", "long", "pcd")
+    )
+    return PostcodeSpatialIndex(lats, lngs, postcodes)
+
+
+def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
+    """Main scrape loop — runs in background thread.
+
+    For each entry in ``CHANNELS`` every outcode is scraped in a seeded,
+    shuffled order, properties are de-duplicated by id, and the result is
+    written to ``DATA_DIR / rightmove_<buy|rent>.parquet``. Progress is
+    published through the module-level ``status`` (under ``status_lock``)
+    and mirrored to the Prometheus gauges.
+
+    Args:
+        outcodes: Outcodes to scrape; the list itself is not mutated.
+        pc_index: Spatial index passed through to ``search_outcode``.
+
+    Per-outcode exceptions are logged, counted and recorded in
+    ``status.errors``; any other exception marks the run as ``error``.
+    """
+    with status_lock:
+        status.state = "running"
+        status.started_at = time.time()
+        # Clear the previous run's end time: _sync_gauges() prefers finished_at
+        # over the current clock, so a stale value would make the elapsed
+        # gauge negative for the whole duration of a second run.
+        status.finished_at = 0.0
+        status.errors = []
+        status.properties_buy = 0
+        status.properties_rent = 0
+        _sync_gauges()
+
+    # Shuffle for geographic diversity. A private Random instance gives the
+    # same seeded order without reseeding the process-wide generator that
+    # other code may rely on for real randomness.
+    shuffled = list(outcodes)
+    random.Random(SEED).shuffle(shuffled)
+
+    client = make_client()
+
+    try:
+        for channel_cfg in CHANNELS:
+            channel_name = channel_cfg["channel"]
+            file_suffix = "buy" if channel_name == "BUY" else "rent"
+            all_properties: dict[int, dict] = {}  # dedup by id
+
+            with status_lock:
+                status.channel = channel_name
+                status.outcodes_done = 0
+                status.outcodes_total = len(shuffled)
+
+            log.info("=== Starting %s channel (%d outcodes) ===", channel_name, len(shuffled))
+
+            for i, outcode in enumerate(shuffled):
+                with status_lock:
+                    status.outcode = outcode
+                    status.outcodes_done = i
+
+                log.debug("Outcode %s (%d/%d) — %d properties so far",
+                          outcode, i + 1, len(shuffled), len(all_properties))
+
+                try:
+                    outcode_id = resolve_outcode_id(client, outcode)
+                    if not outcode_id:
+                        log.debug("No Rightmove ID for outcode %s, skipping", outcode)
+                        # Note: this also skips the inter-outcode delay below.
+                        continue
+
+                    props = search_outcode(client, outcode_id, outcode, channel_cfg, pc_index)
+                    for p in props:
+                        pid = p["id"]
+                        # First occurrence wins when an id shows up under several outcodes.
+                        if pid not in all_properties:
+                            all_properties[pid] = p
+
+                    with status_lock:
+                        if channel_name == "BUY":
+                            status.properties_buy = len(all_properties)
+                        else:
+                            status.properties_rent = len(all_properties)
+                        _sync_gauges()
+
+                    log.info("Outcode %s: got %d properties (total: %d)", outcode, len(props), len(all_properties))
+
+                except Exception as e:
+                    # Best effort: one failing outcode must not abort the channel.
+                    msg = f"Error scraping {outcode}/{channel_name}: {e}"
+                    log.error(msg)
+                    scrape_errors_total.inc()
+                    with status_lock:
+                        status.errors.append(msg)
+
+                if i < len(shuffled) - 1:
+                    time.sleep(DELAY_BETWEEN_OUTCODES)
+
+            # Write parquet
+            deduped = list(all_properties.values())
+            output_path = DATA_DIR / f"rightmove_{file_suffix}.parquet"
+            write_parquet(deduped, output_path)
+
+            with status_lock:
+                if channel_name == "BUY":
+                    status.properties_buy = len(deduped)
+                else:
+                    status.properties_rent = len(deduped)
+                status.outcodes_done = len(shuffled)
+                _sync_gauges()
+
+            log.info("=== %s channel complete: %d unique properties ===", channel_name, len(deduped))
+
+        with status_lock:
+            status.state = "done"
+            status.finished_at = time.time()
+            _sync_gauges()
+            elapsed = status.finished_at - status.started_at
+            log.info("Scrape complete in %.0fs — buy: %d, rent: %d",
+                     elapsed, status.properties_buy, status.properties_rent)
+
+    except Exception as e:
+        log.exception("Fatal scrape error")
+        with status_lock:
+            status.state = "error"
+            status.errors.append(f"Fatal: {e}")
+            status.finished_at = time.time()
+            _sync_gauges()
+    finally:
+        client.close()
--- a/finder/spatial.py
+++ b/finder/spatial.py
@ -0,0 +1,33 @@
+import logging
+import math
+from collections import defaultdict
+
+from constants import GRID_CELL_SIZE
+
+log = logging.getLogger("rightmove")
+
+
+class PostcodeSpatialIndex:
+    """Grid-based spatial index over arcgis postcodes for nearest-lookup.
+
+    Points are bucketed into square cells of ``GRID_CELL_SIZE`` degrees keyed
+    by ``(floor(lng / size), floor(lat / size))``.
+    """
+
+    def __init__(self, lats: list[float], lngs: list[float], postcodes: list[str]):
+        """Bucket every ``(lat, lng, postcode)`` triple into its grid cell.
+
+        The three lists are zipped, so they are expected to be parallel.
+        """
+        self.grid: dict[tuple[int, int], list[tuple[float, float, str]]] = defaultdict(list)
+        for lat, lng, pcd in zip(lats, lngs, postcodes):
+            gx = int(math.floor(lng / GRID_CELL_SIZE))
+            gy = int(math.floor(lat / GRID_CELL_SIZE))
+            self.grid[(gx, gy)].append((lat, lng, pcd))
+        log.info("Postcode spatial index: %d cells, %d postcodes", len(self.grid), len(lats))
+
+    def nearest(self, lat: float, lng: float) -> str | None:
+        """Return the closest indexed postcode to ``(lat, lng)``.
+
+        Only the query's cell and its eight neighbours are searched, so the
+        result is None when all nine cells are empty.
+        """
+        gx = int(math.floor(lng / GRID_CELL_SIZE))
+        gy = int(math.floor(lat / GRID_CELL_SIZE))
+        # A degree of longitude spans cos(latitude) times the distance of a
+        # degree of latitude; without this factor east-west offsets are
+        # over-weighted and a farther postcode can win the comparison.
+        lng_scale = math.cos(math.radians(lat))
+        best_dist = float("inf")
+        best_pcd = None
+        for dx in range(-1, 2):
+            for dy in range(-1, 2):
+                # .get() avoids inserting empty cells into the defaultdict.
+                for plat, plng, pcd in self.grid.get((gx + dx, gy + dy), []):
+                    d = (plat - lat) ** 2 + ((plng - lng) * lng_scale) ** 2
+                    if d < best_dist:
+                        best_dist = d
+                        best_pcd = pcd
+        return best_pcd
--- a/finder/storage.py
+++ b/finder/storage.py
@ -0,0 +1,65 @@
+import logging
+from pathlib import Path
+
+import polars as pl
+
+log = logging.getLogger("rightmove")
+
+
+def write_parquet(properties: list[dict], path: Path) -> None:
+    """Persist the property dicts as a parquet file via Polars.
+
+    An empty list is logged as a warning and nothing is written. Every dict
+    must provide all schema keys; columns are emitted in schema order.
+    """
+    if not properties:
+        log.warning("No properties to write to %s", path)
+        return
+
+    # Column name -> dtype; the insertion order defines the column order.
+    schema = {
+        "id": pl.Int64,
+        "bedrooms": pl.Int32,
+        "bathrooms": pl.Int32,
+        "total_rooms": pl.Int32,
+        "longitude": pl.Float64,
+        "latitude": pl.Float64,
+        "postcode": pl.Utf8,
+        "address": pl.Utf8,
+        "tenure": pl.Utf8,
+        "property_type": pl.Utf8,
+        "property_sub_type": pl.Utf8,
+        "price": pl.Int64,
+        "price_frequency": pl.Utf8,
+        "price_qualifier": pl.Utf8,
+        "floorspace_sqm": pl.Float64,
+        "url": pl.Utf8,
+        "features": pl.List(pl.Utf8),
+        "first_visible_date": pl.Utf8,
+        "update_date": pl.Utf8,
+        "outcode": pl.Utf8,
+        "house_share": pl.Boolean,
+    }
+    columns = {name: [record[name] for record in properties] for name in schema}
+    frame = pl.DataFrame(columns, schema=schema)
+
+    frame.write_parquet(path)
+    log.info("Wrote %d properties to %s", len(frame), path)
--- a/finder/transform.py
+++ b/finder/transform.py
@ -0,0 +1,124 @@
+import logging
+import re
+
+from constants import PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
+from spatial import PostcodeSpatialIndex
+
+log = logging.getLogger("rightmove")
+
+
+def parse_display_size(display_size: str | None) -> float | None:
+    """Convert a displaySize string to square metres, rounded to one decimal.
+
+    Handles values such as '499 sq. ft.' or '4,124 sq. ft.' (converted from
+    square feet) and 'sq. m' values (taken as is). Returns None for empty
+    input or when neither unit is recognised.
+    """
+    if not display_size:
+        return None
+
+    # (pattern, factor to sqm); square feet is tried before square metres.
+    # A factor of None means the number is already in square metres.
+    unit_patterns = (
+        (r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", 0.092903),
+        (r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", None),
+    )
+    for pattern, to_sqm in unit_patterns:
+        found = re.search(pattern, display_size, re.IGNORECASE)
+        if found is None:
+            continue
+        value = float(found.group(1).replace(",", ""))
+        if to_sqm is not None:
+            value = value * to_sqm
+        return round(value, 1)
+    return None
+
+
+def map_property_type(sub_type: str | None) -> str:
+    """Translate a propertySubType into its canonical type.
+
+    Empty input yields "Other" silently; a non-empty sub-type without a
+    (truthy) entry in ``PROPERTY_TYPE_MAP`` yields "Other" with a warning.
+    """
+    canonical = PROPERTY_TYPE_MAP.get(sub_type) if sub_type else None
+    if canonical:
+        return canonical
+    if sub_type:
+        log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
+    return "Other"
+
+
+def extract_tenure(tenure_obj: dict | None) -> str | None:
+    """Map a tenure object's ``tenureType`` to a display string.
+
+    Returns "Freehold" or "Leasehold" for the matching upper-case types and
+    None for a missing object or any other value.
+    """
+    if not tenure_obj:
+        return None
+    match tenure_obj.get("tenureType", ""):
+        case "FREEHOLD":
+            return "Freehold"
+        case "LEASEHOLD":
+            return "Leasehold"
+        case _:
+            return None
+
+
+def fix_coords(lat: float, lng: float) -> tuple[float, float]:
+    """Return ``(lat, lng)``, swapping the pair when it looks reversed.
+
+    The plausibility box is lat 49..56 and lng -7..2. Values that fit neither
+    as given nor swapped are returned unchanged after a warning.
+    """
+
+    def _plausible(latitude: float, longitude: float) -> bool:
+        return 49 <= latitude <= 56 and -7 <= longitude <= 2
+
+    if _plausible(lat, lng):
+        return lat, lng
+    if _plausible(lng, lat):
+        log.debug("Swapping reversed coords: lat=%.4f lng=%.4f → lat=%.4f lng=%.4f", lat, lng, lng, lat)
+        return lng, lat
+    log.warning("Coords outside England bounds even after swap attempt: lat=%.4f lng=%.4f", lat, lng)
+    return lat, lng
+
+
+def normalize_price(amount: int, frequency: str) -> int:
+    """Express a price per month: weekly × 52/12, yearly ÷ 12, otherwise unchanged."""
+    if frequency not in ("weekly", "yearly"):
+        return amount
+    monthly = amount * 52 / 12 if frequency == "weekly" else amount / 12
+    return round(monthly)
+
+
+def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex) -> dict | None:
+    """Transform a raw Rightmove property dict into our output schema.
+
+    Args:
+        prop: One entry of the search response's ``properties`` list.
+        outcode: Outcode the property was found under; copied to the output.
+        pc_index: Spatial index used to look up the nearest postcode.
+
+    Returns:
+        The output-schema dict, or None when the listing has no location, is
+        missing either coordinate, or has no price amount.
+
+    Keys that are present but null are treated like missing keys. This
+    matches the existing ``or 0`` handling of bedrooms/bathrooms and keeps one
+    sparse listing from raising and failing its whole outcode.
+    """
+    loc = prop.get("location")
+    if not loc:
+        return None
+    raw_lat = loc.get("latitude")
+    raw_lng = loc.get("longitude")
+    if raw_lat is None or raw_lng is None:
+        return None
+
+    lat, lng = fix_coords(raw_lat, raw_lng)
+
+    # `or {}` / `or []` cover an explicit null, which .get(key, default) does not.
+    price_obj = prop.get("price") or {}
+    amount = price_obj.get("amount")
+    if amount is None:
+        return None
+    frequency = price_obj.get("frequency", "")
+    price = normalize_price(int(amount), frequency)
+
+    display_prices = price_obj.get("displayPrices") or []
+    price_qualifier = display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
+
+    sub_type = prop.get("propertySubType", "")
+    bedrooms = prop.get("bedrooms", 0) or 0
+    bathrooms = prop.get("bathrooms", 0) or 0
+
+    # Keep only features that carry a non-empty description.
+    key_features = [
+        kf.get("description", "")
+        for kf in (prop.get("keyFeatures") or [])
+        if kf.get("description")
+    ]
+
+    listing_update = prop.get("listingUpdate") or {}
+    update_date = listing_update.get("listingUpdateDate", "")
+
+    postcode = pc_index.nearest(lat, lng)
+
+    return {
+        "id": prop.get("id"),
+        "bedrooms": bedrooms,
+        "bathrooms": bathrooms,
+        "total_rooms": bedrooms + bathrooms,
+        "longitude": lng,
+        "latitude": lat,
+        "postcode": postcode,
+        "address": prop.get("displayAddress", ""),
+        "tenure": extract_tenure(prop.get("tenure")),
+        "property_type": map_property_type(sub_type),
+        "property_sub_type": sub_type or "Unknown",
+        "price": price,
+        "price_frequency": frequency,
+        "price_qualifier": price_qualifier,
+        "floorspace_sqm": parse_display_size(prop.get("displaySize")),
+        "url": RIGHTMOVE_BASE + (prop.get("propertyUrl") or ""),
+        "features": key_features,
+        "first_visible_date": prop.get("firstVisibleDate", ""),
+        "update_date": update_date,
+        "outcode": outcode,
+        "house_share": sub_type == "House Share",
+    }
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@ -21,6 +21,7 @@
        "pocketbase": "^0.26.8",
        "react": "^18.2.0",
        "react-dom": "^18.2.0",
+        "react-joyride": "^2.9.3",
        "react-map-gl": "^7.1.0"
      },
      "devDependencies": {
@ -2033,6 +2034,11 @@
      "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==",
      "license": "MIT"
    },
+    "node_modules/@gilbarbara/deep-equal": {
+      "version": "0.3.1",
+      "resolved": "https://registry.npmjs.org/@gilbarbara/deep-equal/-/deep-equal-0.3.1.tgz",
+      "integrity": "sha512-I7xWjLs2YSVMc5gGx1Z3ZG1lgFpITPndpi8Ku55GeEIKpACCPQNS/OTqQbxgTCfq0Ncvcc+CrFov96itVh6Qvw=="
+    },
    "node_modules/@humanwhocodes/config-array": {
      "version": "0.13.0",
      "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.13.0.tgz",
@ -4623,7 +4629,6 @@
      "version": "15.7.15",
      "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz",
      "integrity": "sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw==",
-      "devOptional": true,
      "license": "MIT"
    },
    "node_modules/@types/qs": {
@ -4644,7 +4649,6 @@
      "version": "18.3.27",
      "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.27.tgz",
      "integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==",
-      "devOptional": true,
      "license": "MIT",
      "dependencies": {
        "@types/prop-types": "*",
@ -6772,7 +6776,6 @@
      "version": "3.2.3",
      "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz",
      "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==",
-      "devOptional": true,
      "license": "MIT"
    },
    "node_modules/data-uri-to-buffer": {
@ -6856,6 +6859,12 @@
        }
      }
    },
+    "node_modules/deep-diff": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/deep-diff/-/deep-diff-1.0.2.tgz",
+      "integrity": "sha512-aWS3UIVH+NPGCD1kki+DCU9Dua032iSsO43LqQpcs4R3+dVv7tX0qBGjiVHJHjplsoUM2XRO/KB92glqc68awg==",
+      "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info."
+    },
    "node_modules/deep-is": {
      "version": "0.1.4",
      "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
@ -6875,6 +6884,14 @@
        "node": ">=0.10.0"
      }
    },
+    "node_modules/deepmerge": {
+      "version": "4.3.1",
+      "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
+      "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
    "node_modules/default-browser": {
      "version": "5.4.0",
      "resolved": "https://registry.npmjs.org/default-browser/-/default-browser-5.4.0.tgz",
@ -9693,6 +9710,11 @@
        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
+    "node_modules/is-lite": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/is-lite/-/is-lite-1.2.1.tgz",
+      "integrity": "sha512-pgF+L5bxC+10hLBgf6R2P4ZZUBOQIIacbdo8YvuCP8/JvsWxG7aZ9p10DYuLtifFci4l3VITphhMlMV4Y+urPw=="
+    },
    "node_modules/is-map": {
      "version": "2.0.3",
      "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz",
@ -10710,7 +10732,6 @@
      "version": "4.1.1",
      "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
      "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
-      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">=0.10.0"
@ -11253,6 +11274,16 @@
      "resolved": "https://registry.npmjs.org/pocketbase/-/pocketbase-0.26.8.tgz",
      "integrity": "sha512-aQ/ewvS7ncvAE8wxoW10iAZu6ElgbeFpBhKPnCfvRovNzm2gW8u/sQNPGN6vNgVEagz44kK//C61oKjfa+7Low=="
    },
+    "node_modules/popper.js": {
+      "version": "1.16.1",
+      "resolved": "https://registry.npmjs.org/popper.js/-/popper.js-1.16.1.tgz",
+      "integrity": "sha512-Wb4p1J4zyFTbM+u6WuO4XstYx4Ky9Cewe4DWrel7B0w6VVICvPwdOpotjzcf6eD8TsckVnIMNONQyPIUFOUbCQ==",
+      "deprecated": "You can find the new Popper v2 at @popperjs/core, this package is dedicated to the legacy v1",
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/popperjs"
+      }
+    },
    "node_modules/possible-typed-array-names": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz",
@ -11608,7 +11639,6 @@
      "version": "15.8.1",
      "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz",
      "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==",
-      "dev": true,
      "license": "MIT",
      "dependencies": {
        "loose-envify": "^1.4.0",
@ -11854,13 +11884,89 @@
        "react": "^18.3.1"
      }
    },
+    "node_modules/react-floater": {
+      "version": "0.7.9",
+      "resolved": "https://registry.npmjs.org/react-floater/-/react-floater-0.7.9.tgz",
+      "integrity": "sha512-NXqyp9o8FAXOATOEo0ZpyaQ2KPb4cmPMXGWkx377QtJkIXHlHRAGer7ai0r0C1kG5gf+KJ6Gy+gdNIiosvSicg==",
+      "dependencies": {
+        "deepmerge": "^4.3.1",
+        "is-lite": "^0.8.2",
+        "popper.js": "^1.16.0",
+        "prop-types": "^15.8.1",
+        "tree-changes": "^0.9.1"
+      },
+      "peerDependencies": {
+        "react": "15 - 18",
+        "react-dom": "15 - 18"
+      }
+    },
+    "node_modules/react-floater/node_modules/@gilbarbara/deep-equal": {
+      "version": "0.1.2",
+      "resolved": "https://registry.npmjs.org/@gilbarbara/deep-equal/-/deep-equal-0.1.2.tgz",
+      "integrity": "sha512-jk+qzItoEb0D0xSSmrKDDzf9sheQj/BAPxlgNxgmOaA3mxpUa6ndJLYGZKsJnIVEQSD8zcTbyILz7I0HcnBCRA=="
+    },
+    "node_modules/react-floater/node_modules/is-lite": {
+      "version": "0.8.2",
+      "resolved": "https://registry.npmjs.org/is-lite/-/is-lite-0.8.2.tgz",
+      "integrity": "sha512-JZfH47qTsslwaAsqbMI3Q6HNNjUuq6Cmzzww50TdP5Esb6e1y2sK2UAaZZuzfAzpoI2AkxoPQapZdlDuP6Vlsw=="
+    },
+    "node_modules/react-floater/node_modules/tree-changes": {
+      "version": "0.9.3",
+      "resolved": "https://registry.npmjs.org/tree-changes/-/tree-changes-0.9.3.tgz",
+      "integrity": "sha512-vvvS+O6kEeGRzMglTKbc19ltLWNtmNt1cpBoSYLj/iEcPVvpJasemKOlxBrmZaCtDJoF+4bwv3m01UKYi8mukQ==",
+      "dependencies": {
+        "@gilbarbara/deep-equal": "^0.1.1",
+        "is-lite": "^0.8.2"
+      }
+    },
+    "node_modules/react-innertext": {
+      "version": "1.1.5",
+      "resolved": "https://registry.npmjs.org/react-innertext/-/react-innertext-1.1.5.tgz",
+      "integrity": "sha512-PWAqdqhxhHIv80dT9znP2KvS+hfkbRovFp4zFYHFFlOoQLRiawIic81gKb3U1wEyJZgMwgs3JoLtwryASRWP3Q==",
+      "peerDependencies": {
+        "@types/react": ">=0.0.0 <=99",
+        "react": ">=0.0.0 <=99"
+      }
+    },
    "node_modules/react-is": {
      "version": "16.13.1",
      "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz",
      "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
-      "dev": true,
      "license": "MIT"
    },
+    "node_modules/react-joyride": {
+      "version": "2.9.3",
+      "resolved": "https://registry.npmjs.org/react-joyride/-/react-joyride-2.9.3.tgz",
+      "integrity": "sha512-1+Mg34XK5zaqJ63eeBhqdbk7dlGCFp36FXwsEvgpjqrtyywX2C6h9vr3jgxP0bGHCw8Ilsp/nRDzNVq6HJ3rNw==",
+      "dependencies": {
+        "@gilbarbara/deep-equal": "^0.3.1",
+        "deep-diff": "^1.0.2",
+        "deepmerge": "^4.3.1",
+        "is-lite": "^1.2.1",
+        "react-floater": "^0.7.9",
+        "react-innertext": "^1.1.5",
+        "react-is": "^16.13.1",
+        "scroll": "^3.0.1",
+        "scrollparent": "^2.1.0",
+        "tree-changes": "^0.11.2",
+        "type-fest": "^4.27.0"
+      },
+      "peerDependencies": {
+        "react": "15 - 18",
+        "react-dom": "15 - 18"
+      }
+    },
+    "node_modules/react-joyride/node_modules/type-fest": {
+      "version": "4.41.0",
+      "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz",
+      "integrity": "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA==",
+      "engines": {
+        "node": ">=16"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
    "node_modules/react-map-gl": {
      "version": "7.1.9",
      "resolved": "https://registry.npmjs.org/react-map-gl/-/react-map-gl-7.1.9.tgz",
@ -12457,6 +12563,16 @@
        "url": "https://opencollective.com/webpack"
      }
    },
+    "node_modules/scroll": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/scroll/-/scroll-3.0.1.tgz",
+      "integrity": "sha512-pz7y517OVls1maEzlirKO5nPYle9AXsFzTMNJrRGmT951mzpIBy7sNHOg5o/0MQd/NqliCiWnAi0kZneMPFLcg=="
+    },
+    "node_modules/scrollparent": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/scrollparent/-/scrollparent-2.1.0.tgz",
+      "integrity": "sha512-bnnvJL28/Rtz/kz2+4wpBjHzWoEzXhVg/TE8BeVGJHUqE8THNIRnDxDWMktwM+qahvlRdvlLdsQfYe+cuqfZeA=="
+    },
    "node_modules/select-hose": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/select-hose/-/select-hose-2.0.0.tgz",
@ -13754,6 +13870,15 @@
        "node": ">=0.6"
      }
    },
+    "node_modules/tree-changes": {
+      "version": "0.11.3",
+      "resolved": "https://registry.npmjs.org/tree-changes/-/tree-changes-0.11.3.tgz",
+      "integrity": "sha512-r14mvDZ6tqz8PRQmlFKjhUVngu4VZ9d92ON3tp0EGpFBE6PAHOq8Bx8m8ahbNoGE3uI/npjYcJiqVydyOiYXag==",
+      "dependencies": {
+        "@gilbarbara/deep-equal": "^0.3.1",
+        "is-lite": "^1.2.1"
+      }
+    },
    "node_modules/tree-dump": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/tree-dump/-/tree-dump-1.1.0.tgz",
--- a/frontend/package.json
+++ b/frontend/package.json
@ -26,6 +26,7 @@
    "pocketbase": "^0.26.8",
    "react": "^18.2.0",
    "react-dom": "^18.2.0",
+    "react-joyride": "^2.9.3",
    "react-map-gl": "^7.1.0"
  },
  "devDependencies": {
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@ -4,6 +4,7 @@ import PricingPage from './components/pricing/PricingPage';
 import HomePage from './components/home/HomePage';
 import SavedSearchesPage from './components/saved-searches/SavedSearchesPage';
 import LearnPage from './components/learn/LearnPage';
+import AccountPage from './components/account/AccountPage';
 import Header, { type Page } from './components/ui/Header';
 import AuthModal from './components/ui/AuthModal';
 import SaveSearchModal from './components/ui/SaveSearchModal';
@ -32,6 +33,8 @@ case 'saved-searches':
      return '/learn';
    case 'pricing':
      return '/pricing';
+    case 'account':
+      return '/account';
    default:
      return '/';
  }
@ -42,6 +45,7 @@ function pathToPage(pathname: string): Page | null {
 if (pathname === '/saved') return 'saved-searches';
  if (pathname === '/learn') return 'learn';
  if (pathname === '/pricing') return 'pricing';
+  if (pathname === '/account') return 'account';
  if (pathname === '/') return 'home';
  return null;
 }
@ -92,6 +96,7 @@ export default function App() {
    register,
    logout,
    requestPasswordReset,
+    refreshAuth,
    clearError,
  } = useAuth();
  const [showAuthModal, setShowAuthModal] = useState(false);
@ -233,6 +238,8 @@ export default function App() {
        <PricingPage onOpenDashboard={() => navigateTo('dashboard')} />
      ) : activePage === 'learn' ? (
        <LearnPage />
+      ) : activePage === 'account' && user ? (
+        <AccountPage user={user} onRefreshAuth={refreshAuth} />
      ) : activePage === 'saved-searches' ? (
        <SavedSearchesPage
          searches={savedSearches.searches}
--- a/frontend/src/components/account/AccountPage.tsx
+++ b/frontend/src/components/account/AccountPage.tsx
@ -0,0 +1,131 @@
+import { useState } from 'react';
+import type { AuthUser } from '../../hooks/useAuth';
+import { apiUrl, authHeaders, assertOk } from '../../lib/api';
+import { SpinnerIcon } from '../ui/icons/SpinnerIcon';
+import { CheckIcon } from '../ui/icons/CheckIcon';
+
+// Subscription tiers offered in the admin <select>, in display order.
+const SUBSCRIPTION_OPTIONS = ['free', 'rental', 'buyer'] as const;
+
+// Human-readable label per tier. Keyed by plain string so that an arbitrary
+// `user.subscription` value can be looked up; the badge below falls back to
+// the raw value when no label exists.
+const SUBSCRIPTION_LABELS: Record<string, string> = {
+  free: 'Free',
+  rental: 'Rental',
+  buyer: 'Buyer',
+};
+
+/**
+ * Account page: shows the signed-in user's email, verification state and
+ * current subscription tier. Users with `isAdmin` set additionally get a
+ * <select> + Save control that PATCHes the `subscription` API endpoint with
+ * the chosen tier.
+ *
+ * @param user          The authenticated user whose details are rendered.
+ * @param onRefreshAuth Async callback awaited after the PATCH request;
+ *                      presumably reloads `user` in the parent so the badge
+ *                      reflects the new tier (verify against the caller).
+ */
+export default function AccountPage({
+  user,
+  onRefreshAuth,
+}: {
+  user: AuthUser;
+  onRefreshAuth: () => Promise<void>;
+}) {
+  // Tier currently chosen in the admin <select>; starts at the user's own tier.
+  const [selectedSubscription, setSelectedSubscription] = useState(user.subscription || 'free');
+  const [saving, setSaving] = useState(false);
+  const [saved, setSaved] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  // Sends the selected tier to the API, refreshes the auth state, then shows
+  // a temporary "Saved" indicator. Any thrown error is surfaced via `error`.
+  const handleSave = async () => {
+    // Clear the status flags left over from a previous attempt.
+    setSaving(true);
+    setError(null);
+    setSaved(false);
+    try {
+      // The object returned by authHeaders(...) is spread into the fetch
+      // init; presumably it adds the auth token to the given headers and
+      // passes `body` through — verify in lib/api.
+      const res = await fetch(apiUrl('subscription'), {
+        method: 'PATCH',
+        ...authHeaders({
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ subscription: selectedSubscription }),
+        }),
+      });
+      // NOTE(review): the result of assertOk is not awaited. If assertOk is
+      // async, a failed response would not reach the catch below — confirm
+      // its signature in lib/api.
+      assertOk(res, 'Update subscription');
+      await onRefreshAuth();
+      setSaved(true);
+      // Revert the "Saved" indicator after 2 seconds.
+      // NOTE(review): this timer is never cleared, so it can still fire after
+      // unmount or cut short the indicator of a later save.
+      setTimeout(() => setSaved(false), 2000);
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : 'Failed to update subscription';
+      setError(msg);
+    } finally {
+      setSaving(false);
+    }
+  };
+
+  // Tailwind classes for the tier badge: teal for buyer, blue for rental,
+  // neutral warm tones for anything else.
+  const badgeColor =
+    user.subscription === 'buyer'
+      ? 'bg-teal-100 text-teal-700 dark:bg-teal-900/30 dark:text-teal-400'
+      : user.subscription === 'rental'
+        ? 'bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400'
+        : 'bg-warm-100 text-warm-600 dark:bg-warm-700 dark:text-warm-300';
+
+  return (
+    <div className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950">
+      <div className="max-w-lg mx-auto px-6 py-16">
+        <h1 className="text-2xl font-bold text-navy-950 dark:text-warm-100 mb-8">Account</h1>
+
+        <div className="bg-white dark:bg-warm-800 rounded-xl border border-warm-200 dark:border-warm-700 divide-y divide-warm-200 dark:divide-warm-700">
+          {/* Email */}
+          <div className="px-5 py-4 flex items-center justify-between">
+            <div>
+              <p className="text-sm text-warm-500 dark:text-warm-400">Email</p>
+              <p className="text-navy-950 dark:text-warm-100 font-medium">{user.email}</p>
+            </div>
+            <span
+              className={`text-xs font-medium px-2 py-0.5 rounded-full ${
+                user.verified
+                  ? 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-400'
+                  : 'bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-400'
+              }`}
+            >
+              {user.verified ? 'Verified' : 'Unverified'}
+            </span>
+          </div>
+
+          {/* Subscription */}
+          <div className="px-5 py-4 flex items-center justify-between">
+            <div>
+              <p className="text-sm text-warm-500 dark:text-warm-400">Subscription</p>
+              <span className={`inline-block text-sm font-medium px-2.5 py-0.5 rounded-full mt-1 ${badgeColor}`}>
+                {SUBSCRIPTION_LABELS[user.subscription] || user.subscription || 'Free'}
+              </span>
+            </div>
+          </div>
+
+          {/* Admin section */}
+          {user.isAdmin && (
+            <div className="px-5 py-4">
+              <p className="text-sm text-warm-500 dark:text-warm-400 mb-3">
+                Admin: Change subscription
+              </p>
+              <div className="flex items-center gap-3">
+                <select
+                  value={selectedSubscription}
+                  onChange={(e) => setSelectedSubscription(e.target.value)}
+                  className="flex-1 px-3 py-2 rounded-lg border border-warm-200 dark:border-warm-700 bg-white dark:bg-warm-900 text-navy-950 dark:text-warm-200 text-sm"
+                >
+                  {SUBSCRIPTION_OPTIONS.map((opt) => (
+                    <option key={opt} value={opt}>
+                      {SUBSCRIPTION_LABELS[opt]}
+                    </option>
+                  ))}
+                </select>
+                <button
+                  onClick={handleSave}
+                  disabled={saving || selectedSubscription === user.subscription}
+                  className="px-4 py-2 rounded-lg bg-teal-600 hover:bg-teal-700 text-white text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
+                >
+                  {saving ? (
+                    <SpinnerIcon className="w-4 h-4 animate-spin" />
+                  ) : saved ? (
+                    <CheckIcon className="w-4 h-4" />
+                  ) : null}
+                  {saved ? 'Saved' : 'Save'}
+                </button>
+              </div>
+              {error && (
+                <p className="mt-2 text-sm text-red-600 dark:text-red-400">{error}</p>
+              )}
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+}
--- a/frontend/src/components/home/HexCanvas.tsx
+++ b/frontend/src/components/home/HexCanvas.tsx
@ -1,131 +1,63 @@
-import { useRef, useEffect } from 'react';
+import { useMemo } from 'react';

-const HEX_COUNT = 70;
-const TAU = Math.PI * 2;
+const HEX_COUNT = 50;

-interface Hex {
-  x: number;
-  y: number;
-  baseY: number;
+// Randomised parameters for one decorative hexagon, fixed at generation time.
+interface HexConfig {
+  // Width and height of the hexagon box (numeric style value, i.e. px).
   size: number;
+  // Base opacity; scaled down in dark mode when rendered.
   opacity: number;
-  speed: number;
-  phase: number;
+  // Vertical position as a percentage of the container height.
+  top: number;
+  // Length of one `hex-drift` animation cycle, in seconds.
+  driftDuration: number;
+  // Length of one `hex-bob` animation cycle, in seconds.
+  bobDuration: number;
+  // Bob amplitude in px, handed to CSS through the `--bob` custom property.
+  bobAmount: number;
+  // Animation delay of the drift animation, in seconds.
+  delay: number;
+  // When true the drift animation plays in reverse.
+  reverse: boolean;
  }

-function initHexes(w: number, h: number): Hex[] {
-  const hexes: Hex[] = [];
+// Builds HEX_COUNT hexagon configs, each with independently randomised size,
+// opacity, vertical position and animation timings.
+function generateHexes(): HexConfig[] {
+  const hexes: HexConfig[] = [];
  for (let i = 0; i < HEX_COUNT; i++) {
-    const y = Math.random() * h;
-    const side = Math.random() < 0.5 ? 'left' : 'right';
-    const x = side === 'left' ? Math.random() * w * 0.3 : w * 0.7 + Math.random() * w * 0.3;
+    // Drawn up front because the start delay below is derived from it.
+    const driftDuration = 18 + Math.random() * 35;
    hexes.push({
-      x,
-      y,
-      baseY: y,
-      size: 8 + Math.random() * 20,
-      opacity: 0.08 + Math.random() * 0.15,
-      speed: 6 + Math.random() * 14,
-      phase: Math.random() * TAU,
+      size: 10 + Math.random() * 32,
+      opacity: 0.06 + Math.random() * 0.18,
+      top: Math.random() * 100,
+      driftDuration,
+      bobDuration: 3 + Math.random() * 5,
+      bobAmount: 8 + Math.random() * 30,
+      // Negative delay: each hexagon starts part-way through its drift cycle
+      // rather than all of them starting from the same point together.
+      delay: -Math.random() * driftDuration,
+      // Roughly 30% of the hexagons drift in the opposite direction.
+      reverse: Math.random() < 0.3,
    });
  }
  return hexes;
 }

-function drawHex(ctx: CanvasRenderingContext2D, cx: number, cy: number, r: number) {
-  ctx.beginPath();
-  for (let i = 0; i < 6; i++) {
-    const angle = (TAU / 6) * i - Math.PI / 6;
-    const px = cx + r * Math.cos(angle);
-    const py = cy + r * Math.sin(angle);
-    if (i === 0) ctx.moveTo(px, py);
-    else ctx.lineTo(px, py);
-  }
-  ctx.closePath();
-}
-
 export default function HexCanvas({ isDark = false }: { isDark?: boolean }) {
-  const canvasRef = useRef<HTMLCanvasElement>(null);
-  const hexesRef = useRef<Hex[]>([]);
-  const animRef = useRef(0);
-  const isDarkRef = useRef(isDark);
-  isDarkRef.current = isDark;
-
-  useEffect(() => {
-    const canvas = canvasRef.current;
-    if (!canvas) return;
-    const ctx = canvas.getContext('2d');
-    if (!ctx) return;
-
-    let w = 0;
-    let h = 0;
-
-    function resize() {
-      const dpr = window.devicePixelRatio || 1;
-      const rect = canvas!.parentElement!.getBoundingClientRect();
-      w = rect.width;
-      h = rect.height;
-      canvas!.width = w * dpr;
-      canvas!.height = h * dpr;
-      canvas!.style.width = `${w}px`;
-      canvas!.style.height = `${h}px`;
-      ctx!.setTransform(dpr, 0, 0, dpr, 0, 0);
-      hexesRef.current = initHexes(w, h);
-    }
-
-    resize();
-    const ro = new ResizeObserver(resize);
-    ro.observe(canvas.parentElement!);
-
-    let prev = performance.now();
-
-    function frame(now: number) {
-      const dt = (now - prev) / 1000;
-      prev = now;
-      ctx!.clearRect(0, 0, w, h);
-
-      for (const hex of hexesRef.current) {
-        hex.x += hex.speed * dt * 0.3;
-        if (hex.x > w * 0.3 + hex.size && hex.x < w * 0.7 - hex.size) {
-          hex.x = w * 0.7 + hex.size;
-        }
-        if (hex.x > w + hex.size * 2) {
-          hex.x = -hex.size * 2;
-          hex.y = Math.random() * h;
-          hex.baseY = hex.y;
-        }
-
-        const bob = Math.sin(now / 1000 + hex.phase) * 8;
-        hex.y = hex.baseY + bob;
-
-        const dark = isDarkRef.current;
-        ctx!.globalAlpha = hex.opacity * (dark ? 0.6 : 1);
-        ctx!.fillStyle = dark ? '#058172' : '#00a28c';
-        drawHex(ctx!, hex.x, hex.y, hex.size);
-        ctx!.fill();
-
-        ctx!.globalAlpha = hex.opacity * 0.5 * (dark ? 0.6 : 1);
-        ctx!.strokeStyle = dark ? '#0a665b' : '#05c9aa';
-        ctx!.lineWidth = 1;
-        drawHex(ctx!, hex.x, hex.y, hex.size);
-        ctx!.stroke();
-      }
-
-      animRef.current = requestAnimationFrame(frame);
-    }
-
-    animRef.current = requestAnimationFrame(frame);
-    return () => {
-      cancelAnimationFrame(animRef.current);
-      ro.disconnect();
-    };
-  }, []);
+  // Decorative background of drifting hexagons, rendered as absolutely
+  // positioned <div>s driven by CSS animations instead of a <canvas> loop.
+  // The configs are generated once per mount (empty dependency list), so a
+  // re-render — e.g. when `isDark` changes — does not re-randomise them.
+  const hexes = useMemo(generateHexes, []);

  return (
-    <canvas
-      ref={canvasRef}
-      className="absolute inset-0 pointer-events-none"
-      style={{ zIndex: 0 }}
-    />
+    <div className="absolute inset-0 overflow-hidden pointer-events-none" style={{ zIndex: 0 }}>
+      {hexes.map((hex, i) => (
+        <div
+          key={i}
+          className="absolute"
+          style={{
+            // Only the vertical position is set here; horizontal movement
+            // presumably comes from the `hex-drift` keyframes, which are
+            // defined outside this file — verify in the stylesheet.
+            top: `${hex.top}%`,
+            animation: `hex-drift ${hex.driftDuration}s linear ${hex.delay}s infinite${hex.reverse ? ' reverse' : ''}`,
+          }}
+        >
+          <div
+            className="bg-teal-500"
+            style={{
+              width: hex.size,
+              height: hex.size,
+              // Dark mode dims each hexagon to 60% of its base opacity.
+              opacity: hex.opacity * (isDark ? 0.6 : 1),
+              // Clip the square box to a pointy-top hexagon.
+              clipPath: 'polygon(50% 0%, 100% 25%, 100% 75%, 50% 100%, 0% 75%, 0% 25%)',
+              animation: `hex-bob ${hex.bobDuration}s ease-in-out infinite`,
+              // Bob amplitude is passed to CSS as a custom property; the cast
+              // below is needed because CSSProperties does not type `--*` keys.
+              '--bob': `${hex.bobAmount}px`,
+            } as React.CSSProperties}
+          />
+        </div>
+      ))}
+    </div>
  );
 }
--- a/frontend/src/components/home/HomePage.tsx
+++ b/frontend/src/components/home/HomePage.tsx
@ -43,7 +43,7 @@ export default function HomePage({
          <HexCanvas isDark={theme === 'dark'} />
          {/* Radial teal glow */}
          <div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-[600px] h-[400px] bg-teal-500/[0.07] rounded-full blur-3xl pointer-events-none" />
-          <div className="relative z-10 max-w-4xl mx-auto px-6">
+          <div className="relative z-10 max-w-4xl mx-auto px-6 md:px-10 py-6 backdrop-blur-sm bg-navy-950/30 rounded-2xl">
            <p className="text-teal-400 font-semibold tracking-wide uppercase text-sm mb-4">
              Browsing listings is not a strategy. Knowing what you want is.
            </p>
--- a/frontend/src/components/map/Filters.tsx
+++ b/frontend/src/components/map/Filters.tsx
@ -119,7 +119,37 @@ export default memo(function Filters({
  onAiFilterSubmit,
 }: FiltersProps) {
  const availableFeatures = features.filter((f) => !enabledFeatures.has(f.name));
-  const enabledFeatureList = features.filter((f) => enabledFeatures.has(f.name));
+  const enabledFeatureList = features.filter(
+    (f) => enabledFeatures.has(f.name) && f.name !== 'Listing status'
+  );
+
+  // Decode the 'Listing status' filter into one boolean per listing type.
+  // With no such filter set, every listing type counts as shown.
+  const listingToggles = useMemo(() => {
+    const val = filters['Listing status'] as string[] | undefined;
+    if (!val) return { historical: true, buy: true, rent: true };
+    return {
+      historical: val.includes('Historical sale'),
+      buy: val.includes('For sale'),
+      rent: val.includes('For rent'),
+    };
+  }, [filters]);
+
+  // Flip one listing-type pill and write the result back to the filter.
+  // All-on is the same as "no filter", and all-off is also treated as a
+  // reset: in both cases the filter is removed, so every pill reads as
+  // active again. Otherwise the filter is set to the still-enabled values.
+  const handleListingToggle = useCallback(
+    (key: 'historical' | 'buy' | 'rent') => {
+      const next = { ...listingToggles, [key]: !listingToggles[key] };
+      const allOn = next.historical && next.buy && next.rent;
+      const allOff = !next.historical && !next.buy && !next.rent;
+      if (allOn || allOff) {
+        onRemoveFilter('Listing status');
+        return;
+      }
+      const values: string[] = [];
+      if (next.historical) values.push('Historical sale');
+      if (next.buy) values.push('For sale');
+      if (next.rent) values.push('For rent');
+      onFilterChange('Listing status', values);
+    },
+    [listingToggles, onFilterChange, onRemoveFilter]
+  );

  const containerRef = useRef<HTMLDivElement>(null);
  const [showPhilosophy, setShowPhilosophy] = useState(false);
@ -155,7 +185,8 @@ export default memo(function Filters({
    return scales;
  }, [features]);

-  const badgeCount = enabledFeatureList.length + activeModes.length;
+  const hasListingFilter = !listingToggles.historical || !listingToggles.buy || !listingToggles.rent;
+  const badgeCount = enabledFeatureList.length + activeModes.length + (hasListingFilter ? 1 : 0);

  return (
    <div ref={containerRef} className="flex flex-col bg-white dark:bg-navy-950 overflow-y-auto md:overflow-hidden h-full">
@ -171,6 +202,17 @@ export default memo(function Filters({
          </button>
        </div>
      </div>
+      <div className="shrink-0 flex items-center gap-2 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
+        <span className="text-xs font-medium text-warm-500 dark:text-warm-400">Show</span>
+        <PillGroup>
+          <PillToggle label="Historical" active={listingToggles.historical}
+            onClick={() => handleListingToggle('historical')} size="xs" />
+          <PillToggle label="Buy" active={listingToggles.buy}
+            onClick={() => handleListingToggle('buy')} size="xs" />
+          <PillToggle label="Rent" active={listingToggles.rent}
+            onClick={() => handleListingToggle('rent')} size="xs" />
+        </PillGroup>
+      </div>
      <div className="shrink-0 md:shrink md:min-h-0 flex flex-col md:basis-[40%]">
        <div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700">
          <div className="flex items-center gap-2">
--- a/frontend/src/components/map/LocationSearch.tsx
+++ b/frontend/src/components/map/LocationSearch.tsx
@ -118,7 +118,7 @@ export default function LocationSearch({
  }

  return (
-    <div ref={containerRef} className="absolute top-3 left-3 z-10 flex flex-col">
+    <div ref={containerRef} data-tutorial="search" className="absolute top-3 left-3 z-10 flex flex-col">
      <div className="flex items-center shadow-lg rounded overflow-hidden bg-white dark:bg-warm-800">
        <SearchIcon className="w-4 h-4 text-warm-400 dark:text-warm-500 ml-3 shrink-0" />
        <PlaceSearchInput
--- a/frontend/src/components/map/Map.tsx
+++ b/frontend/src/components/map/Map.tsx
@ -37,7 +37,7 @@ interface MapProps {
  features: FeatureMeta[];
  selectedHexagonId: string | null;
  hoveredHexagonId: string | null;
-  onHexagonClick: (id: string, isPostcode?: boolean) => void;
+  onHexagonClick: (id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => void;
  onHexagonHover: (h3: string | null, x?: number, y?: number) => void;
  initialViewState?: ViewState;
  theme?: 'light' | 'dark';
--- a/frontend/src/components/map/MapPage.tsx
+++ b/frontend/src/components/map/MapPage.tsx
@ -1,5 +1,5 @@
 import { useState, useEffect, useMemo, useCallback } from 'react';
-import type { FeatureMeta, FeatureFilters, POICategoryGroup, ViewState } from '../../types';
+import type { FeatureMeta, FeatureFilters, POICategoryGroup, ViewState, PostcodeGeometry } from '../../types';
 import type { SearchedLocation } from './LocationSearch';
 import type { Page } from '../ui/Header';
 import Map from './Map';
@ -18,6 +18,9 @@ import { usePaneResize } from '../../hooks/usePaneResize';
 import { useAiFilters } from '../../hooks/useAiFilters';
 import { useAreaSummary } from '../../hooks/useAreaSummary';
 import { useUrlSync } from '../../hooks/useUrlSync';
+import { useTutorial } from '../../hooks/useTutorial';
+import { getTutorialStyles } from '../../lib/tutorial-styles';
+import Joyride from 'react-joyride';
 import {
  useTravelTime,
  TRANSPORT_MODES,
@ -191,8 +194,8 @@ export default function MapPage({
  // On mobile, open drawer and switch tab when hexagon is clicked
  const { handleHexagonClick } = selection;
  const handleMobileHexagonClick = useCallback(
-    (id: string, isPostcode?: boolean) => {
-      handleHexagonClick(id, isPostcode);
+    (id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => {
+      handleHexagonClick(id, isPostcode, geometry);
      if (id) {
        setMobileDrawerOpen(true);
      }
@ -225,6 +228,9 @@ export default function MapPage({
    mapData.resolution,
  ]);

+  // Tutorial
+  const tutorial = useTutorial(initialLoading, isMobile);
+
  // AI area summary
  const aiSummary = useAreaSummary({
    stats: selection.areaStats,
@ -551,8 +557,20 @@ export default function MapPage({
        </div>
      )}

+      <Joyride
+        steps={tutorial.steps}
+        run={tutorial.run}
+        continuous
+        showProgress
+        showSkipButton
+        callback={tutorial.handleCallback}
+        styles={getTutorialStyles(theme)}
+        disableScrolling
+      />
+
      {/* Left Pane */}
      <div
+        data-tutorial="filters"
        className="flex bg-white dark:bg-navy-950 shadow-lg overflow-hidden"
        style={{ width: leftPaneWidth }}
      >
@ -566,7 +584,7 @@ export default function MapPage({
      </div>

      {/* Map */}
-      <div className="flex-1 relative">
+      <div data-tutorial="map" className="flex-1 relative">
        <Map
          data={mapData.data}
          postcodeData={mapData.postcodeData}
@ -599,6 +617,7 @@ export default function MapPage({
        )}
        {/* Floating POI button */}
        <button
+          data-tutorial="poi-button"
          onClick={() => setPoiPaneOpen((p) => !p)}
          className={`absolute bottom-4 right-4 z-10 p-2 rounded-lg shadow-lg bg-white dark:bg-warm-800 ${poiPaneOpen ? 'text-teal-600 dark:text-teal-400' : 'text-warm-500 dark:text-warm-400 hover:text-teal-600 dark:hover:text-teal-400'}`}
        >
@ -614,6 +633,7 @@ export default function MapPage({

      {/* Right Pane */}
      <div
+        data-tutorial="right-pane"
        className="flex bg-white dark:bg-navy-950 shadow-lg z-10"
        style={{ width: rightPaneWidth }}
      >
--- a/frontend/src/components/ui/Header.tsx
+++ b/frontend/src/components/ui/Header.tsx
@ -13,7 +13,7 @@ import { SpinnerIcon } from './icons/SpinnerIcon';
 import UserMenu from './UserMenu';
 import MobileMenu from './MobileMenu';

-export type Page = 'home' | 'dashboard' | 'saved-searches' | 'learn' | 'pricing';
+export type Page = 'home' | 'dashboard' | 'saved-searches' | 'learn' | 'pricing' | 'account';

 export default function Header({
  activePage,
@ -200,7 +200,7 @@ export default function Header({
        {!isMobile && (
          <>
            {user ? (
-              <UserMenu user={user} onLogout={onLogout} />
+              <UserMenu user={user} onLogout={onLogout} onPageChange={onPageChange} />
            ) : (
              <>
                <button
--- a/frontend/src/components/ui/MobileMenu.tsx
+++ b/frontend/src/components/ui/MobileMenu.tsx
@ -83,6 +83,7 @@ export default function MobileMenu({
          {user && mobileNavItem('saved-searches', 'Saved')}
          {mobileNavItem('learn', 'Learn')}
          {mobileNavItem('pricing', 'Pricing')}
+          {user && mobileNavItem('account', 'Account')}

          {/* Dashboard actions */}
          {activePage === 'dashboard' && (
--- a/frontend/src/components/ui/UserMenu.tsx
+++ b/frontend/src/components/ui/UserMenu.tsx
@ -1,7 +1,16 @@
 import { useState, useRef, useEffect } from 'react';
 import type { AuthUser } from '../../hooks/useAuth';
+import type { Page } from './Header';

-export default function UserMenu({ user, onLogout }: { user: AuthUser; onLogout: () => void }) {
+export default function UserMenu({
+  user,
+  onLogout,
+  onPageChange,
+}: {
+  user: AuthUser;
+  onLogout: () => void;
+  onPageChange: (page: Page) => void;
+}) {
  const [open, setOpen] = useState(false);
  const menuRef = useRef<HTMLDivElement>(null);

@ -37,6 +46,15 @@ export default function UserMenu({ user, onLogout }: { user: AuthUser; onLogout:
            </p>
          </div>
          <div className="p-1">
+            <button
+              onClick={() => {
+                setOpen(false);
+                onPageChange('account');
+              }}
+              className="w-full text-left px-3 py-2 text-sm text-warm-700 dark:text-warm-300 hover:bg-warm-50 dark:hover:bg-warm-700 rounded"
+            >
+              Account
+            </button>
            <button
              onClick={() => {
                setOpen(false);
--- a/frontend/src/hooks/useAuth.ts
+++ b/frontend/src/hooks/useAuth.ts
@ -5,6 +5,8 @@ export interface AuthUser {
  id: string;
  email: string;
  verified: boolean;
+  isAdmin: boolean;
+  subscription: string;
 }

 function recordToUser(record: { id: string; [key: string]: unknown }): AuthUser {
@ -15,6 +17,8 @@ function recordToUser(record: { id: string; [key: string]: unknown }): AuthUser
    id: record.id,
    email: record.email,
    verified: typeof record.verified === 'boolean' ? record.verified : false,
+    isAdmin: typeof record.is_admin === 'boolean' ? record.is_admin : false,
+    subscription: typeof record.subscription === 'string' ? record.subscription : 'free',
  };
 }

@ -110,6 +114,11 @@ export function useAuth() {
    }
  }, []);

+  // Refresh the session through PocketBase's authRefresh on the `users`
+  // collection and store the returned record as the current user. There is
+  // no try/catch here, so a failed refresh rejects and the caller must
+  // handle it.
+  const refreshAuth = useCallback(async () => {
+    const result = await pb.collection('users').authRefresh();
+    setUser(recordToUser(result.record));
+  }, []);
+
  const clearError = useCallback(() => {
    setError(null);
  }, []);
@ -123,6 +132,7 @@ export function useAuth() {
    loginWithOAuth,
    logout,
    requestPasswordReset,
+    refreshAuth,
    clearError,
  };
 }
--- a/frontend/src/hooks/useDeckLayers.ts
+++ b/frontend/src/hooks/useDeckLayers.ts
@ -1,4 +1,4 @@
-import { useCallback, useRef, useState, useMemo } from 'react';
+import { useCallback, useRef, useState, useMemo, useEffect } from 'react';
 import { H3HexagonLayer } from '@deck.gl/geo-layers';
 import { GeoJsonLayer, IconLayer, TextLayer, ScatterplotLayer } from '@deck.gl/layers';
 import type { PickingInfo } from '@deck.gl/core';
@ -18,6 +18,7 @@ import {
  type TransportMode,
  type TravelTimeEntries,
 } from './useTravelTime';
+import { MarchingAntsExtension } from '../lib/MarchingAntsExtension';

 /** Convert POI id (e.g. "n12345") to OpenStreetMap URL */
 function osmIdToUrl(id: string): string | null {
@ -40,7 +41,7 @@ interface UseDeckLayersProps {
  features: FeatureMeta[];
  selectedHexagonId: string | null;
  hoveredHexagonId: string | null;
-  onHexagonClick: (id: string, isPostcode?: boolean) => void;
+  onHexagonClick: (id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => void;
  onHexagonHover: (h3: string | null, x?: number, y?: number) => void;
  theme: 'light' | 'dark';
  selectedPostcodeGeometry?: PostcodeGeometry | null;
@ -89,9 +90,18 @@ export function useDeckLayers({
 }: UseDeckLayersProps) {
  const [popupInfo, setPopupInfo] = useState<PopupInfo | null>(null);
  const [hoverPosition, setHoverPosition] = useState<{ x: number; y: number } | null>(null);
-  const [selectedPostcode, setSelectedPostcode] = useState<string | null>(null);
  const [hoveredPostcode, setHoveredPostcode] = useState<string | null>(null);

+  // Marching ants animation: while a postcode geometry is selected, advance
+  // `marchTime` by 0.3 every 50 ms. The effect depends only on whether a
+  // geometry is present, so the counter restarts from 0 when a selection
+  // appears but not when one selection is swapped directly for another. The
+  // interval is cleared when the selection goes away or the hook unmounts.
+  const [marchTime, setMarchTime] = useState(0);
+  const hasPostcodeGeometry = selectedPostcodeGeometry != null;
+  useEffect(() => {
+    if (!hasPostcodeGeometry) return;
+    setMarchTime(0);
+    const id = setInterval(() => setMarchTime((t) => t + 0.3), 50);
+    return () => clearInterval(id);
+  }, [hasPostcodeGeometry]);
+
  const isDark = theme === 'dark';
  const densityGradient = isDark ? DENSITY_GRADIENT_DARK : DENSITY_GRADIENT;

@ -110,8 +120,6 @@ export function useDeckLayers({
  selectedHexagonIdRef.current = selectedHexagonId;
  const hoveredHexagonIdRef = useRef(hoveredHexagonId);
  hoveredHexagonIdRef.current = hoveredHexagonId;
-  const selectedPostcodeRef = useRef(selectedPostcode);
-  selectedPostcodeRef.current = selectedPostcode;
  const hoveredPostcodeRef = useRef(hoveredPostcode);
  hoveredPostcodeRef.current = hoveredPostcode;

@ -233,8 +241,7 @@ export function useDeckLayers({
  const handlePostcodeClick = useCallback((info: PickingInfo<any>) => {
    const pc = info.object?.properties?.postcode;
    if (pc) {
-      setSelectedPostcode((prev) => (prev === pc ? null : pc));
-      onHexagonClickRef.current(pc, true);
+      onHexagonClickRef.current(pc, true, info.object?.geometry);
    }
  }, []);

@ -265,7 +272,7 @@ export function useDeckLayers({
  }, [travelTimeEntries, travelTimeColorRanges]);

  const colorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${countRange.min}|${countRange.max}|${selectedHexagonId}|${hoveredHexagonId}|${theme}|${ttTrigger}`;
-  const postcodeColorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${postcodeCountRange.min}|${postcodeCountRange.max}|${selectedPostcode}|${hoveredPostcode}|${theme}|${ttTrigger}`;
+  const postcodeColorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${postcodeCountRange.min}|${postcodeCountRange.max}|${hoveredPostcode}|${theme}|${ttTrigger}`;

  // --- Layers ---
  const hexLayer = useMemo(
@ -423,8 +430,6 @@ export function useDeckLayers({
        getLineColor: (f) => {
          const pc = f.properties.postcode;
          const dark = isDarkRef.current;
-          if (pc === selectedPostcodeRef.current)
-            return [255, 255, 255, 255] as [number, number, number, number];
          if (pc === hoveredPostcodeRef.current)
            return [29, 228, 195, 200] as [number, number, number, number];
          return (dark ? [180, 170, 160, 100] : [100, 100, 100, 150]) as [
@ -436,7 +441,6 @@ export function useDeckLayers({
        },
        getLineWidth: (f) => {
          const pc = f.properties.postcode;
-          if (pc === selectedPostcodeRef.current) return 3;
          if (pc === hoveredPostcodeRef.current) return 2;
          return 1;
        },
@ -500,37 +504,28 @@ export function useDeckLayers({
    [pois, stablePoiHover]
  );

-  // Check if the selected postcode has data (passes current filters)
-  const selectedPostcodeHasData = useMemo(() => {
-    if (!selectedPostcodeGeometry || !selectedHexagonId) return false;
-    return postcodeData.some((f) => f.properties.postcode === selectedHexagonId);
-  }, [selectedPostcodeGeometry, selectedHexagonId, postcodeData]);
-
-  // Highlight layer for selected postcode (from search)
-  const selectedPostcodeHighlightLayer = useMemo(() => {
+  // Marching ants highlight layer for selected postcode (click or search)
+  const marchingAntsLayer = useMemo(() => {
    if (!selectedPostcodeGeometry) return null;
-    const hasData = selectedPostcodeHasData;
-    const feature = {
-      type: 'Feature' as const,
-      geometry: selectedPostcodeGeometry,
-      properties: {},
-    };
    return new GeoJsonLayer({
-      id: 'searched-postcode-highlight',
-      data: [feature],
-      getFillColor: hasData
-        ? [29, 228, 195, 40] // teal tint when has data
-        : [255, 180, 0, 30], // orange tint when filtered out
-      getLineColor: hasData
-        ? [29, 228, 195, 255] // solid teal when has data
-        : [255, 180, 0, 200], // orange when filtered out (no matching properties)
-      getLineWidth: hasData ? 4 : 3,
-      lineWidthUnits: 'pixels',
+      id: 'marching-ants',
+      data: [
+        {
+          type: 'Feature' as const,
+          geometry: selectedPostcodeGeometry,
+          properties: {},
+        },
+      ],
+      filled: false,
      stroked: true,
-      filled: true,
+      getLineColor: [29, 228, 195, 255],
+      getLineWidth: 3,
+      lineWidthUnits: 'pixels' as const,
      pickable: false,
+      marchTime,
+      extensions: [new MarchingAntsExtension()],
    });
-  }, [selectedPostcodeGeometry, selectedPostcodeHasData]);
+  }, [selectedPostcodeGeometry, marchTime]);

  // Destination markers: one red dot per mode with a destination
  const destinationMarkerData = useMemo(() => {
@ -566,7 +561,7 @@ export function useDeckLayers({
    const baseLayers: any[] = usePostcodeView
      ? [postcodeLayer, postcodeLabelsLayer, poiLayer]
      : [hexLayer, poiLayer];
-    if (selectedPostcodeHighlightLayer) baseLayers.push(selectedPostcodeHighlightLayer);
+    if (marchingAntsLayer) baseLayers.push(marchingAntsLayer);
    if (destinationMarkerLayer) baseLayers.push(destinationMarkerLayer);
    return baseLayers;
  }, [
@ -575,7 +570,7 @@ export function useDeckLayers({
    postcodeLayer,
    postcodeLabelsLayer,
    poiLayer,
-    selectedPostcodeHighlightLayer,
+    marchingAntsLayer,
    destinationMarkerLayer,
  ]);

@ -594,7 +589,6 @@ export function useDeckLayers({
    postcodeCountRange,
    colorFeatureMeta,
    handleMouseLeave,
-    selectedPostcode,
    hoveredPostcode,
    primaryTravelMode,
  };
--- a/frontend/src/hooks/useHexagonSelection.ts
+++ b/frontend/src/hooks/useHexagonSelection.ts
@ -99,15 +99,16 @@ export function useHexagonSelection({ filters, features, resolution }: UseHexago
  );

  const handleHexagonClick = useCallback(
-    (id: string, isPostcode = false) => {
-      setSelectedPostcodeGeometry(null);
+    (id: string, isPostcode = false, geometry?: PostcodeGeometry) => {
      if (selectedHexagon?.id === id) {
        setSelectedHexagon(null);
        setProperties([]);
        setAreaStats(null);
+        setSelectedPostcodeGeometry(null);
      } else {
        const type = isPostcode ? 'postcode' : 'hexagon';
        setSelectedHexagon({ id, type, resolution });
+        setSelectedPostcodeGeometry(isPostcode && geometry ? geometry : null);
        setProperties([]);
        setPropertiesTotal(0);
        setPropertiesOffset(0);
--- a/frontend/src/hooks/useTutorial.ts
+++ b/frontend/src/hooks/useTutorial.ts
@ -0,0 +1,86 @@
+import { useState, useCallback, useMemo } from 'react';
+import type { Step, CallBackProps } from 'react-joyride';
+import { ACTIONS, EVENTS, STATUS } from 'react-joyride';
+
+/** localStorage key; a stored value under it means the tutorial was already finished or dismissed. */
+const STORAGE_KEY = 'tutorial_completed';
+
+/**
+ * Ordered walkthrough steps handed to react-joyride.
+ *
+ * Every step locates its anchor element through a `data-tutorial="…"`
+ * attribute selector, so the matching attributes must exist on elements
+ * rendered elsewhere in the app (not visible from this file).
+ */
+const STEPS: Step[] = [
+  {
+    target: '[data-tutorial="filters"]',
+    title: 'Filter Properties',
+    content:
+      'Use filters to narrow down properties by price, energy rating, floor area, and more. Pin a filter to colour the map by that feature.',
+    placement: 'right',
+    disableBeacon: true,
+  },
+  {
+    target: '[data-tutorial="map"]',
+    title: 'Explore the Map',
+    content:
+      'Pan and zoom to explore property data across the UK. Click any hexagon to see detailed stats and individual properties.',
+    placement: 'bottom',
+    disableBeacon: true,
+  },
+  {
+    target: '[data-tutorial="search"]',
+    title: 'Search Locations',
+    content:
+      'Search for a place name or postcode to jump directly to that area on the map.',
+    placement: 'bottom',
+    disableBeacon: true,
+  },
+  {
+    target: '[data-tutorial="right-pane"]',
+    title: 'Area Stats & Properties',
+    content:
+      'After clicking a hexagon, view aggregated area statistics or browse individual properties in this pane.',
+    placement: 'left',
+    disableBeacon: true,
+  },
+  {
+    target: '[data-tutorial="poi-button"]',
+    title: 'Points of Interest',
+    content:
+      'Toggle points of interest like schools, shops, and transport stops to see what amenities are nearby.',
+    placement: 'left',
+    disableBeacon: true,
+  },
+];
+
+/**
+ * Reads the persisted "tutorial completed" flag.
+ *
+ * Accessing `localStorage` can throw (e.g. storage blocked by browser
+ * settings or a sandboxed frame). In that case the flag is reported as set,
+ * so the tour does not auto-start on every load when dismissal cannot be
+ * remembered.
+ */
+function isTutorialCompleted(): boolean {
+  try {
+    return Boolean(localStorage.getItem(STORAGE_KEY));
+  } catch {
+    return true;
+  }
+}
+
+/** Persists the "tutorial completed" flag; failures to write are ignored (best effort). */
+function markTutorialCompleted(): void {
+  try {
+    localStorage.setItem(STORAGE_KEY, '1');
+  } catch {
+    // Storage unavailable or full: the tour still stops for this session.
+  }
+}
+
+/** Removes the "tutorial completed" flag; failures to write are ignored (best effort). */
+function clearTutorialCompleted(): void {
+  try {
+    localStorage.removeItem(STORAGE_KEY);
+  } catch {
+    // Storage unavailable: the tour is still restarted for this session.
+  }
+}
+
+/**
+ * State and handlers for the first-visit guided tour.
+ *
+ * @param initialLoading - while true the tour is held back (`run` stays false).
+ * @param isMobile - when true the tour never runs.
+ * @returns A memoised object with the tour `steps`, the effective `run` flag,
+ *   the joyride `handleCallback`, and `resetTutorial` to replay the tour.
+ */
+export function useTutorial(initialLoading: boolean, isMobile: boolean) {
+  // Lazy initialiser: storage is consulted once, and not at all on mobile.
+  const [run, setRun] = useState(() => !isMobile && !isTutorialCompleted());
+
+  const shouldRun = run && !initialLoading && !isMobile;
+
+  const handleCallback = useCallback((data: CallBackProps) => {
+    const { status, action, type } = data;
+
+    const finishedOrSkipped = status === STATUS.FINISHED || status === STATUS.SKIPPED;
+    // Also stop if user closes a tooltip via the X button
+    const closedViaButton = action === ACTIONS.CLOSE && type === EVENTS.STEP_AFTER;
+
+    if (finishedOrSkipped || closedViaButton) {
+      markTutorialCompleted();
+      setRun(false);
+    }
+  }, []);
+
+  const resetTutorial = useCallback(() => {
+    clearTutorialCompleted();
+    setRun(true);
+  }, []);
+
+  return useMemo(
+    () => ({
+      steps: STEPS,
+      run: shouldRun,
+      handleCallback,
+      resetTutorial,
+    }),
+    [shouldRun, handleCallback, resetTutorial]
+  );
+}
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@ -40,6 +40,17 @@ h3 {
    color 0.2s ease;
 }

+/* Hexagon background animations */
+/* hex-drift: horizontal sweep from just left of the viewport (-5vw) to just past its right edge (105vw). */
+@keyframes hex-drift {
+  from { transform: translateX(-5vw); }
+  to { transform: translateX(105vw); }
+}
+
+/* hex-bob: vertical oscillation between +var(--bob) and -var(--bob).
+   NOTE(review): --bob has no fallback here, so it presumably must be set on
+   each animated element — confirm where the animation is applied. */
+@keyframes hex-bob {
+  0%, 100% { transform: translateY(var(--bob)); }
+  50% { transform: translateY(calc(var(--bob) * -1)); }
+}
+
 /* Fade-in animation for homepage sections */
 .fade-in-section {
  opacity: 0;
--- a/frontend/src/lib/MarchingAntsExtension.ts
+++ b/frontend/src/lib/MarchingAntsExtension.ts
@ -0,0 +1,53 @@
+import { LayerExtension } from '@deck.gl/core';
+
+/**
+ * Animates a marching-ants border on PathLayer sublayers: the stroke is
+ * recoloured as alternating white and teal-green dashes whose offset is
+ * driven by the `marchTime` prop.
+ *
+ * The methods below read `this.props` and call `this.setShaderModuleProps`,
+ * i.e. they are written to run with `this` bound to the layer being extended;
+ * the extension instance itself arrives as the `extension` argument.
+ */
+export class MarchingAntsExtension extends LayerExtension {
+  static extensionName = 'MarchingAntsExtension';
+  // Extra layer prop contributed by this extension; defaults to 0 (no offset).
+  static defaultProps = {
+    marchTime: { type: 'number', value: 0 },
+  };
+
+  /** Returns true only for layers whose state contains a `pathTesselator` entry. */
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  isEnabled(layer: any): boolean {
+    return 'pathTesselator' in layer.state;
+  }
+
+  /**
+   * Supplies the shader module for enabled layers, or null for any other layer.
+   * The module declares a `marchTime` uniform and overrides the fragment colour.
+   */
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  getShaders(extension: any): any {
+    if (!extension.isEnabled(this)) return null;
+    return {
+      modules: [
+        {
+          name: 'marchingAnts',
+          inject: {
+            // Uniform block carrying the animation offset into the fragment shader.
+            'fs:#decl': `\
+uniform marchingAntsUniforms {
+  float marchTime;
+} marchingAnts;`,
+            // Dash pattern: period is 2 * marchSegLen along vPathPosition.y, shifted
+            // by marchTime; first half white, second half teal-green, alpha preserved.
+            // NOTE(review): vPathPosition is presumably provided by the path shader — confirm.
+            'fs:DECKGL_FILTER_COLOR': `\
+float marchSegLen = 4.0;
+float marchPos = mod(vPathPosition.y - marchingAnts.marchTime, marchSegLen * 2.0);
+if (marchPos < marchSegLen) {
+  color = vec4(1.0, 1.0, 1.0, color.a);
+} else {
+  color = vec4(0.114, 0.894, 0.765, color.a);
+}`,
+          },
+          uniformTypes: {
+            marchTime: 'f32',
+          },
+        },
+      ],
+    };
+  }
+
+  /**
+   * Forwards the layer's current `marchTime` prop (0 when unset or falsy) to
+   * the `marchingAnts` shader module. Does nothing for layers that are not enabled.
+   */
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  updateState(_params: any, extension: any): void {
+    if (!extension.isEnabled(this)) return;
+    // @ts-expect-error setShaderModuleProps exists on Layer
+    this.setShaderModuleProps({
+      // @ts-expect-error marchTime is a custom prop from this extension
+      marchingAnts: { marchTime: this.props.marchTime || 0 },
+    });
+  }
+}
--- a/frontend/src/lib/tutorial-styles.ts
+++ b/frontend/src/lib/tutorial-styles.ts
@ -0,0 +1,52 @@
+import type { Styles } from 'react-joyride';
+
+/**
+ * Builds the react-joyride style overrides for the given colour theme.
+ *
+ * Only colours vary with the theme; sizes, weights, radii and spacing are the
+ * same in both modes.
+ */
+export function getTutorialStyles(theme: 'light' | 'dark'): Partial<Styles> {
+  // Picks the value for the active theme.
+  const themed = (lightValue: string, darkValue: string): string =>
+    theme === 'dark' ? darkValue : lightValue;
+
+  // Colours that are reused by more than one style slot.
+  const surface = themed('#ffffff', '#292524');
+  const secondaryText = themed('#78716c', '#a8a29e');
+
+  return {
+    options: {
+      arrowColor: surface,
+      backgroundColor: surface,
+      overlayColor: themed('rgba(0,0,0,0.5)', 'rgba(10,14,26,0.75)'),
+      primaryColor: '#00a28c',
+      textColor: themed('#44403c', '#d6d3d1'),
+      zIndex: 1000,
+    },
+    tooltip: { borderRadius: 8, padding: 16 },
+    tooltipTitle: {
+      color: themed('#0a0e1a', '#f5f5f4'),
+      fontSize: 15,
+      fontWeight: 600,
+    },
+    tooltipContent: { fontSize: 13, lineHeight: 1.5, padding: '8px 0 0' },
+    buttonNext: {
+      borderRadius: 6,
+      fontSize: 13,
+      fontWeight: 500,
+      padding: '6px 14px',
+    },
+    buttonBack: {
+      color: secondaryText,
+      fontSize: 13,
+      fontWeight: 500,
+      marginRight: 8,
+    },
+    // Skip uses the opposite pairing to Back/Close: lighter grey in light mode.
+    buttonSkip: {
+      color: themed('#a8a29e', '#78716c'),
+      fontSize: 12,
+    },
+    buttonClose: { color: secondaryText },
+    spotlight: { borderRadius: 8 },
+  };
+}
--- a/pipeline/download/greenspace_water.py
+++ b/pipeline/download/greenspace_water.py
@ -16,7 +16,6 @@ from shapely import wkb
 from shapely.geometry import MultiPolygon, Polygon
 from tqdm import tqdm

-from .pois import download_pbf

 MIN_AREA_SQM = 5_000  # ~70m x 70m — skip pocket parks and small ponds

@ -103,12 +102,7 @@ def main():
    )
    args = parser.parse_args()

-    if args.pbf.exists():
-        pbf_file = args.pbf
-        print(f"Using existing PBF: {pbf_file}")
-    else:
-        download_pbf(args.pbf)
-
+    pbf_file = args.pbf
    print("Extracting greenspace/water areas from PBF (two-pass area assembly)...")
    with tqdm(
        unit=" areas", unit_scale=True, desc="Processing", smoothing=0.05
--- a/pipeline/transform/_price_utils.py
+++ b/pipeline/transform/_price_utils.py
@ -1,121 +0,0 @@
-"""Shared utilities for price index, price estimate, and renovation premium scripts."""
-
-import numpy as np
-import polars as pl
-
-CURRENT_YEAR = 2025
-TERRACE_TYPES = [
-    "Mid-Terrace",
-    "End-Terrace",
-    "Enclosed Mid-Terrace",
-    "Enclosed End-Terrace",
-    "Terraced",
-]
-FLAT_TYPES = ["Flats/Maisonettes", "Flat", "Maisonette"]
-TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats", "Bungalow"]
-SHRINKAGE_K = 50
-
-
-def type_group_expr():
-    """Polars expression: Property type -> type_group."""
-    return (
-        pl.when(pl.col("Property type").is_in(TERRACE_TYPES))
-        .then(pl.lit("Terraced"))
-        .when(pl.col("Property type").is_in(FLAT_TYPES))
-        .then(pl.lit("Flats"))
-        .when(pl.col("Property type") == "Bungalow")
-        .then(pl.lit("Bungalow"))
-        .when(pl.col("Property type").is_in(["Detached", "Semi-Detached"]))
-        .then(pl.col("Property type"))
-        .otherwise(pl.lit(None))
-        .alias("type_group")
-    )
-
-
-def sector_expr():
-    """Polars expression: Postcode -> sector (drop last 2 chars, strip)."""
-    return (
-        pl.col("Postcode")
-        .str.slice(0, pl.col("Postcode").str.len_chars() - 2)
-        .str.strip_chars()
-        .alias("sector")
-    )
-
-
-def hierarchy_keys(sector: str) -> tuple[str, str]:
-    """Return (district, area) for a sector string."""
-    district = sector.rsplit(" ", 1)[0] if " " in sector else sector
-    area = ""
-    for ch in district:
-        if ch.isalpha():
-            area += ch
-        else:
-            break
-    return district, area
-
-
-AGE_BREAKS = [1900, 1930, 1950, 1967, 1983, 2000, 2010]
-AGE_LABELS = [
-    "pre-1900",
-    "1900-1929",
-    "1930-1949",
-    "1950-1966",
-    "1967-1982",
-    "1983-1999",
-    "2000-2009",
-    "2010+",
-]
-
-HEDONIC_COLUMNS = [
-    "Last known price",
-    "Date of last transaction",
-    "Property type",
-    "Total floor area (sqm)",
-    "Postcode",
-]
-
-
-def age_band_expr():
-    """Polars expression: Construction age (UInt16 year) → age band string."""
-    expr = pl.when(pl.col("Construction age").is_null()).then(pl.lit(None))
-    for i, brk in enumerate(AGE_BREAKS):
-        expr = expr.when(pl.col("Construction age") < brk).then(pl.lit(AGE_LABELS[i]))
-    return expr.otherwise(pl.lit(AGE_LABELS[-1])).alias("age_band")
-
-
-NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats", "Bungalow"]
-
-
-def build_hedonic_features(df: pl.DataFrame) -> np.ndarray:
-    """Build hedonic feature matrix from a DataFrame with type_group column.
-
-    Columns (5 total): log(floor_area), 4 type dummies (ref: Detached).
-    Sector fixed effects do the heavy lifting — additional property features
-    (EPC, rooms, age) add no predictive value after sector demeaning.
-    """
-    fa = df["Total floor area (sqm)"].to_numpy().astype(np.float32)
-    log_fa = np.log(np.maximum(fa, 1.0)).reshape(-1, 1)
-    tg = df["type_group"].to_numpy()
-    parts = [log_fa]
-    for t in NON_REF_TYPES:
-        parts.append((tg == t).astype(np.float32).reshape(-1, 1))
-    return np.hstack(parts)
-
-
-def extract_centroids(input_path) -> dict[str, tuple[float, float]]:
-    """Compute mean lat/lon per postcode sector."""
-    print("Computing sector centroids...")
-    df = (
-        pl.scan_parquet(input_path)
-        .select("Postcode", "lat", "lon")
-        .filter(pl.col("Postcode").is_not_null(), pl.col("lat").is_not_null())
-        .with_columns(sector_expr())
-        .group_by("sector")
-        .agg(pl.col("lat").mean(), pl.col("lon").mean())
-        .collect()
-    )
-    centroids = {}
-    for row in df.iter_rows(named=True):
-        centroids[row["sector"]] = (row["lat"], row["lon"])
-    print(f"  {len(centroids):,} sector centroids")
-    return centroids
--- a/pipeline/transform/hedonic_quality.py
+++ b/pipeline/transform/hedonic_quality.py
@ -1,300 +0,0 @@
-"""Cross-Sectional Hedonic Model (Per-Type)
-
-Trains separate OLS models per property type on recent sales (last 5 years)
-with sector fixed effects via Frisch-Waugh-Lovell demeaning:
-
-    log(price) = beta_type * log(floor_area) + alpha_sector_type + epsilon
-
-Each type gets its own floor area elasticity and sector intercepts, capturing
-that detached houses (beta=0.74) have higher price sensitivity to size than
-terraced houses (beta=0.60), and a sector's value differs by property type.
-
-Sector intercepts are hierarchically shrunk (sector → district → area → national)
-and spatially smoothed via KD-tree nearest neighbors.
-
-Output: hedonic_model.json with per-type betas and sector intercepts.
-"""
-
-import argparse
-import json
-from pathlib import Path
-
-import numpy as np
-import polars as pl
-from scipy.spatial import KDTree
-
-from pipeline.transform._price_utils import (
-    CURRENT_YEAR,
-    HEDONIC_COLUMNS,
-    SHRINKAGE_K,
-    TYPE_GROUPS,
-    extract_centroids,
-    hierarchy_keys,
-    sector_expr,
-    type_group_expr,
-)
-
-TRAINING_YEARS = 5
-SPATIAL_NEIGHBORS = 5
-SPATIAL_BLEND_K = 30
-
-
-def load_training_data(input_path: Path) -> pl.DataFrame:
-    """Load recent sales with complete hedonic features."""
-    min_year = CURRENT_YEAR - TRAINING_YEARS
-    print(f"Loading training data (sales {min_year}-{CURRENT_YEAR})...")
-    df = (
-        pl.scan_parquet(input_path)
-        .select(*HEDONIC_COLUMNS)
-        .filter(
-            pl.col("Last known price").is_not_null(),
-            pl.col("Total floor area (sqm)").is_not_null(),
-            pl.col("Total floor area (sqm)") > 0,
-            pl.col("Postcode").is_not_null(),
-        )
-        .with_columns(
-            pl.col("Date of last transaction").dt.year().alias("sale_year"),
-            type_group_expr(),
-            sector_expr(),
-        )
-        .filter(
-            pl.col("type_group").is_not_null(),
-            pl.col("sale_year").is_not_null(),
-            pl.col("sale_year") >= min_year,
-            pl.col("sale_year") <= CURRENT_YEAR,
-        )
-        .collect()
-    )
-    print(f"  {len(df):,} complete cases")
-    return df
-
-
-def train_type_model(
-    df: pl.DataFrame, type_group: str
-) -> tuple[float, dict[str, float], dict[str, int], float]:
-    """Train hedonic model for a single property type.
-
-    Returns (beta_fa, sector_intercepts, sector_counts, national_intercept).
-    """
-    t_df = df.filter(pl.col("type_group") == type_group)
-    y = np.log(t_df["Last known price"].to_numpy().astype(np.float64))
-    log_fa = np.log(
-        np.maximum(t_df["Total floor area (sqm)"].to_numpy().astype(np.float64), 1.0)
-    )
-    X = log_fa.reshape(-1, 1)
-    sectors = t_df["sector"].to_list()
-
-    # Group by sector for demeaning
-    sector_indices: dict[str, list[int]] = {}
-    for i, s in enumerate(sectors):
-        sector_indices.setdefault(s, []).append(i)
-
-    # Compute sector means and demean
-    X_demeaned = np.empty_like(X)
-    y_demeaned = np.empty_like(y)
-    sector_X_means: dict[str, np.ndarray] = {}
-    sector_y_means: dict[str, float] = {}
-    sector_counts: dict[str, int] = {}
-
-    for s, idxs in sector_indices.items():
-        idx = np.array(idxs)
-        X_mean = X[idx].mean(axis=0)
-        y_mean = y[idx].mean()
-        sector_X_means[s] = X_mean
-        sector_y_means[s] = y_mean
-        X_demeaned[idx] = X[idx] - X_mean
-        y_demeaned[idx] = y[idx] - y_mean
-        sector_counts[s] = len(idxs)
-
-    # OLS on demeaned data
-    beta = np.linalg.lstsq(X_demeaned, y_demeaned, rcond=None)[0]
-    beta_fa = float(beta[0])
-
-    # Recover sector intercepts
-    sector_intercepts = {}
-    for s in sector_indices:
-        sector_intercepts[s] = float(sector_y_means[s] - beta_fa * sector_X_means[s][0])
-
-    national_intercept = float(np.mean(list(sector_intercepts.values())))
-
-    # R-squared
-    y_pred = X[:, 0] * beta_fa
-    for i, s in enumerate(sectors):
-        y_pred[i] += sector_intercepts[s]
-    ss_res = np.sum((y - y_pred) ** 2)
-    ss_tot = np.sum((y - y.mean()) ** 2)
-    r2 = 1 - ss_res / ss_tot
-
-    print(
-        f"  {type_group:<15s}: n={len(t_df):>9,}  β_fa={beta_fa:.4f}  "
-        f"R²={r2:.4f}  sectors={len(sector_intercepts):,}"
-    )
-
-    return beta_fa, sector_intercepts, sector_counts, national_intercept
-
-
-def shrink_intercepts(
-    sector_intercepts: dict[str, float],
-    sector_counts: dict[str, int],
-) -> dict[str, float]:
-    """Hierarchical shrinkage: sector -> district -> area -> national."""
-    national = float(np.mean(list(sector_intercepts.values())))
-
-    sector_to_dist: dict[str, str] = {}
-    dist_to_area: dict[str, str] = {}
-    for s in sector_intercepts:
-        d, a = hierarchy_keys(s)
-        sector_to_dist[s] = d
-        dist_to_area[d] = a
-
-    # Area-level intercepts (weighted mean of sectors in area)
-    area_vals: dict[str, list[tuple[float, int]]] = {}
-    for s, val in sector_intercepts.items():
-        d = sector_to_dist[s]
-        a = dist_to_area[d]
-        area_vals.setdefault(a, []).append((val, sector_counts.get(s, 0)))
-
-    area_intercepts: dict[str, float] = {}
-    area_counts: dict[str, int] = {}
-    for a, entries in area_vals.items():
-        total_n = sum(n for _, n in entries)
-        if total_n > 0:
-            area_intercepts[a] = sum(v * n for v, n in entries) / total_n
-        else:
-            area_intercepts[a] = sum(v for v, _ in entries) / len(entries)
-        area_counts[a] = total_n
-
-    # District-level intercepts
-    dist_vals: dict[str, list[tuple[float, int]]] = {}
-    for s, val in sector_intercepts.items():
-        d = sector_to_dist[s]
-        dist_vals.setdefault(d, []).append((val, sector_counts.get(s, 0)))
-
-    dist_intercepts: dict[str, float] = {}
-    dist_counts: dict[str, int] = {}
-    for d, entries in dist_vals.items():
-        total_n = sum(n for _, n in entries)
-        if total_n > 0:
-            dist_intercepts[d] = sum(v * n for v, n in entries) / total_n
-        else:
-            dist_intercepts[d] = sum(v for v, _ in entries) / len(entries)
-        dist_counts[d] = total_n
-
-    # Shrink: area -> national
-    area_shrunk: dict[str, float] = {}
-    for a, val in area_intercepts.items():
-        n = area_counts[a]
-        w = n / (n + SHRINKAGE_K)
-        area_shrunk[a] = w * val + (1 - w) * national
-
-    # Shrink: district -> area
-    dist_shrunk: dict[str, float] = {}
-    for d, val in dist_intercepts.items():
-        a = dist_to_area[d]
-        parent = area_shrunk.get(a, national)
-        n = dist_counts[d]
-        w = n / (n + SHRINKAGE_K)
-        dist_shrunk[d] = w * val + (1 - w) * parent
-
-    # Shrink: sector -> district
-    result: dict[str, float] = {}
-    for s, val in sector_intercepts.items():
-        d = sector_to_dist[s]
-        parent = dist_shrunk.get(d, national)
-        n = sector_counts.get(s, 0)
-        w = n / (n + SHRINKAGE_K)
-        result[s] = w * val + (1 - w) * parent
-
-    return result
-
-
-def spatial_smooth_intercepts(
-    sector_intercepts: dict[str, float],
-    centroids: dict[str, tuple[float, float]],
-    sector_counts: dict[str, int],
-) -> dict[str, float]:
-    """Blend sparse sector intercepts with K nearest neighbors."""
-    sectors_with_coords = [s for s in sector_intercepts if s in centroids]
-    if len(sectors_with_coords) < SPATIAL_NEIGHBORS + 1:
-        return sector_intercepts
-
-    coords = np.array([centroids[s] for s in sectors_with_coords])
-    mean_lat = np.mean(coords[:, 0])
-    scale = np.cos(np.radians(mean_lat))
-    scaled_coords = np.column_stack([coords[:, 0], coords[:, 1] * scale])
-    tree = KDTree(scaled_coords)
-
-    result = dict(sector_intercepts)
-    for i, sec in enumerate(sectors_with_coords):
-        n = sector_counts.get(sec, 0)
-        self_w = n / (n + SPATIAL_BLEND_K)
-        if self_w > 0.95:
-            continue
-
-        dists, idxs = tree.query(scaled_coords[i], k=SPATIAL_NEIGHBORS + 1)
-        neighbor_dists = dists[1:]
-        neighbor_idxs = idxs[1:]
-
-        inv_dists = []
-        neighbor_vals = []
-        for d, j in zip(neighbor_dists, neighbor_idxs):
-            ns = sectors_with_coords[j]
-            if d > 0 and ns in sector_intercepts:
-                inv_dists.append(1.0 / d)
-                neighbor_vals.append(sector_intercepts[ns])
-
-        if not neighbor_vals:
-            continue
-
-        total_inv = sum(inv_dists)
-        nbr_w = 1.0 - self_w
-        blended = self_w * sector_intercepts[sec]
-        for val, iw in zip(neighbor_vals, inv_dists):
-            blended += nbr_w * (iw / total_inv) * val
-        result[sec] = blended
-
-    return result
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Train cross-sectional hedonic model")
-    parser.add_argument(
-        "--input", type=Path, required=True, help="Path to wide.parquet"
-    )
-    parser.add_argument(
-        "--output", type=Path, required=True, help="Output hedonic_model.json"
-    )
-    args = parser.parse_args()
-
-    df = load_training_data(args.input)
-    centroids = extract_centroids(args.input)
-
-    print("\nTraining per-type models...")
-    type_models = {}
-    total_sectors = 0
-
-    for tg in TYPE_GROUPS:
-        beta_fa, raw_intercepts, sector_counts, national = train_type_model(df, tg)
-
-        shrunk = shrink_intercepts(raw_intercepts, sector_counts)
-        smoothed = spatial_smooth_intercepts(shrunk, centroids, sector_counts)
-        total_sectors += len(smoothed)
-
-        type_models[tg] = {
-            "beta_fa": beta_fa,
-            "sector_intercepts": smoothed,
-            "national_intercept": national,
-        }
-
-    # Output
-    args.output.parent.mkdir(parents=True, exist_ok=True)
-    with open(args.output, "w") as f:
-        json.dump({"type_models": type_models}, f, indent=2)
-
-    size_kb = args.output.stat().st_size / 1024
-    print(f"\nWrote {args.output} ({size_kb:.0f} KB)")
-    print(f"  {len(TYPE_GROUPS)} type models, {total_sectors:,} total sector intercepts")
-
-
-if __name__ == "__main__":
-    main()
--- a/pipeline/transform/price_backtest.py
+++ b/pipeline/transform/price_backtest.py
@ -1,385 +0,0 @@
-"""Backtesting: Evaluate price index model on held-out recent sales.
-
-Test set: properties with 2+ sales where the last sale is 2022-2025.
-Uses the second-to-last sale as input, predicts the last sale price.
-Compares index-based prediction against a naive baseline (raw input price).
-Uses type-stratified index when available, falling back to "All" type.
-
-Output: backtest_results.parquet with predictions vs actuals.
-"""
-
-import argparse
-import json
-from pathlib import Path
-
-import numpy as np
-import polars as pl
-
-from pipeline.transform._price_utils import (
-    CURRENT_YEAR,
-    HEDONIC_COLUMNS,
-    sector_expr,
-    type_group_expr,
-)
-
-TEST_YEAR_MIN = 2022
-
-
-def extract_test_set(
-    input_path: Path, include_hedonic_cols: bool = False
-) -> pl.DataFrame:
-    """Extract test pairs: second-to-last sale as input, last sale as ground truth."""
-    print("Loading test set...")
-    cols = ["Postcode", "historical_prices", "Property type"]
-    if include_hedonic_cols:
-        for c in HEDONIC_COLUMNS:
-            if c not in cols:
-                cols.append(c)
-    df = (
-        pl.scan_parquet(input_path)
-        .select(cols)
-        .filter(
-            pl.col("Postcode").is_not_null(),
-            pl.col("historical_prices").list.len() >= 2,
-        )
-        .with_columns(
-            sector_expr(),
-            type_group_expr(),
-            # Last sale (ground truth)
-            pl.col("historical_prices")
-            .list.last()
-            .struct.field("year")
-            .alias("actual_year"),
-            pl.col("historical_prices")
-            .list.last()
-            .struct.field("price")
-            .alias("actual_price"),
-            # Second-to-last sale (input)
-            pl.col("historical_prices")
-            .list.get(-2)
-            .struct.field("year")
-            .alias("input_year"),
-            pl.col("historical_prices")
-            .list.get(-2)
-            .struct.field("price")
-            .alias("input_price"),
-        )
-        .filter(
-            pl.col("actual_year") >= TEST_YEAR_MIN,
-            pl.col("input_price") > 0,
-            pl.col("actual_price") > 0,
-            pl.col("actual_year") > pl.col("input_year"),
-        )
-        .collect()
-    )
-    print(f"  {len(df):,} test pairs (last sale {TEST_YEAR_MIN}-{CURRENT_YEAR})")
-    return df
-
-
-def predict(test: pl.DataFrame, index: pl.DataFrame) -> pl.DataFrame:
-    """Index-based prediction with type-stratified fallback."""
-    has_type_group = "type_group" in index.columns
-
-    if has_type_group:
-        idx_typed = index.filter(pl.col("type_group") != "All")
-        idx_all = index.filter(pl.col("type_group") == "All")
-
-        # Join type-specific index at input year
-        test = test.join(
-            idx_typed.select(
-                "sector", "type_group", "year", pl.col("log_index").alias("li_in_typed")
-            ),
-            left_on=["sector", "type_group", "input_year"],
-            right_on=["sector", "type_group", "year"],
-            how="left",
-        )
-        # Join "All" index at input year
-        test = test.join(
-            idx_all.select("sector", "year", pl.col("log_index").alias("li_in_all")),
-            left_on=["sector", "input_year"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-        # Join type-specific index at actual year
-        test = test.join(
-            idx_typed.select(
-                "sector",
-                "type_group",
-                "year",
-                pl.col("log_index").alias("li_act_typed"),
-            ),
-            left_on=["sector", "type_group", "actual_year"],
-            right_on=["sector", "type_group", "year"],
-            how="left",
-        )
-        # Join "All" index at actual year
-        test = test.join(
-            idx_all.select("sector", "year", pl.col("log_index").alias("li_act_all")),
-            left_on=["sector", "actual_year"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-
-        test = test.with_columns(
-            pl.col("li_in_typed")
-            .fill_null(pl.col("li_in_all"))
-            .alias("log_index_input"),
-            pl.col("li_act_typed")
-            .fill_null(pl.col("li_act_all"))
-            .alias("log_index_actual"),
-        )
-    else:
-        # Unstratified index
-        test = test.join(
-            index.select(
-                "sector", "year", pl.col("log_index").alias("log_index_input")
-            ),
-            left_on=["sector", "input_year"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-        test = test.join(
-            index.select(
-                "sector", "year", pl.col("log_index").alias("log_index_actual")
-            ),
-            left_on=["sector", "actual_year"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-
-    test = test.with_columns(
-        (
-            pl.col("input_price").cast(pl.Float64)
-            * (pl.col("log_index_actual") - pl.col("log_index_input")).exp()
-        )
-        .fill_null(pl.col("input_price").cast(pl.Float64))
-        .alias("predicted"),
-    )
-    return test
-
-
-def compute_metrics(actual: np.ndarray, predicted: np.ndarray) -> dict:
-    valid = np.isfinite(predicted) & np.isfinite(actual) & (actual > 0)
-    actual = actual[valid]
-    predicted = predicted[valid]
-
-    ape = np.abs(predicted - actual) / actual
-    signed_err = predicted - actual
-
-    return {
-        "MdAPE (%)": float(np.median(ape) * 100),
-        "% within 10%": float(np.mean(ape <= 0.10) * 100),
-        "% within 20%": float(np.mean(ape <= 0.20) * 100),
-        "% within 30%": float(np.mean(ape <= 0.30) * 100),
-        "MAE (£)": float(np.mean(np.abs(signed_err))),
-        "Mean signed error (£)": float(np.mean(signed_err)),
-        "n": int(len(actual)),
-    }
-
-
-def print_metrics_table(metrics_by_stage: dict):
-    print("\n" + "=" * 55)
-    print("BACKTEST RESULTS")
-    print("=" * 55)
-
-    metric_names = [
-        "MdAPE (%)",
-        "% within 10%",
-        "% within 20%",
-        "% within 30%",
-        "MAE (£)",
-        "Mean signed error (£)",
-        "n",
-    ]
-    stages = list(metrics_by_stage.keys())
-
-    header = f"{'Metric':<25s}"
-    for stage in stages:
-        header += f" {stage:>14s}"
-    print(header)
-    print("-" * 55)
-
-    for metric in metric_names:
-        row = f"{metric:<25s}"
-        for stage in stages:
-            val = metrics_by_stage[stage][metric]
-            if metric == "n":
-                row += f" {val:>14,d}"
-            elif "£" in metric:
-                row += f" {val:>13,.0f}"
-            else:
-                row += f" {val:>13.1f}%"
-        print(row)
-
-    print("=" * 55)
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Backtest price estimation model")
-    parser.add_argument(
-        "--input", type=Path, required=True, help="Path to wide.parquet"
-    )
-    parser.add_argument(
-        "--index", type=Path, required=True, help="Path to price_index.parquet"
-    )
-    parser.add_argument(
-        "--output", type=Path, required=True, help="Output backtest_results.parquet"
-    )
-    parser.add_argument(
-        "--hedonic-model",
-        type=Path,
-        default=None,
-        help="Path to hedonic_model.json (optional)",
-    )
-    args = parser.parse_args()
-
-    index = pl.read_parquet(args.index)
-    has_type_group = "type_group" in index.columns
-    if has_type_group:
-        print(
-            f"Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
-            f"{index['type_group'].n_unique()} type groups"
-        )
-    else:
-        print(
-            f"Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors"
-        )
-
-    has_hedonic = args.hedonic_model is not None
-    test = extract_test_set(args.input, include_hedonic_cols=has_hedonic)
-
-    print("\nPredicting with price index...")
-    test = predict(test, index)
-
-    # Compute and print metrics
-    actual = test["actual_price"].to_numpy().astype(np.float64)
-    metrics = {
-        "Naive": compute_metrics(
-            actual, test["input_price"].to_numpy().astype(np.float64)
-        ),
-        "Index": compute_metrics(
-            actual, test["predicted"].to_numpy().astype(np.float64)
-        ),
-    }
-
-    # Hedonic blending
-    if has_hedonic:
-        print("\nApplying hedonic blending...")
-        with open(args.hedonic_model) as f:
-            model = json.load(f)
-        type_models = model["type_models"]
-
-        # Identify eligible rows for hedonic estimate
-        hedonic_mask = (
-            pl.col("Total floor area (sqm)").is_not_null()
-            & (pl.col("Total floor area (sqm)") > 0)
-            & pl.col("type_group").is_not_null()
-        )
-        eligible_mask = test.select(hedonic_mask).to_series()
-        eligible = test.filter(eligible_mask)
-
-        if len(eligible) > 0:
-            log_fa = np.log(
-                np.maximum(
-                    eligible["Total floor area (sqm)"].to_numpy().astype(np.float64),
-                    1.0,
-                )
-            )
-            sectors = eligible["sector"].to_list()
-            types = eligible["type_group"].to_list()
-
-            # Per-type hedonic prediction
-            log_hedonic = np.empty(len(eligible))
-            for i in range(len(eligible)):
-                tm = type_models.get(types[i])
-                if tm is None:
-                    log_hedonic[i] = np.nan
-                    continue
-                alpha = tm["sector_intercepts"].get(
-                    sectors[i], tm["national_intercept"]
-                )
-                log_hedonic[i] = tm["beta_fa"] * log_fa[i] + alpha
-
-            valid = np.isfinite(log_hedonic)
-
-            # Hold years: input_year to actual_year (simulating real prediction)
-            input_years = eligible["input_year"].to_numpy().astype(np.float64)
-            actual_years = eligible["actual_year"].to_numpy().astype(np.float64)
-            hold_years = np.maximum(actual_years - input_years, 0.0)
-
-            log_index_pred = np.log(
-                np.maximum(eligible["predicted"].to_numpy().astype(np.float64), 1.0)
-            )
-
-            # Sweep tau values (only on valid hedonic rows)
-            tau_values = [5.0, 10.0, 15.0, 20.0, 30.0]
-            actual_eligible = eligible["actual_price"].to_numpy().astype(np.float64)
-            best_tau = 15.0
-            best_mdape = float("inf")
-
-            print(f"\n  tau sweep ({valid.sum():,} eligible properties):")
-            for tau in tau_values:
-                blend_w = hold_years / (hold_years + tau)
-                log_blended = np.where(
-                    valid,
-                    (1 - blend_w) * log_index_pred + blend_w * log_hedonic,
-                    log_index_pred,
-                )
-                blended = np.exp(log_blended)
-                m = compute_metrics(actual_eligible, blended)
-                marker = ""
-                if m["MdAPE (%)"] < best_mdape:
-                    best_mdape = m["MdAPE (%)"]
-                    best_tau = tau
-                    marker = " <-- best"
-                print(
-                    f"    tau={tau:>4.0f}: MdAPE={m['MdAPE (%)']:>5.1f}%, "
-                    f"within 10%={m['% within 10%']:>5.1f}%{marker}"
-                )
-
-            print(f"\n  Best tau = {best_tau}")
-
-            # Compute blended predictions with best tau for full test set
-            blend_w = hold_years / (hold_years + best_tau)
-            log_blended = np.where(
-                valid,
-                (1 - blend_w) * log_index_pred + blend_w * log_hedonic,
-                log_index_pred,
-            )
-            blended_eligible = np.exp(log_blended)
-
-            # Merge back: for non-eligible rows, use index prediction
-            blended_all = test["predicted"].to_numpy().astype(np.float64).copy()
-            eligible_indices = eligible_mask.arg_true()
-            for i, idx in enumerate(eligible_indices):
-                blended_all[idx] = blended_eligible[i]
-
-            test = test.with_columns(
-                pl.Series("blended", blended_all, dtype=pl.Float64),
-            )
-            metrics["Blended"] = compute_metrics(actual, blended_all)
-
-    print_metrics_table(metrics)
-
-    # Save results
-    result_cols = [
-        "Postcode",
-        "sector",
-        "input_year",
-        "input_price",
-        "actual_year",
-        "actual_price",
-        "predicted",
-    ]
-    if "blended" in test.columns:
-        result_cols.append("blended")
-    result = test.select(result_cols)
-
-    result.write_parquet(args.output)
-    size_mb = args.output.stat().st_size / (1024 * 1024)
-    print(f"\nWrote {args.output} ({size_mb:.1f} MB)")
-    print(f"  {len(result):,} rows")
-
-
-if __name__ == "__main__":
-    main()
--- a/pipeline/transform/price_estimate.py
+++ b/pipeline/transform/price_estimate.py
@ -1,414 +0,0 @@
-"""Augment wide.parquet with an estimated current price column.
-
-Joins the precomputed repeat-sales price index (from price_index.py) with each
-property's last known sale to produce an inflation-adjusted current price estimate.
-Uses type-stratified index when available, falling back to "All" type.
-
-Optionally applies renovation premiums from renovation_premium.py: for properties
-with post-sale renovation events, the estimated price is adjusted upward based on
-data-driven per-area premiums with time decay.
-
-Modifies wide.parquet in-place, adding the "Estimated current price" column.
-"""
-
-import argparse
-import json
-import math
-from pathlib import Path
-
-import numpy as np
-import polars as pl
-
-from pipeline.transform._price_utils import (
-    CURRENT_YEAR,
-    sector_expr,
-    type_group_expr,
-)
-
-HALF_LIFE = 10.0
-DECAY_RATE = math.log(2) / HALF_LIFE
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Augment wide.parquet with estimated current prices"
-    )
-    parser.add_argument(
-        "--input",
-        type=Path,
-        required=True,
-        help="Path to wide.parquet (modified in-place)",
-    )
-    parser.add_argument(
-        "--index", type=Path, required=True, help="Path to price_index.parquet"
-    )
-    parser.add_argument(
-        "--renovation-premium",
-        type=Path,
-        default=None,
-        help="Path to renovation_premium.parquet (optional)",
-    )
-    parser.add_argument(
-        "--hedonic-model",
-        type=Path,
-        default=None,
-        help="Path to hedonic_model.json (optional)",
-    )
-    args = parser.parse_args()
-
-    print("Loading wide.parquet...")
-    df = pl.read_parquet(args.input)
-    print(f"  {len(df):,} rows, {len(df.columns)} columns")
-
-    # Drop existing estimated columns if re-running
-    for col in ["Estimated current price", "Est. price per sqm"]:
-        if col in df.columns:
-            df = df.drop(col)
-
-    # Derive helper columns for the join
-    has_price = (
-        pl.col("Last known price").is_not_null()
-        & pl.col("Postcode").is_not_null()
-        & pl.col("Date of last transaction").is_not_null()
-    )
-
-    df = df.with_columns(
-        sector_expr().alias("_sector"),
-        pl.col("Date of last transaction").dt.year().alias("_sale_year"),
-        type_group_expr().alias("_type_group"),
-    )
-
-    index = pl.read_parquet(args.index)
-    has_type_group = "type_group" in index.columns
-    if has_type_group:
-        print(
-            f"  Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors, "
-            f"{index['type_group'].n_unique()} type groups"
-        )
-    else:
-        print(
-            f"  Price index: {len(index):,} rows, {index['sector'].n_unique():,} sectors (unstratified)"
-        )
-
-    print("\nApplying repeat-sales index...")
-
-    if has_type_group:
-        idx_typed = index.filter(pl.col("type_group") != "All")
-        idx_all = index.filter(pl.col("type_group") == "All")
-
-        # Join type-specific index at sale year
-        df = df.join(
-            idx_typed.select(
-                "sector",
-                "type_group",
-                "year",
-                pl.col("log_index").alias("log_idx_sale_typed"),
-            ),
-            left_on=["_sector", "_type_group", "_sale_year"],
-            right_on=["sector", "type_group", "year"],
-            how="left",
-        )
-        # Join "All" index at sale year
-        df = df.join(
-            idx_all.select(
-                "sector", "year", pl.col("log_index").alias("log_idx_sale_all")
-            ),
-            left_on=["_sector", "_sale_year"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-        # Join type-specific index at current year
-        df = df.join(
-            idx_typed.filter(pl.col("year") == CURRENT_YEAR).select(
-                "sector", "type_group", pl.col("log_index").alias("log_idx_cur_typed")
-            ),
-            left_on=["_sector", "_type_group"],
-            right_on=["sector", "type_group"],
-            how="left",
-        )
-        # Join "All" index at current year
-        df = df.join(
-            idx_all.filter(pl.col("year") == CURRENT_YEAR).select(
-                "sector", pl.col("log_index").alias("log_idx_cur_all")
-            ),
-            left_on="_sector",
-            right_on="sector",
-            how="left",
-        )
-
-        df = df.with_columns(
-            pl.col("log_idx_sale_typed")
-            .fill_null(pl.col("log_idx_sale_all"))
-            .alias("_log_index_sale"),
-            pl.col("log_idx_cur_typed")
-            .fill_null(pl.col("log_idx_cur_all"))
-            .alias("_log_index_current"),
-        )
-    else:
-        df = df.join(
-            index.select(
-                "sector", "year", pl.col("log_index").alias("_log_index_sale")
-            ),
-            left_on=["_sector", "_sale_year"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-        index_current = index.filter(pl.col("year") == CURRENT_YEAR).select(
-            "sector", pl.col("log_index").alias("_log_index_current")
-        )
-        df = df.join(index_current, left_on="_sector", right_on="sector", how="left")
-
-    # Compute estimate — only for rows with a known price
-    df = df.with_columns(
-        pl.when(has_price)
-        .then(
-            pl.col("Last known price").cast(pl.Float64)
-            * (pl.col("_log_index_current") - pl.col("_log_index_sale")).exp()
-        )
-        .otherwise(pl.lit(None))
-        .alias("Estimated current price"),
-    )
-
-    n_adjusted = df.filter(has_price & pl.col("_log_index_sale").is_not_null()).height
-    n_with_price = df.filter(has_price).height
-    print(
-        f"  {n_adjusted:,} of {n_with_price:,} properties adjusted by index ({n_adjusted / max(n_with_price, 1) * 100:.1f}%)"
-    )
-
-    # Apply hedonic blending if model provided
-    if args.hedonic_model is not None:
-        print("\nApplying hedonic blending...")
-        with open(args.hedonic_model) as f:
-            model = json.load(f)
-        type_models = model["type_models"]
-        tau = model.get("tau", 15.0)
-        print(f"  tau = {tau}, {len(type_models)} type models")
-
-        # Add type_group for per-type lookup
-        df = df.with_columns(type_group_expr())
-        hedonic_mask = (
-            has_price
-            & pl.col("Estimated current price").is_not_null()
-            & pl.col("Total floor area (sqm)").is_not_null()
-            & (pl.col("Total floor area (sqm)") > 0)
-            & pl.col("type_group").is_not_null()
-        )
-        eligible = df.filter(hedonic_mask)
-
-        if len(eligible) > 0:
-            log_fa = np.log(
-                np.maximum(
-                    eligible["Total floor area (sqm)"].to_numpy().astype(np.float64),
-                    1.0,
-                )
-            )
-            sectors = eligible["_sector"].to_list()
-            types = eligible["type_group"].to_list()
-
-            # Per-type hedonic prediction
-            log_hedonic = np.empty(len(eligible))
-            for i in range(len(eligible)):
-                tm = type_models.get(types[i])
-                if tm is None:
-                    log_hedonic[i] = np.nan
-                    continue
-                alpha = tm["sector_intercepts"].get(
-                    sectors[i], tm["national_intercept"]
-                )
-                log_hedonic[i] = tm["beta_fa"] * log_fa[i] + alpha
-
-            valid = np.isfinite(log_hedonic)
-
-            # Hold years and blend weight
-            sale_years = eligible["_sale_year"].to_numpy().astype(np.float64)
-            hold_years = np.maximum(CURRENT_YEAR - sale_years, 0.0)
-            blend_w = hold_years / (hold_years + tau)
-
-            # Blend in log space
-            log_index_est = np.log(
-                eligible["Estimated current price"].to_numpy().astype(np.float64)
-            )
-            log_blended = np.where(
-                valid,
-                (1 - blend_w) * log_index_est + blend_w * log_hedonic,
-                log_index_est,
-            )
-            blended_prices = np.exp(log_blended)
-
-            # Write back into df
-            eligible_indices = df.select(hedonic_mask).to_series().arg_true()
-            price_arr = df["Estimated current price"].to_numpy().astype(np.float64)
-            for i, idx in enumerate(eligible_indices):
-                price_arr[idx] = blended_prices[i]
-            df = df.with_columns(
-                pl.Series("Estimated current price", price_arr, dtype=pl.Float64),
-            )
-
-            n_blended = int(valid.sum())
-            avg_w = float(np.mean(blend_w[valid]))
-            print(
-                f"  {n_blended:,} properties with hedonic blending (avg blend weight: {avg_w:.3f})"
-            )
-        else:
-            print("  No eligible properties for hedonic blending")
-
-    # Apply renovation premiums if provided
-    if args.renovation_premium is not None:
-        print("\nApplying renovation premiums...")
-        reno_prem = pl.read_parquet(args.renovation_premium)
-        print(f"  Loaded {len(reno_prem):,} premium rows")
-
-        # Find properties with post-sale renovation events
-        has_reno = (
-            pl.col("renovation_history").is_not_null()
-            & (pl.col("renovation_history").list.len() > 0)
-            & pl.col("Estimated current price").is_not_null()
-        )
-
-        # Explode renovation events, filter to post-sale only
-        reno_rows = (
-            df.lazy()
-            .filter(has_reno)
-            .select("_sector", "_type_group", "_sale_year", "renovation_history")
-            .with_row_index("_row_idx")
-            .explode("renovation_history")
-            .with_columns(
-                pl.col("renovation_history").struct.field("year").alias("_event_year"),
-                pl.col("renovation_history").struct.field("event").alias("_event_type"),
-            )
-            .filter(pl.col("_event_year") > pl.col("_sale_year"))
-            .collect()
-        )
-
-        if len(reno_rows) > 0:
-            # Take most recent event per (row, event_type)
-            latest = (
-                reno_rows.lazy()
-                .group_by("_row_idx", "_event_type", "_sector", "_type_group")
-                .agg(pl.col("_event_year").max().alias("_event_year"))
-                .collect()
-            )
-
-            # Compute time-decayed premium
-            latest = latest.with_columns(
-                (-DECAY_RATE * (CURRENT_YEAR - pl.col("_event_year")).cast(pl.Float64))
-                .exp()
-                .alias("_decay"),
-            )
-
-            # Join with renovation_premium.parquet — try typed first, fall back to "All"
-            rp_typed = reno_prem.filter(pl.col("type_group") != "All")
-            rp_all = reno_prem.filter(pl.col("type_group") == "All")
-
-            latest = (
-                latest.join(
-                    rp_typed.select(
-                        "sector",
-                        "type_group",
-                        "event_type",
-                        pl.col("log_premium").alias("_lp_typed"),
-                    ),
-                    left_on=["_sector", "_type_group", "_event_type"],
-                    right_on=["sector", "type_group", "event_type"],
-                    how="left",
-                )
-                .join(
-                    rp_all.select(
-                        "sector", "event_type", pl.col("log_premium").alias("_lp_all")
-                    ),
-                    left_on=["_sector", "_event_type"],
-                    right_on=["sector", "event_type"],
-                    how="left",
-                )
-                .with_columns(
-                    pl.col("_lp_typed")
-                    .fill_null(pl.col("_lp_all"))
-                    .fill_null(0.0)
-                    .alias("_log_premium"),
-                )
-            )
-
-            # Compute total decayed log premium per property
-            per_property = (
-                latest.lazy()
-                .with_columns(
-                    (pl.col("_log_premium") * pl.col("_decay")).alias("_decayed_lp"),
-                )
-                .group_by("_row_idx")
-                .agg(pl.col("_decayed_lp").sum().alias("_reno_log_premium"))
-                .collect()
-            )
-
-            # We need to map _row_idx back to the main df. Re-derive the row indices.
-            # _row_idx was generated from filtered rows — we need the actual df row indices.
-            reno_mask = df.select(has_reno).to_series()
-            actual_indices = reno_mask.arg_true()
-
-            # Build a mapping: _row_idx -> actual df row
-            idx_map = per_property.with_columns(
-                pl.col("_row_idx")
-                .map_elements(
-                    lambda i: int(actual_indices[i]),
-                    return_dtype=pl.UInt32,
-                )
-                .alias("_df_row"),
-            )
-
-            # Create a full-length column of zeros, then fill in premium values
-            reno_log_prem = [0.0] * len(df)
-            for row in idx_map.iter_rows(named=True):
-                reno_log_prem[row["_df_row"]] = row["_reno_log_premium"]
-
-            df = df.with_columns(
-                pl.Series("_reno_log_premium", reno_log_prem, dtype=pl.Float64),
-            )
-
-            # Apply: multiply estimated price by exp(reno_log_premium) where premium > 0
-            df = df.with_columns(
-                pl.when(pl.col("_reno_log_premium") != 0.0)
-                .then(
-                    pl.col("Estimated current price")
-                    * pl.col("_reno_log_premium").exp()
-                )
-                .otherwise(pl.col("Estimated current price"))
-                .alias("Estimated current price"),
-            )
-
-            n_with_premium = idx_map.height
-            avg_multiplier = math.exp(
-                per_property["_reno_log_premium"]
-                .filter(per_property["_reno_log_premium"] != 0.0)
-                .mean()
-            )
-            print(f"  {n_with_premium:,} properties with renovation premium applied")
-            print(
-                f"  Average premium multiplier: {avg_multiplier:.3f} ({avg_multiplier - 1:.1%} uplift)"
-            )
-        else:
-            print("  No properties with post-sale renovation events")
-
-    # Derive estimated price per sqm where both estimated price and floor area exist
-    df = df.with_columns(
-        (pl.col("Estimated current price") / pl.col("Total floor area (sqm)"))
-        .round(0)
-        .cast(pl.Int32)
-        .alias("Est. price per sqm"),
-    )
-
-    # Drop all temporary columns
-    temp_cols = [c for c in df.columns if c.startswith("_") or c.startswith("log_idx_")]
-    # Also drop hedonic-derived column if it was added
-    if "type_group" in df.columns:
-        temp_cols.append("type_group")
-    df = df.drop(temp_cols)
-
-    df.write_parquet(args.input)
-    size_mb = args.input.stat().st_size / (1024 * 1024)
-    print(f"\nWrote {args.input} ({size_mb:.1f} MB)")
-    print(
-        f"  {len(df):,} rows, {len(df.columns)} columns (including 'Estimated current price')"
-    )
-
-
-if __name__ == "__main__":
-    main()
--- a/pipeline/transform/price_index.py
+++ b/pipeline/transform/price_index.py
@ -1,523 +0,0 @@
-"""Repeat-Sales Price Index (improved)
-
-Builds a hierarchical repeat-sales price index with:
-1. Stratification by property type (Detached/Semi-Detached/Terraced/Flats)
-2. Robust regression (IRLS with Huber weights) instead of hard outlier cutoff
-3. National hedonic time-dummy model as ultimate shrinkage fallback
-4. Spatial smoothing for sparse sectors via KD-tree nearest neighbors
-
-Output: price_index.parquet — sector × type_group × year → log_index
-"""
-
-import argparse
-from pathlib import Path
-
-import numpy as np
-import polars as pl
-from scipy.sparse import csc_matrix
-from scipy.sparse.linalg import lsqr
-from scipy.spatial import KDTree
-from tqdm import tqdm
-
-from pipeline.transform._price_utils import (
-    CURRENT_YEAR,
-    SHRINKAGE_K,
-    TYPE_GROUPS,
-    build_hedonic_features,
-    extract_centroids,
-    hierarchy_keys,
-    sector_expr,
-    type_group_expr,
-)
-
-# --- Constants ---
-# Fewest pairs for which an index is solved; smaller groups yield no index of their own.
-MIN_PAIRS = 5
-OUTLIER_THRESHOLD = 3.0  # hard pre-filter; Huber handles the rest
-# Absolute residual above which the Huber reweighting starts down-weighting a pair.
-HUBER_K = 1.345
-# Number of solve-then-reweight passes performed by the IRLS loop.
-IRLS_ITERATIONS = 5
-# Number of nearest neighbouring sectors queried during spatial smoothing.
-SPATIAL_NEIGHBORS = 5
-# Blend constant: a sector keeps weight n / (n + SPATIAL_BLEND_K) for its own index.
-SPATIAL_BLEND_K = 30
-
-
-# --- Pair extraction ---
-
-
-def extract_pairs(input_path: Path) -> pl.DataFrame:
-    """Build the table of consecutive repeat-sale pairs used to fit the index.
-
-    Args:
-        input_path: Parquet file providing the "Postcode", "historical_prices"
-            and "Property type" columns.
-
-    Returns:
-        One row per consecutive transaction pair with the columns sector,
-        type_group, year1, price1, year2, price2, log_ratio, weight, district
-        and area.
-    """
-    print("Extracting repeat-sale pairs...")
-    # Keep only properties with a postcode and at least two recorded transactions.
-    # NOTE(review): sector_expr()/type_group_expr() presumably add the "sector" and
-    # "type_group" columns selected further down — confirm in _price_utils.
-    df = (
-        pl.scan_parquet(input_path)
-        .select("Postcode", "historical_prices", "Property type")
-        .filter(
-            pl.col("Postcode").is_not_null(),
-            pl.col("historical_prices").list.len() >= 2,
-        )
-        .with_columns(sector_expr(), type_group_expr())
-        .collect()
-    )
-    print(f"  {len(df):,} properties with 2+ transactions")
-
-    # Pair each transaction with its successor: "from" drops the last entry and
-    # "to" drops the first, so both lists have equal length and explode in step.
-    pairs = (
-        df.lazy()
-        .with_columns(
-            pl.col("historical_prices")
-            .list.slice(0, pl.col("historical_prices").list.len() - 1)
-            .alias("from_txn"),
-            pl.col("historical_prices").list.slice(1).alias("to_txn"),
-        )
-        .explode("from_txn", "to_txn")
-        .with_columns(
-            pl.col("from_txn").struct.field("year").alias("year1"),
-            pl.col("from_txn").struct.field("price").alias("price1"),
-            pl.col("to_txn").struct.field("year").alias("year2"),
-            pl.col("to_txn").struct.field("price").alias("price2"),
-        )
-        .select("sector", "type_group", "year1", "price1", "year2", "price2")
-        # Positive prices keep the log defined; year2 > year1 keeps the weight finite.
-        .filter(
-            pl.col("price1") > 0,
-            pl.col("price2") > 0,
-            pl.col("year2") > pl.col("year1"),
-        )
-        .with_columns(
-            (pl.col("price2").cast(pl.Float64) / pl.col("price1").cast(pl.Float64))
-            .log()
-            .alias("log_ratio"),
-            # Weight is 1 / sqrt(years between the two sales).
-            (1.0 / (pl.col("year2") - pl.col("year1")).cast(pl.Float64).sqrt()).alias(
-                "weight"
-            ),
-        )
-        # Discard pairs whose absolute log price ratio exceeds the hard threshold.
-        .filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
-        .collect()
-    )
-
-    # Add hierarchy columns
-    # district = sector with its trailing whitespace+digits removed;
-    # area = district with everything from its first digit onwards removed.
-    pairs = pairs.with_columns(
-        pl.col("sector").str.replace(r"\s+\d+$", "").alias("district"),
-    ).with_columns(
-        pl.col("district").str.replace(r"\d.*$", "").alias("area"),
-    )
-
-    print(f"  {len(pairs):,} pairs extracted")
-    return pairs
-
-
-# --- Robust IRLS solver ---
-
-
-def solve_robust_index(
-    years1: np.ndarray,
-    years2: np.ndarray,
-    log_ratios: np.ndarray,
-    base_weights: np.ndarray,
-) -> dict[int, float]:
-    """IRLS Huber M-estimation for the Case-Shiller repeat-sales model."""
-    n = len(years1)
-    if n < MIN_PAIRS:
-        return {}
-
-    all_years = np.union1d(years1, years2)
-    min_year = int(all_years.min())
-
-    col = 0
-    year_to_col = {}
-    for y in all_years:
-        iy = int(y)
-        if iy != min_year:
-            year_to_col[iy] = col
-            col += 1
-    n_cols = len(year_to_col)
-    if n_cols == 0:
-        return {}
-
-    # Vectorized column index mapping
-    col2 = np.full(n, -1, dtype=np.int32)
-    col1 = np.full(n, -1, dtype=np.int32)
-    for year, c in year_to_col.items():
-        col2[years2 == year] = c
-        col1[years1 == year] = c
-
-    # Sparse matrix structure (fixed across iterations)
-    mask2 = col2 >= 0
-    mask1 = col1 >= 0
-    rows_arr = np.concatenate([np.where(mask2)[0], np.where(mask1)[0]])
-    cols_arr = np.concatenate([col2[mask2], col1[mask1]])
-    signs_arr = np.concatenate([np.ones(mask2.sum()), -np.ones(mask1.sum())])
-
-    weights = base_weights.copy()
-
-    for _ in range(IRLS_ITERATIONS):
-        data = signs_arr * weights[rows_arr]
-        A = csc_matrix((data, (rows_arr, cols_arr)), shape=(n, n_cols))
-        b = log_ratios * weights
-        betas = lsqr(A, b, atol=1e-10, btol=1e-10)[0]
-
-        # Residuals
-        predicted = np.zeros(n)
-        predicted[mask2] += betas[col2[mask2]]
-        predicted[mask1] -= betas[col1[mask1]]
-        residuals = log_ratios - predicted
-
-        # Huber reweighting
-        abs_r = np.abs(residuals)
-        huber_w = np.where(abs_r <= HUBER_K, 1.0, HUBER_K / np.maximum(abs_r, 1e-10))
-        weights = base_weights * huber_w
-
-    index = {min_year: 0.0}
-    for year, c in year_to_col.items():
-        index[year] = float(betas[c])
-    return index
-
-
-def compute_indices_for_level(pairs: pl.DataFrame, group_col: str):
-    """Solve robust indices for each group. Returns (indices, n_pairs) dicts."""
-    groups = pairs.group_by(group_col).agg(
-        pl.col("year1"),
-        pl.col("year2"),
-        pl.col("log_ratio"),
-        pl.col("weight"),
-    )
-    indices = {}
-    n_pairs = {}
-    for row in tqdm(
-        groups.iter_rows(named=True), total=len(groups), desc=f"    {group_col}"
-    ):
-        key = row[group_col]
-        y1 = np.array(row["year1"], dtype=np.int32)
-        y2 = np.array(row["year2"], dtype=np.int32)
-        lr = np.array(row["log_ratio"], dtype=np.float64)
-        w = np.array(row["weight"], dtype=np.float64)
-        idx = solve_robust_index(y1, y2, lr, w)
-        if idx:
-            indices[key] = idx
-            n_pairs[key] = len(y1)
-    return indices, n_pairs
-
-
-# --- Hedonic model ---
-
-
-def compute_hedonic_index(
-    input_path: Path, min_year: int, max_year: int
-) -> dict[int, float]:
-    """Two-step hedonic index: regress log(price) on features, average residual by year."""
-    print("Computing hedonic index...")
-    df = (
-        pl.scan_parquet(input_path)
-        .select(
-            "Last known price",
-            "Date of last transaction",
-            "Property type",
-            "Total floor area (sqm)",
-        )
-        .filter(
-            pl.col("Last known price").is_not_null(),
-            pl.col("Total floor area (sqm)").is_not_null(),
-            pl.col("Total floor area (sqm)") > 0,
-        )
-        .with_columns(
-            pl.col("Date of last transaction").dt.year().alias("sale_year"),
-            type_group_expr(),
-        )
-        .filter(
-            pl.col("type_group").is_not_null(),
-            pl.col("sale_year").is_not_null(),
-            pl.col("sale_year") >= min_year,
-            pl.col("sale_year") <= max_year,
-        )
-        .collect()
-    )
-    print(f"  {len(df):,} complete cases for hedonic model")
-
-    # Target
-    log_price = np.log(df["Last known price"].to_numpy().astype(np.float64))
-    sale_years = df["sale_year"].to_numpy()
-
-    # Build feature matrix (18 hedonic features + intercept)
-    X = build_hedonic_features(df)
-    F = np.hstack([X, np.ones((len(df), 1), dtype=np.float32)])
-    print(f"  Feature matrix: {F.shape[0]:,} × {F.shape[1]}")
-
-    # Step 1: regress log(price) on features → quality score
-    betas = np.linalg.lstsq(F.astype(np.float64), log_price, rcond=None)[0]
-    quality_score = F.astype(np.float64) @ betas
-    residuals = log_price - quality_score
-
-    # Step 2: average residual by year = hedonic index
-    hedonic = {}
-    for y in range(min_year, max_year + 1):
-        mask = sale_years == y
-        if mask.sum() > 0:
-            hedonic[y] = float(np.mean(residuals[mask]))
-
-    # Normalize: min_year = 0
-    base = hedonic.get(min_year, 0.0)
-    for y in hedonic:
-        hedonic[y] -= base
-
-    print(
-        f"  Hedonic index: {len(hedonic)} years, range {min(hedonic.values()):.3f} to {max(hedonic.values()):.3f}"
-    )
-    return hedonic
-
-
-# --- Shrinkage ---
-
-
-def shrink_index(raw: dict, parent: dict, n_pairs: int, k: int = SHRINKAGE_K) -> dict:
-    w = n_pairs / (n_pairs + k)
-    result = {}
-    for y in set(raw) | set(parent):
-        r = raw.get(y, parent.get(y, 0.0))
-        p = parent.get(y, raw.get(y, 0.0))
-        result[y] = w * r + (1 - w) * p
-    return result
-
-
-def apply_shrinkage(
-    sector_idx,
-    sector_n,
-    district_idx,
-    district_n,
-    area_idx,
-    area_n,
-    national_idx,
-    national_n,
-    hedonic_idx,
-    all_sectors,
-    sector_to_dist,
-    dist_to_area,
-):
-    """Top-down hierarchical shrinkage: national→hedonic, area→national, etc."""
-    # National → hedonic
-    national_shrunk = shrink_index(national_idx, hedonic_idx, national_n)
-
-    # Area → national
-    area_shrunk = {}
-    for area, idx in area_idx.items():
-        area_shrunk[area] = shrink_index(idx, national_shrunk, area_n[area])
-
-    # District → area
-    district_shrunk = {}
-    for dist, idx in district_idx.items():
-        a = dist_to_area.get(dist, "")
-        parent = area_shrunk.get(a, national_shrunk)
-        district_shrunk[dist] = shrink_index(idx, parent, district_n[dist])
-
-    # Sector → district
-    sector_shrunk = {}
-    for sec, idx in sector_idx.items():
-        d = sector_to_dist.get(sec, "")
-        parent = district_shrunk.get(d, national_shrunk)
-        sector_shrunk[sec] = shrink_index(idx, parent, sector_n[sec])
-
-    # Fill sectors without their own index
-    for sec in all_sectors:
-        if sec not in sector_shrunk:
-            d = sector_to_dist.get(sec, "")
-            a = dist_to_area.get(d, "")
-            sector_shrunk[sec] = district_shrunk.get(
-                d, area_shrunk.get(a, national_shrunk)
-            )
-
-    return sector_shrunk
-
-
-# --- Spatial smoothing ---
-
-
-def spatial_smooth(
-    sector_indices: dict,
-    centroids: dict,
-    n_pairs_map: dict,
-) -> dict:
-    """Blend sparse sector indices with K nearest neighbors."""
-    # Build coordinate arrays for sectors with centroids
-    sectors_with_coords = [s for s in sector_indices if s in centroids]
-    if len(sectors_with_coords) < SPATIAL_NEIGHBORS + 1:
-        return sector_indices
-
-    coords = np.array([centroids[s] for s in sectors_with_coords])
-    # Scale longitude by cos(mean_lat) for approximate Euclidean distance
-    mean_lat = np.mean(coords[:, 0])
-    scale = np.cos(np.radians(mean_lat))
-    scaled_coords = np.column_stack([coords[:, 0], coords[:, 1] * scale])
-    tree = KDTree(scaled_coords)
-
-    result = dict(sector_indices)
-    for i, sec in enumerate(sectors_with_coords):
-        n = n_pairs_map.get(sec, 0)
-        self_w = n / (n + SPATIAL_BLEND_K)
-        if self_w > 0.95:
-            continue  # enough data, skip smoothing
-
-        dists, idxs = tree.query(scaled_coords[i], k=SPATIAL_NEIGHBORS + 1)
-        # Skip self (index 0, distance ~0)
-        neighbor_dists = dists[1:]
-        neighbor_idxs = idxs[1:]
-
-        inv_dists = []
-        neighbor_indices = []
-        for d, j in zip(neighbor_dists, neighbor_idxs):
-            ns = sectors_with_coords[j]
-            if d > 0 and ns in sector_indices:
-                inv_dists.append(1.0 / d)
-                neighbor_indices.append(sector_indices[ns])
-
-        if not neighbor_indices:
-            continue
-
-        total_inv = sum(inv_dists)
-        nbr_w = 1.0 - self_w
-        ws = [iw / total_inv * nbr_w for iw in inv_dists]
-
-        blended = {}
-        all_years = set(sector_indices[sec])
-        for ni in neighbor_indices:
-            all_years |= set(ni)
-        for y in all_years:
-            val = self_w * sector_indices[sec].get(y, 0.0)
-            for ni, w in zip(neighbor_indices, ws):
-                val += w * ni.get(y, 0.0)
-            blended[y] = val
-        result[sec] = blended
-
-    return result
-
-
-# --- Forward fill ---
-
-
-def forward_fill(index: dict, min_year: int, max_year: int) -> dict:
-    filled = {}
-    last = 0.0
-    for y in range(min_year, max_year + 1):
-        if y in index:
-            last = index[y]
-        filled[y] = last
-    return filled
-
-
-# --- Main ---
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Build improved repeat-sales price index"
-    )
-    parser.add_argument("--input", type=Path, required=True)
-    parser.add_argument("--output", type=Path, required=True)
-    args = parser.parse_args()
-
-    pairs = extract_pairs(args.input)
-    centroids = extract_centroids(args.input)
-
-    min_year = int(pairs["year1"].min())
-    max_year = max(int(pairs["year2"].max()), CURRENT_YEAR)
-
-    hedonic_idx = compute_hedonic_index(args.input, min_year, max_year)
-
-    # Precompute hierarchy
-    all_sectors = pairs["sector"].unique().to_list()
-    sector_to_dist = {}
-    dist_to_area = {}
-    for s in all_sectors:
-        d, a = hierarchy_keys(s)
-        sector_to_dist[s] = d
-        dist_to_area[d] = a
-
-    # Process each type group + "All"
-    all_type_groups = ["All"] + TYPE_GROUPS
-    final = {}  # {type_group: {sector: {year: log_index}}}
-    final_n = {}  # {type_group: {sector: n_pairs}}
-
-    for tg in all_type_groups:
-        print(f"\n--- {tg} ---")
-        typed = pairs if tg == "All" else pairs.filter(pl.col("type_group") == tg)
-        if len(typed) < MIN_PAIRS:
-            print(f"  Skipping (only {len(typed)} pairs)")
-            final[tg] = {s: dict(hedonic_idx) for s in all_sectors}
-            final_n[tg] = {s: 0 for s in all_sectors}
-            continue
-
-        print(f"  {len(typed):,} pairs")
-
-        # National
-        np_arrs = typed.select("year1", "year2", "log_ratio", "weight")
-        national_idx = solve_robust_index(
-            np_arrs["year1"].to_numpy(),
-            np_arrs["year2"].to_numpy(),
-            np_arrs["log_ratio"].to_numpy(),
-            np_arrs["weight"].to_numpy(),
-        )
-        national_n = len(typed)
-        print(f"  National: {len(national_idx)} years")
-
-        # Area, district, sector
-        print("  Computing per-level indices:")
-        area_idx, area_n = compute_indices_for_level(typed, "area")
-        district_idx, district_n = compute_indices_for_level(typed, "district")
-        sector_idx, sector_n = compute_indices_for_level(typed, "sector")
-        print(
-            f"  {len(area_idx)} areas, {len(district_idx)} districts, {len(sector_idx)} sectors"
-        )
-
-        # Shrinkage
-        print("  Applying shrinkage...")
-        sector_shrunk = apply_shrinkage(
-            sector_idx,
-            sector_n,
-            district_idx,
-            district_n,
-            area_idx,
-            area_n,
-            national_idx,
-            national_n,
-            hedonic_idx,
-            all_sectors,
-            sector_to_dist,
-            dist_to_area,
-        )
-
-        # Spatial smoothing
-        print("  Spatial smoothing...")
-        sector_smoothed = spatial_smooth(sector_shrunk, centroids, sector_n)
-
-        # Forward fill
-        for sec in all_sectors:
-            sector_smoothed[sec] = forward_fill(
-                sector_smoothed.get(sec, hedonic_idx), min_year, max_year
-            )
-
-        final[tg] = sector_smoothed
-        final_n[tg] = sector_n
-
-    # Assemble output
-    print("\nAssembling output...")
-    rows = []
-    for tg in all_type_groups:
-        for sec in all_sectors:
-            n = final_n[tg].get(sec, 0)
-            for year, log_idx in final[tg][sec].items():
-                rows.append((sec, tg, year, log_idx, n))
-
-    result = pl.DataFrame(
-        rows,
-        schema={
-            "sector": pl.String,
-            "type_group": pl.String,
-            "year": pl.Int32,
-            "log_index": pl.Float64,
-            "n_pairs": pl.Int64,
-        },
-        orient="row",
-    ).sort("type_group", "sector", "year")
-
-    result.write_parquet(args.output)
-    size_mb = args.output.stat().st_size / (1024 * 1024)
-    print(f"\nWrote {args.output} ({size_mb:.1f} MB)")
-    print(
-        f"  {result['sector'].n_unique():,} sectors × {len(all_type_groups)} types × {max_year - min_year + 1} years = {len(result):,} rows"
-    )
-
-
-# Run main() only when this file is executed directly, not when it is imported.
-if __name__ == "__main__":
-    main()
--- a/pipeline/transform/renovation_premium.py
+++ b/pipeline/transform/renovation_premium.py
@ -1,572 +0,0 @@
-"""Estimate per-area renovation premiums from repeat-sale residuals.
-
-For each repeat-sale pair, computes the residual after removing the price-index
-predicted return. Pairs where renovation events occurred between sales should have
-systematically higher residuals. A WLS regression estimates the log-premium per
-event type, with hierarchical shrinkage and spatial smoothing.
-
-Output: renovation_premium.parquet — sector × type_group × event_type → log_premium
-"""
-
-import argparse
-import math
-from pathlib import Path
-
-import numpy as np
-import polars as pl
-from scipy.spatial import KDTree
-
-from pipeline.transform._price_utils import (
-    SHRINKAGE_K,
-    TYPE_GROUPS,
-    extract_centroids,
-    hierarchy_keys,
-    sector_expr,
-    type_group_expr,
-)
-
-# Years after which a renovation event's indicator has decayed to one half.
-HALF_LIFE = 10.0
-# Rate used in exp(-DECAY_RATE * years_since_event); ln(2) / HALF_LIFE.
-DECAY_RATE = math.log(2) / HALF_LIFE
-# Pairs whose absolute log price ratio exceeds this are discarded.
-OUTLIER_THRESHOLD = 3.0
-# Groups with fewer pairs than this get no regression of their own.
-MIN_PAIRS = 10
-# Number of nearest neighbouring sectors queried during spatial smoothing.
-SPATIAL_NEIGHBORS = 5
-# Blend constant: a sector keeps weight n / (n + SPATIAL_BLEND_K) for its own premiums.
-SPATIAL_BLEND_K = 30
-# Event labels for which a premium is estimated; each has a has_<lowercase name> column.
-EVENT_TYPES = ["Extension", "Renovation", "Remodeling"]
-
-
-def extract_pairs_with_events(input_path: Path, index_path: Path) -> pl.DataFrame:
-    """Extract repeat-sale pairs with renovation events and index residuals."""
-    print("Extracting repeat-sale pairs with renovation events...")
-
-    df = (
-        pl.scan_parquet(input_path)
-        .select("Postcode", "historical_prices", "Property type", "renovation_history")
-        .filter(
-            pl.col("Postcode").is_not_null(),
-            pl.col("historical_prices").list.len() >= 2,
-        )
-        .with_columns(sector_expr(), type_group_expr())
-        .collect()
-    )
-    print(f"  {len(df):,} properties with 2+ transactions")
-
-    # Build consecutive pairs
-    pairs = (
-        df.lazy()
-        .with_columns(
-            pl.col("historical_prices")
-            .list.slice(0, pl.col("historical_prices").list.len() - 1)
-            .alias("from_txn"),
-            pl.col("historical_prices").list.slice(1).alias("to_txn"),
-        )
-        .explode("from_txn", "to_txn")
-        .with_columns(
-            pl.col("from_txn").struct.field("year").alias("year1"),
-            pl.col("from_txn").struct.field("price").alias("price1"),
-            pl.col("to_txn").struct.field("year").alias("year2"),
-            pl.col("to_txn").struct.field("price").alias("price2"),
-        )
-        .select(
-            "sector",
-            "type_group",
-            "year1",
-            "price1",
-            "year2",
-            "price2",
-            "renovation_history",
-        )
-        .filter(
-            pl.col("price1") > 0,
-            pl.col("price2") > 0,
-            pl.col("year2") > pl.col("year1"),
-        )
-        .with_columns(
-            (pl.col("price2").cast(pl.Float64) / pl.col("price1").cast(pl.Float64))
-            .log()
-            .alias("log_ratio"),
-        )
-        .filter(pl.col("log_ratio").abs() <= OUTLIER_THRESHOLD)
-        .collect()
-    )
-    print(f"  {len(pairs):,} repeat-sale pairs")
-
-    # Join price index to compute residuals
-    index = pl.read_parquet(index_path)
-    has_type_group = "type_group" in index.columns
-
-    if has_type_group:
-        idx_typed = index.filter(pl.col("type_group") != "All")
-        idx_all = index.filter(pl.col("type_group") == "All")
-
-        # Join at year1
-        pairs = pairs.join(
-            idx_typed.select(
-                "sector", "type_group", "year", pl.col("log_index").alias("li1_typed")
-            ),
-            left_on=["sector", "type_group", "year1"],
-            right_on=["sector", "type_group", "year"],
-            how="left",
-        ).join(
-            idx_all.select("sector", "year", pl.col("log_index").alias("li1_all")),
-            left_on=["sector", "year1"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-        # Join at year2
-        pairs = pairs.join(
-            idx_typed.select(
-                "sector", "type_group", "year", pl.col("log_index").alias("li2_typed")
-            ),
-            left_on=["sector", "type_group", "year2"],
-            right_on=["sector", "type_group", "year"],
-            how="left",
-        ).join(
-            idx_all.select("sector", "year", pl.col("log_index").alias("li2_all")),
-            left_on=["sector", "year2"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-
-        pairs = pairs.with_columns(
-            (pl.col("li1_typed").fill_null(pl.col("li1_all"))).alias("_li1"),
-            (pl.col("li2_typed").fill_null(pl.col("li2_all"))).alias("_li2"),
-        )
-    else:
-        pairs = pairs.join(
-            index.select("sector", "year", pl.col("log_index").alias("_li1")),
-            left_on=["sector", "year1"],
-            right_on=["sector", "year"],
-            how="left",
-        ).join(
-            index.select("sector", "year", pl.col("log_index").alias("_li2")),
-            left_on=["sector", "year2"],
-            right_on=["sector", "year"],
-            how="left",
-        )
-
-    # Compute residual = log_ratio - (index2 - index1)
-    pairs = pairs.with_columns(
-        (
-            pl.col("log_ratio")
-            - (pl.col("_li2").fill_null(0.0) - pl.col("_li1").fill_null(0.0))
-        ).alias("residual"),
-        (1.0 / (pl.col("year2") - pl.col("year1")).cast(pl.Float64).sqrt()).alias(
-            "weight"
-        ),
-    )
-
-    # For each pair, compute time-decayed renovation indicators
-    # Use row index for unique identification (composite keys aren't unique per pair)
-    pairs = pairs.with_row_index("_pair_idx")
-
-    for et in EVENT_TYPES:
-        col_name = f"has_{et.lower()}"
-        pairs = pairs.with_columns(pl.lit(0.0).alias(col_name))
-
-    # Process properties that have renovation history
-    has_reno = pairs.filter(
-        pl.col("renovation_history").is_not_null()
-        & (pl.col("renovation_history").list.len() > 0)
-    )
-
-    if len(has_reno) > 0:
-        reno_exploded = (
-            has_reno.select("_pair_idx", "year1", "year2", "renovation_history")
-            .explode("renovation_history")
-            .with_columns(
-                pl.col("renovation_history").struct.field("year").alias("event_year"),
-                pl.col("renovation_history").struct.field("event").alias("event_type"),
-            )
-            # Only events between the two sales
-            .filter(
-                (pl.col("event_year") > pl.col("year1"))
-                & (pl.col("event_year") <= pl.col("year2"))
-            )
-        )
-
-        if len(reno_exploded) > 0:
-            # For each pair + event type, take the most recent event
-            latest_events = reno_exploded.group_by(
-                "_pair_idx", "event_type", "year2"
-            ).agg(pl.col("event_year").max().alias("latest_event_year"))
-
-            # Compute time-decayed indicator: exp(-decay_rate * (year2 - event_year))
-            latest_events = latest_events.with_columns(
-                (
-                    -DECAY_RATE
-                    * (pl.col("year2") - pl.col("latest_event_year")).cast(pl.Float64)
-                )
-                .exp()
-                .alias("decayed_indicator"),
-            )
-
-            # Pivot to wide format using _pair_idx for unique join
-            for et in EVENT_TYPES:
-                et_data = latest_events.filter(pl.col("event_type") == et)
-                if len(et_data) > 0:
-                    col_name = f"has_{et.lower()}"
-                    pairs = (
-                        pairs.join(
-                            et_data.select(
-                                "_pair_idx",
-                                pl.col("decayed_indicator").alias(f"_{col_name}"),
-                            ),
-                            on="_pair_idx",
-                            how="left",
-                        )
-                        .with_columns(
-                            pl.col(f"_{col_name}").fill_null(0.0).alias(col_name),
-                        )
-                        .drop(f"_{col_name}")
-                    )
-
-    pairs = pairs.drop("_pair_idx")
-
-    # Add hierarchy columns
-    pairs = pairs.with_columns(
-        pl.col("sector").str.replace(r"\s+\d+$", "").alias("district"),
-    ).with_columns(
-        pl.col("district").str.replace(r"\d.*$", "").alias("area"),
-    )
-
-    # Count reno pairs
-    reno_mask = (
-        (pl.col("has_extension") > 0)
-        | (pl.col("has_renovation") > 0)
-        | (pl.col("has_remodeling") > 0)
-    )
-    n_reno = pairs.filter(reno_mask).height
-    print(
-        f"  {n_reno:,} pairs with renovation events ({n_reno / len(pairs) * 100:.1f}%)"
-    )
-
-    # Drop temporary columns from index join + renovation_history (no longer needed)
-    temp_cols = [
-        c
-        for c in pairs.columns
-        if c.startswith("_li") or c.startswith("li1_") or c.startswith("li2_")
-    ]
-    pairs = pairs.drop(temp_cols + ["renovation_history"])
-
-    return pairs
-
-
-def wls_regression(
-    residuals: np.ndarray,
-    weights: np.ndarray,
-    X: np.ndarray,
-) -> np.ndarray:
-    """Weighted least squares: residual ~ X (with intercept column in X).
-
-    Uses sqrt(weights) scaling to avoid building a full N×N diagonal matrix.
-    """
-    sqrt_w = np.sqrt(weights)[:, np.newaxis]
-    Xw = X * sqrt_w
-    yw = residuals * sqrt_w.ravel()
-    try:
-        betas = np.linalg.lstsq(Xw, yw, rcond=None)[0]
-    except np.linalg.LinAlgError:
-        betas = np.zeros(X.shape[1])
-    return betas
-
-
-def compute_premiums_for_group(df: pl.DataFrame) -> dict[str, float]:
-    """Run WLS regression for a group, return {event_type: log_premium}."""
-    n = len(df)
-    if n < MIN_PAIRS:
-        return {}
-
-    residuals = df["residual"].to_numpy().astype(np.float64)
-    weights = df["weight"].to_numpy().astype(np.float64)
-
-    # Build design matrix: intercept + 3 event indicators
-    X = np.column_stack(
-        [
-            np.ones(n),
-            df["has_extension"].to_numpy().astype(np.float64),
-            df["has_renovation"].to_numpy().astype(np.float64),
-            df["has_remodeling"].to_numpy().astype(np.float64),
-        ]
-    )
-
-    # Check if we have any renovation pairs in this group
-    reno_sum = X[:, 1:].sum()
-    if reno_sum < 1.0:
-        return {}
-
-    betas = wls_regression(residuals, weights, X)
-    # betas[0] is intercept, betas[1:4] are the premiums
-    return {
-        "Extension": float(betas[1]),
-        "Renovation": float(betas[2]),
-        "Remodeling": float(betas[3]),
-    }
-
-
-def compute_premiums_for_level(
-    pairs: pl.DataFrame, group_col: str
-) -> tuple[dict, dict]:
-    """Compute premiums per group at a given hierarchy level.
-
-    Returns (premiums, n_reno_pairs) dicts keyed by group value.
-    premiums[key] = {event_type: log_premium}
-    """
-    groups = pairs.group_by(group_col)
-    premiums = {}
-    n_reno_pairs = {}
-    for key, group_df in groups:
-        key_val = key[0]
-        result = compute_premiums_for_group(group_df)
-        if result:
-            premiums[key_val] = result
-            # Count pairs with any reno indicator
-            reno_mask = (
-                (group_df["has_extension"].to_numpy() > 0)
-                | (group_df["has_renovation"].to_numpy() > 0)
-                | (group_df["has_remodeling"].to_numpy() > 0)
-            )
-            n_reno_pairs[key_val] = int(reno_mask.sum())
-    return premiums, n_reno_pairs
-
-
-def shrink_premium(
-    raw: dict[str, float], parent: dict[str, float], n: int
-) -> dict[str, float]:
-    """Shrink raw premiums toward parent level."""
-    w = n / (n + SHRINKAGE_K)
-    result = {}
-    for et in EVENT_TYPES:
-        r = raw.get(et, parent.get(et, 0.0))
-        p = parent.get(et, raw.get(et, 0.0))
-        result[et] = w * r + (1 - w) * p
-    return result
-
-
-def apply_shrinkage(
-    sector_prem,
-    sector_n,
-    district_prem,
-    district_n,
-    area_prem,
-    area_n,
-    national_prem,
-    national_n,
-    all_sectors,
-    sector_to_dist,
-    dist_to_area,
-):
-    """Top-down hierarchical shrinkage for premiums."""
-    # Area -> national
-    area_shrunk = {}
-    for area, prem in area_prem.items():
-        area_shrunk[area] = shrink_premium(prem, national_prem, area_n.get(area, 0))
-
-    # District -> area
-    district_shrunk = {}
-    for dist, prem in district_prem.items():
-        a = dist_to_area.get(dist, "")
-        parent = area_shrunk.get(a, national_prem)
-        district_shrunk[dist] = shrink_premium(prem, parent, district_n.get(dist, 0))
-
-    # Sector -> district
-    sector_shrunk = {}
-    for sec, prem in sector_prem.items():
-        d = sector_to_dist.get(sec, "")
-        parent = district_shrunk.get(d, national_prem)
-        sector_shrunk[sec] = shrink_premium(prem, parent, sector_n.get(sec, 0))
-
-    # Fill missing sectors
-    for sec in all_sectors:
-        if sec not in sector_shrunk:
-            d = sector_to_dist.get(sec, "")
-            a = dist_to_area.get(d, "")
-            sector_shrunk[sec] = district_shrunk.get(
-                d, area_shrunk.get(a, national_prem)
-            )
-
-    return sector_shrunk
-
-
-def spatial_smooth(
-    sector_premiums: dict[str, dict[str, float]],
-    centroids: dict[str, tuple[float, float]],
-    n_reno_map: dict[str, int],
-) -> dict[str, dict[str, float]]:
-    """Blend sparse sector premiums with K nearest neighbors."""
-    sectors_with_coords = [s for s in sector_premiums if s in centroids]
-    if len(sectors_with_coords) < SPATIAL_NEIGHBORS + 1:
-        return sector_premiums
-
-    coords = np.array([centroids[s] for s in sectors_with_coords])
-    mean_lat = np.mean(coords[:, 0])
-    scale = np.cos(np.radians(mean_lat))
-    scaled_coords = np.column_stack([coords[:, 0], coords[:, 1] * scale])
-    tree = KDTree(scaled_coords)
-
-    result = dict(sector_premiums)
-    for i, sec in enumerate(sectors_with_coords):
-        n = n_reno_map.get(sec, 0)
-        self_w = n / (n + SPATIAL_BLEND_K)
-        if self_w > 0.95:
-            continue
-
-        dists, idxs = tree.query(scaled_coords[i], k=SPATIAL_NEIGHBORS + 1)
-        neighbor_dists = dists[1:]
-        neighbor_idxs = idxs[1:]
-
-        inv_dists = []
-        neighbor_prems = []
-        for d, j in zip(neighbor_dists, neighbor_idxs):
-            ns = sectors_with_coords[j]
-            if d > 0 and ns in sector_premiums:
-                inv_dists.append(1.0 / d)
-                neighbor_prems.append(sector_premiums[ns])
-
-        if not neighbor_prems:
-            continue
-
-        total_inv = sum(inv_dists)
-        nbr_w = 1.0 - self_w
-        ws = [iw / total_inv * nbr_w for iw in inv_dists]
-
-        blended = {}
-        for et in EVENT_TYPES:
-            val = self_w * sector_premiums[sec].get(et, 0.0)
-            for np_dict, w in zip(neighbor_prems, ws):
-                val += w * np_dict.get(et, 0.0)
-            blended[et] = val
-        result[sec] = blended
-
-    return result
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Estimate renovation premiums from repeat-sale residuals"
-    )
-    parser.add_argument(
-        "--input", type=Path, required=True, help="Path to wide.parquet"
-    )
-    parser.add_argument(
-        "--index", type=Path, required=True, help="Path to price_index.parquet"
-    )
-    parser.add_argument(
-        "--output", type=Path, required=True, help="Output renovation_premium.parquet"
-    )
-    args = parser.parse_args()
-
-    pairs = extract_pairs_with_events(args.input, args.index)
-    centroids = extract_centroids(args.input)
-
-    # Precompute hierarchy
-    all_sectors = pairs["sector"].unique().to_list()
-    sector_to_dist = {}
-    dist_to_area = {}
-    for s in all_sectors:
-        d, a = hierarchy_keys(s)
-        sector_to_dist[s] = d
-        dist_to_area[d] = a
-
-    all_type_groups = ["All"] + TYPE_GROUPS
-    rows = []
-
-    for tg in all_type_groups:
-        print(f"\n--- {tg} ---")
-        typed = pairs if tg == "All" else pairs.filter(pl.col("type_group") == tg)
-        if len(typed) < MIN_PAIRS:
-            print(f"  Skipping (only {len(typed)} pairs)")
-            continue
-
-        print(f"  {len(typed):,} pairs")
-
-        # National
-        national_prem = compute_premiums_for_group(typed)
-        national_reno = typed.filter(
-            (pl.col("has_extension") > 0)
-            | (pl.col("has_renovation") > 0)
-            | (pl.col("has_remodeling") > 0)
-        ).height
-        if not national_prem:
-            print("  No renovation pairs at national level, skipping")
-            continue
-
-        print(
-            "  National premiums: "
-            + ", ".join(
-                f"{et}: {v:.4f} ({math.exp(v) - 1:.1%})"
-                for et, v in national_prem.items()
-            )
-        )
-
-        # Per-level
-        print("  Computing per-level premiums:")
-        area_prem, area_n = compute_premiums_for_level(typed, "area")
-        district_prem, district_n = compute_premiums_for_level(typed, "district")
-        sector_prem, sector_n = compute_premiums_for_level(typed, "sector")
-        print(
-            f"  {len(area_prem)} areas, {len(district_prem)} districts, {len(sector_prem)} sectors with data"
-        )
-
-        # Shrinkage
-        print("  Applying shrinkage...")
-        sector_shrunk = apply_shrinkage(
-            sector_prem,
-            sector_n,
-            district_prem,
-            district_n,
-            area_prem,
-            area_n,
-            national_prem,
-            national_reno,
-            all_sectors,
-            sector_to_dist,
-            dist_to_area,
-        )
-
-        # Spatial smoothing
-        print("  Spatial smoothing...")
-        sector_smoothed = spatial_smooth(sector_shrunk, centroids, sector_n)
-
-        # Collect rows
-        for sec in all_sectors:
-            prem = sector_smoothed.get(sec, national_prem)
-            n = sector_n.get(sec, 0)
-            for et in EVENT_TYPES:
-                rows.append((sec, tg, et, prem.get(et, 0.0), n))
-
-    result = pl.DataFrame(
-        rows,
-        schema={
-            "sector": pl.String,
-            "type_group": pl.String,
-            "event_type": pl.String,
-            "log_premium": pl.Float64,
-            "n_reno_pairs": pl.Int64,
-        },
-        orient="row",
-    ).sort("type_group", "sector", "event_type")
-
-    result.write_parquet(args.output)
-    size_mb = args.output.stat().st_size / (1024 * 1024)
-    print(f"\nWrote {args.output} ({size_mb:.1f} MB)")
-    print(
-        f"  {result['sector'].n_unique():,} sectors x {len(all_type_groups)} types x {len(EVENT_TYPES)} events = {len(result):,} rows"
-    )
-
-    # Print summary statistics
-    print("\nNational premium summary:")
-    national = (
-        result.filter(pl.col("type_group") == "All")
-        .group_by("event_type")
-        .agg(
-            pl.col("log_premium").mean().alias("mean_log_premium"),
-        )
-    )
-    for row in national.iter_rows(named=True):
-        et = row["event_type"]
-        lp = row["mean_log_premium"]
-        print(f"  {et}: log_premium={lp:.4f} ({math.exp(lp) - 1:.1%} price uplift)")
-
-
-if __name__ == "__main__":
-    main()
--- a/pyproject.toml
+++ b/pyproject.toml
@ -26,6 +26,10 @@ dependencies = [
    "pyproj>=3.7.2",
    "pyshp>=2.3.0",
    "folium>=0.20.0",
+    "flask",
+    "httpx",
+    "polars",
+    "fake-useragent>=2.2.0",
 ]

 [tool.uv]
--- a/r5-java/.gitignore
+++ b/r5-java/.gitignore
@ -0,0 +1,3 @@
+jdk/
+lib/
+out/
--- a/r5-java/Dockerfile
+++ b/r5-java/Dockerfile
@ -1,20 +0,0 @@
-FROM eclipse-temurin:21-jdk AS build
-WORKDIR /app
-
-# Download pre-built R5 fat JAR from GitHub Releases (includes all R5 deps)
-ADD https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar /app/lib/r5.jar
-
-# Gson for JSON (HTTP server is built into JDK)
-ADD https://repo1.maven.org/maven2/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar /app/lib/gson.jar
-
-COPY src/ src/
-RUN javac -cp "lib/*" -d out src/main/java/propertymap/App.java
-
-FROM eclipse-temurin:21-jre
-WORKDIR /app
-RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
-COPY --from=build /app/lib/ /app/lib/
-COPY --from=build /app/out/ /app/out/
-COPY entrypoint.sh /app/entrypoint.sh
-RUN chmod +x /app/entrypoint.sh
-ENTRYPOINT ["/app/entrypoint.sh"]
--- a/r5-java/entrypoint.sh
+++ b/r5-java/entrypoint.sh
@ -1,18 +0,0 @@
-#!/bin/bash
-set -e
-
-TRANSIT_DIR=$DATA_DIR
-NETWORK_DIR=$NETWORK_CACHE_DIR
-BUILD_DIR="$NETWORK_DIR/build"
-
-# If no cached network yet, copy transit data to a writable location for the build.
-# R5 writes temp files (.mapdb) next to the OSM/GTFS files during network construction.
-if [ ! -f "$NETWORK_DIR/network.dat" ]; then
-    echo "No cached network — copying transit data to writable build dir..."
-    mkdir -p "$BUILD_DIR"
-    cp "$OSM_DIR"/*.osm.pbf "$BUILD_DIR/" 2>/dev/null || true
-    cp "$TRANSIT_DIR"/*.zip "$BUILD_DIR/" 2>/dev/null || true
-    export DATA_DIR="$BUILD_DIR"
-fi
-
-exec java -Xmx16g -cp "out:lib/*" propertymap.App
--- a/r5-java/run.sh
+++ b/r5-java/run.sh
@ -0,0 +1,129 @@
+#!/bin/bash
+set -euo pipefail
+
+# Batch-compute travel times from all places to all England postcodes
+# for all transport modes (car, bicycle, walking, transit).
+#
+# Uses each place as origin with all postcodes as destinations — R5 does one
+# routing computation per place, then reads off travel times to all postcodes.
+# For car/bicycle/walking this is symmetric (place->postcode = postcode->place).
+#
+# Output: property-data/travel-times/{mode}/
+#   - {index}.parquet files: (pcds VARCHAR, travel_minutes SMALLINT), one per place
+#   - postcodes_ref.parquet: postcode order reference
+#   - places_ref.parquet: place order reference
+#
+# Usage:
+#   ./r5-java/run.sh                        # defaults: 28 threads, 40g heap
+#   ./r5-java/run.sh --threads 8
+#   ./r5-java/run.sh --heap 24g
+
+# --- Defaults ---
+THREADS=28
+HEAP=40g
+NETWORK_DIR=property-data/r5-network
+OUTPUT_BASE=property-data/travel-times
+R5_DIR=r5-java
+
+# --- Parse args ---
+# Every flag takes exactly one value. ${2:?...} aborts with a readable message
+# when that value is missing or empty; with `set -u` in effect a bare "$2"
+# would instead die with an "unbound variable" error that names no flag.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --threads)     THREADS="${2:?--threads requires a value}";         shift 2 ;;
+        --heap)        HEAP="${2:?--heap requires a value}";               shift 2 ;;
+        --network-dir) NETWORK_DIR="${2:?--network-dir requires a value}"; shift 2 ;;
+        *) echo "Unknown: $1" >&2; exit 1 ;;
+    esac
+done
+
+# --- Verify we're in project root ---
+if [ ! -f property-data/places.parquet ] || [ ! -f property-data/arcgis_data.parquet ]; then
+    echo "Error: run from the property-map project root"
+    exit 1
+fi
+
+echo "=== R5 Batch Travel Times ==="
+echo "Threads: $THREADS | Heap: $HEAP"
+echo ""
+
+# --- Step 1: Download JDK if needed ---
+# The archive is unpacked into a scratch directory that is renamed to $JDK_DIR
+# only after both curl and tar have succeeded. Extracting straight into
+# $JDK_DIR would leave a partial tree behind on a failed or interrupted
+# download, and the directory check below would then accept it as a complete
+# JDK on every later run.
+JDK_DIR="$R5_DIR/jdk"
+if [ ! -d "$JDK_DIR" ]; then
+    echo "--- Downloading JDK 21 ---"
+    ARCH=$(uname -m)
+    case "$ARCH" in
+        x86_64|amd64) JDK_ARCH="x64" ;;
+        aarch64|arm64) JDK_ARCH="aarch64" ;;
+        *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
+    esac
+    JDK_URL="https://api.adoptium.net/v3/binary/latest/21/ga/linux/${JDK_ARCH}/jdk/hotspot/normal/eclipse"
+    JDK_TMP="$JDK_DIR.partial"
+    rm -rf "$JDK_TMP"    # discard leftovers from an earlier failed attempt
+    mkdir -p "$JDK_TMP"
+    # With errexit + pipefail active, a failure in either curl or tar stops the
+    # script here, before the rename below can publish an incomplete tree.
+    curl -fL "$JDK_URL" | tar xz --strip-components=1 -C "$JDK_TMP"
+    mv "$JDK_TMP" "$JDK_DIR"
+fi
+export JAVA_HOME="$JDK_DIR"
+export PATH="$JAVA_HOME/bin:$PATH"
+
+# --- Step 2: Download library JARs ---
+LIB_DIR="$R5_DIR/lib"
+mkdir -p "$LIB_DIR"
+
+R5_JAR="$LIB_DIR/r5.jar"
+DUCKDB_JAR="$LIB_DIR/duckdb.jar"
+
+# Each JAR is fetched to a ".part" file and renamed on success, so an
+# interrupted transfer never leaves a truncated JAR that the existence checks
+# below would accept. The ".part" suffix also keeps an unfinished file out of
+# the "$LIB_DIR/*" class path, which only picks up names ending in .jar.
+if [ ! -f "$R5_JAR" ]; then
+    echo "--- Downloading R5 v7.5 fat JAR ---"
+    curl -fL -o "$R5_JAR.part" https://github.com/conveyal/r5/releases/download/v7.5/r5-v7.5-all.jar
+    mv "$R5_JAR.part" "$R5_JAR"
+fi
+
+if [ ! -f "$DUCKDB_JAR" ]; then
+    echo "--- Downloading DuckDB JDBC ---"
+    curl -fL -o "$DUCKDB_JAR.part" https://repo1.maven.org/maven2/org/duckdb/duckdb_jdbc/1.0.0/duckdb_jdbc-1.0.0.jar
+    mv "$DUCKDB_JAR.part" "$DUCKDB_JAR"
+fi
+
+# --- Step 3: Compile Java source ---
+OUT_DIR="$R5_DIR/out"
+SRC_DIR="$R5_DIR/src/main/java/propertymap"
+
+NEEDS_COMPILE=false
+for src in "$SRC_DIR"/*.java; do
+    class="$OUT_DIR/propertymap/$(basename "${src%.java}").class"
+    if [ ! -f "$class" ] || [ "$src" -nt "$class" ]; then
+        NEEDS_COMPILE=true
+        break
+    fi
+done
+
+if $NEEDS_COMPILE; then
+    echo "--- Compiling Java source ---"
+    mkdir -p "$OUT_DIR"
+    javac -cp "$LIB_DIR/*" -d "$OUT_DIR" "$SRC_DIR"/*.java
+fi
+
+# --- Step 4: Prepare network build directory ---
+# R5 writes .mapdb temp files next to OSM/GTFS files during network construction.
+# Copy source data to a writable build dir to avoid polluting the originals.
+mkdir -p "$NETWORK_DIR"
+DATA_DIR="property-data/transit"
+
+if [ ! -f "$NETWORK_DIR/network.dat" ]; then
+    BUILD_DIR="$NETWORK_DIR/build"
+    echo "--- No cached network — copying transit data to build dir ---"
+    mkdir -p "$BUILD_DIR"
+    cp property-data/transit/raw/*.osm.pbf "$BUILD_DIR/" 2>/dev/null || true
+    cp property-data/transit/*.zip "$BUILD_DIR/" 2>/dev/null || true
+    DATA_DIR="$BUILD_DIR"
+fi
+
+# --- Step 5: Run batch ---
+echo ""
+echo "--- Starting batch computation ---"
+DATA_DIR="$DATA_DIR" NETWORK_CACHE_DIR="$NETWORK_DIR" \
+java -Xmx"$HEAP" -cp "$OUT_DIR:$LIB_DIR/*" propertymap.App \
+    --postcodes property-data/arcgis_data.parquet \
+    --places property-data/places.parquet \
+    --output-dir "$OUTPUT_BASE" \
+    --threads "$THREADS"
+
+echo ""
+echo "=== Complete ==="
+echo "Output: $OUTPUT_BASE/{car,bicycle,walking,transit}/"
+echo "Reference: $OUTPUT_BASE/postcodes_ref.parquet, $OUTPUT_BASE/places_ref.parquet"
--- a/r5-java/src/main/java/propertymap/App.java
+++ b/r5-java/src/main/java/propertymap/App.java
@ -1,223 +1,208 @@
 package propertymap;

-import com.conveyal.r5.OneOriginResult;
-import com.conveyal.r5.analyst.FreeFormPointSet;
-import com.conveyal.r5.analyst.PointSet;
-import com.conveyal.r5.analyst.TravelTimeComputer;
-import com.conveyal.r5.analyst.WebMercatorExtents;
-import com.conveyal.r5.analyst.cluster.RegionalTask;
-import com.conveyal.r5.analyst.cluster.TravelTimeResult;
-import com.conveyal.r5.api.util.LegMode;
-import com.conveyal.r5.api.util.TransitModes;
-import com.conveyal.r5.kryo.KryoNetworkSerializer;
 import com.conveyal.r5.transit.TransportNetwork;
-import com.google.gson.Gson;
-import com.sun.net.httpserver.HttpExchange;
-import com.sun.net.httpserver.HttpServer;
-import org.locationtech.jts.geom.Coordinate;
+import org.duckdb.DuckDBConnection;

-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.net.InetSocketAddress;
-import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.time.LocalDate;
-import java.util.EnumSet;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;

+/**
+ * Batch-compute travel times from each origin (place) to all destinations (postcodes)
+ * for all transport modes (car, bicycle, walking, transit).
+ *
+ * Output per mode: one parquet file per origin in {output-dir}/{mode}/{index}.parquet
+ * with columns (pcds VARCHAR, travel_minutes SMALLINT). -1 = unreachable within 120 min.
+ */
 public class App {
-    private static TransportNetwork network;
-    private static final Gson gson = new Gson();

-    static class TravelTimeRequest {
-        double[] origin;         // [lat, lon]
-        double[][] destinations; // [[lat, lon], ...]
-        String mode;             // "transit", "car", "bicycle", "walking"
-    }
-
-    static class TravelTimeResponse {
-        double[] travel_times; // minutes, -1 = unreachable
-    }
+    private static final String[] MODES = {"car", "bicycle", "walking", "transit"};
+    private static final int MAX_RETRIES = 2;

    public static void main(String[] args) throws Exception {
-        String dataDir = System.getenv("DATA_DIR");
+        String postcodesPath = requiredArg(args, "--postcodes");
+        String placesPath = requiredArg(args, "--places");
+        String outputDirStr = requiredArg(args, "--output-dir");
+        int threads = Integer.parseInt(optionalArg(args, "--threads", "4"));

-        if (dataDir == null) {
-            System.err.println("Error: DATA_DIR environment variable not set");
-            System.exit(1);
-        }
+        Path outDir = Paths.get(outputDirStr);
+        Files.createDirectories(outDir);

-        String networkCacheDir = System.getenv("NETWORK_CACHE_DIR");
-        if (networkCacheDir == null) {
-            System.err.println("Error: NETWORK_CACHE_DIR environment variable not set");
-            System.exit(1);
-        }
+        LocalDate today = LocalDate.now();
+        TransportNetwork network = Router.loadNetwork(requiredEnv("DATA_DIR"), requiredEnv("NETWORK_CACHE_DIR"));

-        System.out.println("Loading transport network from " + dataDir);
-        System.out.println("Network cache dir: " + networkCacheDir);
+        System.err.println("Loading postcodes (England only)...");
+        Parquet.Postcodes postcodes = Parquet.loadEnglandPostcodes(
+            postcodesPath, outDir.resolve("postcodes_ref.parquet"));
+        int nDest = postcodes.lats().length;
+        System.err.printf("  %,d postcodes%n", nDest);

-        File cacheFile = new File(networkCacheDir, "network.dat");
-        if (cacheFile.exists()) {
-            System.out.println("Loading cached network from " + cacheFile);
-            network = KryoNetworkSerializer.read(cacheFile);
-        } else {
-            System.out.println("Building network (first run, this takes a few minutes)...");
-            network = TransportNetwork.fromDirectory(new File(dataDir));
-            new File(networkCacheDir).mkdirs();
-            KryoNetworkSerializer.write(network, cacheFile);
-            System.out.println("Network cached to " + cacheFile);
-        }
+        List<Router.DestinationChunk> chunks = Router.buildDestinationChunks(postcodes.lats(), postcodes.lons());

-        // Build stop-to-vertex distance tables (needed for egress routing in transit mode).
-        // Not built by fromDirectory() and too large to fit in the Kryo cache with 4GB heap.
-        System.out.println("Building stop-to-vertex distance tables...");
-        network.transitLayer.buildDistanceTables(null);
-        System.out.println("Distance tables built");
+        System.err.println("Loading places (deduplicated)...");
+        double[][] placesLatLon = Parquet.loadPlaces(placesPath, outDir.resolve("places_ref.parquet"));
+        double[] originLats = placesLatLon[0], originLons = placesLatLon[1];
+        int nOrigins = originLats.length;
+        System.err.printf("  %,d places%n", nOrigins);
+        System.err.printf("  Estimated output: %.1f GB (%,d x %,d x 2B)%n",
+            (double) nOrigins * nDest * 2 / 1e9, nOrigins, nDest);

-        System.out.println("Transport network loaded successfully");
-
-        HttpServer server = HttpServer.create(new InetSocketAddress(8003), 0);
-
-        server.createContext("/health", exchange -> {
-            sendResponse(exchange, 200, "ok");
+        // One thread pool shared across all modes
+        ExecutorService pool = Executors.newFixedThreadPool(threads);
+        // One DuckDB connection per thread, reused across all writes
+        ThreadLocal<DuckDBConnection> threadConn = ThreadLocal.withInitial(() -> {
+            try { return Parquet.connect(); }
+            catch (Exception e) { throw new RuntimeException(e); }
        });

-        server.createContext("/travel-times", exchange -> {
-            if (!"POST".equals(exchange.getRequestMethod())) {
-                sendResponse(exchange, 405, "Method not allowed");
-                return;
+        try {
+            for (String mode : MODES) {
+                processMode(network, chunks, postcodes.codes(), originLats, originLons,
+                    nDest, outDir, mode, today, pool, threadConn);
            }
+        } finally {
+            pool.shutdown();
+            pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
+        }
+    }
+
+    /**
+     * Compute travel times for one transport mode, for every origin that does not
+     * yet have an output file in {@code outDir/mode}.
+     *
+     * <p>One task per remaining origin is submitted to {@code pool}, and this method
+     * blocks until all of them have finished. A failing origin is counted and logged
+     * but does not abort the mode, so a later run picks it up again through
+     * {@link #findRemaining}. Progress is printed to stderr every two seconds.
+     *
+     * @param network    transport network handed through to the per-origin computation
+     * @param chunks     destination chunks handed through to the per-origin computation
+     * @param postcodes  destination postcode labels written next to the travel times
+     * @param originLats origin latitudes; its length defines the number of origins
+     * @param originLons origin longitudes, indexed like {@code originLats}
+     * @param nDest      number of destinations
+     * @param outDir     base output directory; results go to {@code outDir/mode}
+     * @param mode       transport mode name, also used as the sub-directory name
+     * @param date       date handed through to the per-origin computation
+     * @param pool       executor the per-origin tasks run on
+     * @param threadConn per-thread DuckDB connection used for writing
+     * @throws Exception if the mode directory cannot be prepared or the wait is interrupted
+     */
+    private static void processMode(
+            TransportNetwork network, List<Router.DestinationChunk> chunks,
+            String[] postcodes, double[] originLats, double[] originLons, int nDest,
+            Path outDir, String mode, LocalDate date,
+            ExecutorService pool, ThreadLocal<DuckDBConnection> threadConn) throws Exception {
+
+        int nOrigins = originLats.length;
+        System.err.printf("%n=== %s ===%n", mode.toUpperCase());
+        Path modeDir = outDir.resolve(mode);
+        Files.createDirectories(modeDir);
+
+        List<Integer> remaining = findRemaining(modeDir, nOrigins);
+        int alreadyDone = nOrigins - remaining.size();
+        System.err.printf("  %,d done, %,d remaining%n", alreadyDone, remaining.size());
+
+        if (remaining.isEmpty()) {
+            System.err.println("  All origins completed for this mode!");
+            return;
+        }
+
+        long startMs = System.currentTimeMillis();
+        int total = remaining.size();
+        AtomicInteger completed = new AtomicInteger(0);
+        AtomicInteger failed = new AtomicInteger(0);
+
+        // Progress reporter on a timer instead of per-task stderr writes
+        ScheduledExecutorService reporter = Executors.newSingleThreadScheduledExecutor(r -> {
+            Thread t = new Thread(r, "progress");
+            t.setDaemon(true);
+            return t;
+        });
+        reporter.scheduleAtFixedRate(() -> {
+            int c = completed.get();
+            if (c == 0) return; // no rate to report yet; also avoids dividing by zero below
+            double secs = (System.currentTimeMillis() - startMs) / 1000.0;
+            double rate = c / secs;
+            double etaH = (total - c) / rate / 3600;
+            System.err.printf("\r  [%,d/%,d] %.1f/s | ETA %.1fh | fail %d",
+                c, total, rate, etaH, failed.get());
+        }, 2, 2, TimeUnit.SECONDS);
+
+        // The latch is counted down once per task, success or failure, so await()
+        // returns exactly when every submitted origin has been handled.
+        java.util.concurrent.CountDownLatch latch = new java.util.concurrent.CountDownLatch(remaining.size());
+
+        try {
+            for (int idx : remaining) {
+                pool.submit(() -> {
+                    try {
+                        processOrigin(network, chunks, postcodes, originLats[idx], originLons[idx],
+                            nDest, modeDir, mode, date, idx, threadConn.get());
+                        completed.incrementAndGet();
+                    } catch (Throwable e) {
+                        // Throwable rather than Exception: anything thrown out of a submitted
+                        // task is captured by its Future, which nobody reads here, so an Error
+                        // (e.g. OutOfMemoryError) would otherwise be neither counted nor logged.
+                        // Printing e itself includes the exception class even when the message is null.
+                        failed.incrementAndGet();
+                        System.err.printf("%n  [FAIL] origin %d: %s%n", idx, e);
+                    } finally {
+                        latch.countDown();
+                    }
+                });
+            }
+
+            latch.await();
+        } finally {
+            // Stop the progress timer even when submission or the wait above fails.
+            reporter.shutdown();
+        }
+
+        double elapsedH = (System.currentTimeMillis() - startMs) / 3_600_000.0;
+        int n = completed.get();
+        System.err.printf("\r  [%,d/%,d] %.1f/s | %.1fh | fail %d%n",
+            n, total, n / Math.max(elapsedH * 3600, 1), elapsedH, failed.get());
+    }
+
+    /** Compute and write travel times for a single origin, with retry on failure. */
+    private static void processOrigin(
+            TransportNetwork network, List<Router.DestinationChunk> chunks,
+            String[] postcodes, double lat, double lon, int nDest,
+            Path modeDir, String mode, LocalDate date, int idx,
+            DuckDBConnection conn) throws Exception {
+
+        Path outPath = modeDir.resolve(String.format("%06d.parquet", idx));
+        Exception lastError = null;
+
+        for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            try {
-                handleTravelTimes(exchange);
+                short[] times = Router.computeTravelTimes(network, chunks, lat, lon, mode, nDest, date);
+                Parquet.writeTravelTimes(conn, outPath, postcodes, times);
+                return;
            } catch (Exception e) {
-                System.err.println("Error handling travel-times: " + e.getMessage());
-                e.printStackTrace();
-                sendResponse(exchange, 500, "Internal server error: " + e.getMessage());
-            }
-        });
-
-        server.setExecutor(java.util.concurrent.Executors.newFixedThreadPool(4));
-        server.start();
-        System.out.println("R5 service listening on port 8003");
-    }
-
-    private static void sendResponse(HttpExchange exchange, int status, String body) throws IOException {
-        byte[] bytes = body.getBytes(StandardCharsets.UTF_8);
-        exchange.getResponseHeaders().set("Content-Type", "application/json");
-        exchange.sendResponseHeaders(status, bytes.length);
-        try (OutputStream os = exchange.getResponseBody()) {
-            os.write(bytes);
-        }
-    }
-
-    private static void handleTravelTimes(HttpExchange exchange) throws IOException {
-        long t0 = System.currentTimeMillis();
-
-        String body = new String(exchange.getRequestBody().readAllBytes(), StandardCharsets.UTF_8);
-        TravelTimeRequest req = gson.fromJson(body, TravelTimeRequest.class);
-
-        if (req.origin == null || req.origin.length != 2) {
-            sendResponse(exchange, 400, "{\"error\":\"origin must be [lat, lon]\"}");
-            return;
-        }
-        if (req.destinations == null || req.destinations.length == 0) {
-            sendResponse(exchange, 400, "{\"error\":\"destinations must be non-empty\"}");
-            return;
-        }
-
-        String mode = req.mode != null ? req.mode : "transit";
-
-        // Build destination point set (Coordinate takes x=lon, y=lat)
-        Coordinate[] coords = new Coordinate[req.destinations.length];
-        for (int i = 0; i < req.destinations.length; i++) {
-            coords[i] = new Coordinate(req.destinations[i][1], req.destinations[i][0]); // lon, lat
-        }
-        FreeFormPointSet destinations = new FreeFormPointSet(coords);
-
-        // Build the regional task
-        RegionalTask task = new RegionalTask();
-        task.fromLat = req.origin[0];
-        task.fromLon = req.origin[1];
-        task.date = LocalDate.now();
-        task.percentiles = new int[]{50};
-        task.recordTimes = true;
-        task.destinationPointSets = new PointSet[]{ destinations };
-
-        // Set grid extents from destination point set (required by TravelTimeComputer)
-        WebMercatorExtents extents = destinations.getWebMercatorExtents();
-        task.zoom = extents.zoom;
-        task.west = extents.west;
-        task.north = extents.north;
-        task.width = extents.width;
-        task.height = extents.height;
-
-        switch (mode) {
-            case "car":
-                task.fromTime = 8 * 3600;
-                task.toTime = 8 * 3600 + 60;
-                task.maxTripDurationMinutes = 120;
-                task.accessModes = EnumSet.of(LegMode.CAR);
-                task.egressModes = EnumSet.of(LegMode.CAR);
-                task.directModes = EnumSet.of(LegMode.CAR);
-                task.transitModes = EnumSet.noneOf(TransitModes.class);
-                break;
-            case "bicycle":
-                task.fromTime = 8 * 3600;
-                task.toTime = 8 * 3600 + 60;
-                task.maxTripDurationMinutes = 120;
-                task.accessModes = EnumSet.of(LegMode.BICYCLE);
-                task.egressModes = EnumSet.of(LegMode.BICYCLE);
-                task.directModes = EnumSet.of(LegMode.BICYCLE);
-                task.transitModes = EnumSet.noneOf(TransitModes.class);
-                break;
-            case "walking":
-                task.fromTime = 8 * 3600;
-                task.toTime = 8 * 3600 + 60;
-                task.maxTripDurationMinutes = 120;
-                task.accessModes = EnumSet.of(LegMode.WALK);
-                task.egressModes = EnumSet.of(LegMode.WALK);
-                task.directModes = EnumSet.of(LegMode.WALK);
-                task.transitModes = EnumSet.noneOf(TransitModes.class);
-                break;
-            default: // transit
-                task.fromTime = 8 * 3600;
-                task.toTime = 8 * 3600 + 60; // single RAPTOR sweep
-                task.maxTripDurationMinutes = 120;
-                task.maxRides = 4;
-                task.accessModes = EnumSet.of(LegMode.WALK);
-                task.egressModes = EnumSet.of(LegMode.WALK);
-                task.directModes = EnumSet.of(LegMode.WALK);
-                task.transitModes = EnumSet.of(TransitModes.TRANSIT);
-                break;
-        }
-
-        // Compute travel times
-        TravelTimeComputer computer = new TravelTimeComputer(task, network);
-        OneOriginResult result = computer.computeTravelTimes();
-
-        TravelTimeResponse response = new TravelTimeResponse();
-        response.travel_times = new double[req.destinations.length];
-
-        TravelTimeResult tt = result.travelTimes;
-        if (tt != null) {
-            int[][] values = tt.getValues();
-            // values[percentileIndex][destinationIndex]
-            for (int i = 0; i < req.destinations.length; i++) {
-                if (i < values[0].length && values[0][i] != Integer.MAX_VALUE) {
-                    response.travel_times[i] = values[0][i]; // already in minutes
-                } else {
-                    response.travel_times[i] = -1; // unreachable
+                lastError = e;
+                if (attempt < MAX_RETRIES) {
+                    System.err.printf("%n  [RETRY %d/%d] origin %d: %s%n",
+                        attempt + 1, MAX_RETRIES, idx, e.getMessage());
                }
            }
-        } else {
-            for (int i = 0; i < req.destinations.length; i++) {
-                response.travel_times[i] = -1;
+        }
+        throw lastError;
+    }
+
+    /**
+     * Find origin indices that don't yet have output parquet files.
+     *
+     * <p>An origin counts as done when {@code modeDir/NNNNNN.parquet} (zero-padded
+     * six-digit index) exists and is non-empty; a missing or zero-byte file puts
+     * the index back on the work list.
+     *
+     * @param modeDir  directory holding the per-origin output files of one mode
+     * @param nOrigins number of origins; indices {@code 0 .. nOrigins-1} are checked
+     * @return the indices still to be computed, in ascending order
+     * @throws Exception if a file's size cannot be read
+     */
+    private static List<Integer> findRemaining(Path modeDir, int nOrigins) throws Exception {
+        List<Integer> remaining = new ArrayList<>();
+        for (int i = 0; i < nOrigins; i++) {
+            Path f = modeDir.resolve(String.format("%06d.parquet", i));
+            if (!Files.exists(f) || Files.size(f) == 0) {
+                remaining.add(i);
            }
        }
+        return remaining;
+    }

-        long elapsed = System.currentTimeMillis() - t0;
-        System.out.println("Travel times (" + mode + ") computed for " + req.destinations.length +
-            " destinations in " + elapsed + "ms");
+    /**
+     * Return the value following the first occurrence of {@code name} in {@code args}.
+     * If the flag is absent, prints an error plus usage to stderr and exits with status 1.
+     *
+     * @param args command-line arguments
+     * @param name flag to look for, e.g. {@code --postcodes}
+     * @return the argument immediately after the flag
+     */
+    private static String requiredArg(String[] args, String name) {
+        // Stops one short of the end: a flag in the last position has no value
+        // after it and is therefore reported as missing.
+        for (int i = 0; i < args.length - 1; i++) {
+            if (args[i].equals(name)) return args[i + 1];
+        }
+        System.err.println("Missing required argument: " + name);
+        System.err.println("Usage: App --postcodes FILE --places FILE --output-dir DIR [--threads N]");
+        System.exit(1);
+        return null; // unreachable
+    }

-        sendResponse(exchange, 200, gson.toJson(response));
+    /**
+     * Return the value following the first occurrence of {@code name} in {@code args},
+     * or {@code defaultValue} when the flag is not found.
+     *
+     * @param args         command-line arguments
+     * @param name         flag to look for, e.g. {@code --threads}
+     * @param defaultValue value returned when the flag is absent
+     * @return the argument immediately after the flag, or the default
+     */
+    private static String optionalArg(String[] args, String name, String defaultValue) {
+        // Stops one short of the end: a flag given as the very last argument has
+        // no value after it, so the default is returned without any warning.
+        for (int i = 0; i < args.length - 1; i++) {
+            if (args[i].equals(name)) return args[i + 1];
+        }
+        return defaultValue;
+    }
+
+    /**
+     * Return the value of environment variable {@code name}. If the variable is
+     * not set, prints an error to stderr and exits with status 1. A variable that
+     * is set to the empty string is returned as-is.
+     *
+     * @param name environment variable name
+     * @return the variable's value
+     */
+    private static String requiredEnv(String name) {
+        String val = System.getenv(name);
+        if (val == null) {
+            System.err.println("Missing required environment variable: " + name);
+            System.exit(1);
+        }
+        return val;
    }
 }
--- a/r5-java/src/main/java/propertymap/Parquet.java
+++ b/r5-java/src/main/java/propertymap/Parquet.java
@ -0,0 +1,112 @@
+package propertymap;
+
+import org.duckdb.DuckDBAppender;
+import org.duckdb.DuckDBConnection;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.List;
+
+/** DuckDB-based parquet I/O. */
+public class Parquet {
+
+    record Postcodes(String[] codes, double[] lats, double[] lons) {}
+
+    static {
+        try { Class.forName("org.duckdb.DuckDBDriver"); }
+        catch (ClassNotFoundException e) { throw new RuntimeException(e); }
+    }
+
+    /** Load England postcodes, write reference parquet, return codes + flat lat/lon arrays. */
+    static Postcodes loadEnglandPostcodes(String parquetPath, Path refOut) throws Exception {
+        try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
+            stmt.execute("CREATE TABLE postcodes AS SELECT pcds, lat, \"long\" FROM read_parquet('"
+                + parquetPath + "') WHERE ctry = 'E92000001'");
+            copyToParquet(stmt, "SELECT * FROM postcodes", refOut);
+
+            try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM postcodes")) {
+                rs.next();
+                int n = rs.getInt(1);
+                String[] codes = new String[n];
+                double[] lats = new double[n];
+                double[] lons = new double[n];
+
+                try (ResultSet data = stmt.executeQuery("SELECT pcds, lat, \"long\" FROM postcodes")) {
+                    int i = 0;
+                    while (data.next()) {
+                        codes[i] = data.getString(1);
+                        lats[i] = data.getDouble(2);
+                        lons[i] = data.getDouble(3);
+                        i++;
+                    }
+                }
+                return new Postcodes(codes, lats, lons);
+            }
+        }
+    }
+
+    /** Load places deduplicated by lat/lon, write reference parquet, return flat lat/lon arrays. */
+    static double[][] loadPlaces(String parquetPath, Path refOut) throws Exception {
+        try (DuckDBConnection conn = connect(); Statement stmt = conn.createStatement()) {
+            stmt.execute("CREATE TABLE places AS SELECT * EXCLUDE (rn) FROM ("
+                + "SELECT *, ROW_NUMBER() OVER (PARTITION BY lat, lon) AS rn "
+                + "FROM read_parquet('" + parquetPath + "')) WHERE rn = 1");
+            copyToParquet(stmt, "SELECT * FROM places", refOut);
+
+            try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM places")) {
+                rs.next();
+                int n = rs.getInt(1);
+                // Return as [lats, lons] flat arrays
+                double[] lats = new double[n];
+                double[] lons = new double[n];
+
+                try (ResultSet data = stmt.executeQuery("SELECT lat, lon FROM places")) {
+                    int i = 0;
+                    while (data.next()) {
+                        lats[i] = data.getDouble(1);
+                        lons[i] = data.getDouble(2);
+                        i++;
+                    }
+                }
+                return new double[][]{lats, lons};
+            }
+        }
+    }
+
+    /** Write postcode travel times as a ZSTD-compressed parquet (atomic via tmp + rename). */
+    static void writeTravelTimes(DuckDBConnection conn, Path outPath, String[] postcodes, short[] times)
+            throws Exception {
+        Path tmp = outPath.resolveSibling(outPath.getFileName() + ".tmp");
+        try (Statement stmt = conn.createStatement()) {
+            stmt.execute("DROP TABLE IF EXISTS t");
+            stmt.execute("CREATE TABLE t (pcds VARCHAR, travel_minutes SMALLINT)");
+        }
+        try (DuckDBAppender appender = conn.createAppender("main", "t")) {
+            for (int i = 0; i < postcodes.length; i++) {
+                appender.beginRow();
+                appender.append(postcodes[i]);
+                appender.append(times[i]);
+                appender.endRow();
+            }
+        }
+        try (Statement stmt = conn.createStatement()) {
+            stmt.execute("COPY t TO '" + tmp.toAbsolutePath() + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
+        }
+        Files.move(tmp, outPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
+    }
+
+    /** Create a new in-memory DuckDB connection (for use as a per-thread reusable connection). */
+    static DuckDBConnection connect() throws Exception {
+        return (DuckDBConnection) DriverManager.getConnection("jdbc:duckdb:");
+    }
+
+    private static void copyToParquet(Statement stmt, String query, Path outPath) throws Exception {
+        stmt.execute("COPY (" + query + ") TO '" + outPath.toAbsolutePath()
+            + "' (FORMAT PARQUET, COMPRESSION ZSTD)");
+    }
+}
--- a/r5-java/src/main/java/propertymap/Router.java
+++ b/r5-java/src/main/java/propertymap/Router.java
@ -0,0 +1,211 @@
+package propertymap;
+
+import com.conveyal.r5.OneOriginResult;
+import com.conveyal.r5.analyst.FreeFormPointSet;
+import com.conveyal.r5.analyst.PointSet;
+import com.conveyal.r5.analyst.TravelTimeComputer;
+import com.conveyal.r5.analyst.WebMercatorExtents;
+import com.conveyal.r5.analyst.cluster.RegionalTask;
+import com.conveyal.r5.analyst.cluster.TravelTimeResult;
+import com.conveyal.r5.api.util.LegMode;
+import com.conveyal.r5.api.util.TransitModes;
+import com.conveyal.r5.kryo.KryoNetworkSerializer;
+import com.conveyal.r5.transit.TransportNetwork;
+import org.locationtech.jts.geom.Coordinate;
+
+import java.io.File;
+import java.time.LocalDate;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.EnumSet;
+import java.util.List;
+
+/** R5 routing: network loading, point set construction, travel time computation. */
+public class Router {
+
+    /** Web Mercator zoom level used for every destination grid. */
+    private static final int ZOOM = 9;
+    /** Side length, in pixels, of the whole world at {@link #ZOOM}. */
+    private static final int TOTAL_PIXELS = 256 << ZOOM;
+    private static final int MAX_GRID_CELLS = 4_900_000; // under R5's 5M limit
+
+    /** Value left in the result array for destinations that received no travel time. */
+    private static final short NO_RESULT = -1;
+
+    // Request parameters shared by every task.
+    // NOTE(review): fromTime/toTime are presumably seconds after midnight (08:00-08:01) - confirm against R5.
+    private static final int FROM_TIME = 8 * 3600;
+    private static final int TO_TIME = FROM_TIME + 60;
+    private static final int MAX_TRIP_DURATION_MINUTES = 120;
+    private static final int MAX_RIDES = 4;
+
+    /**
+     * A chunk of destinations that fits within R5's grid cell limit at zoom 9.
+     * originalIndices maps each position in this chunk back to the full destinations array.
+     */
+    record DestinationChunk(FreeFormPointSet pointSet, WebMercatorExtents extents, int[] originalIndices) {}
+
+    /**
+     * Load or build the transport network with Kryo caching.
+     *
+     * <p>If {@code <cacheDir>/network.dat} exists it is deserialized; otherwise the network is
+     * built from {@code dataDir} and then written to that cache file. Distance tables are built
+     * in both cases before the network is returned.
+     *
+     * @param dataDir  directory handed to {@code TransportNetwork.fromDirectory}
+     * @param cacheDir directory holding (or receiving) {@code network.dat}
+     * @return the ready-to-use network
+     * @throws Exception if reading, building or caching the network fails
+     */
+    static TransportNetwork loadNetwork(String dataDir, String cacheDir) throws Exception {
+        System.err.println("Loading transport network...");
+        File cacheFile = new File(cacheDir, "network.dat");
+        TransportNetwork network;
+
+        if (cacheFile.exists()) {
+            System.err.println("  Loading cached network from " + cacheFile);
+            network = KryoNetworkSerializer.read(cacheFile);
+        } else {
+            System.err.println("  Building network (first run, takes a few minutes)...");
+            network = TransportNetwork.fromDirectory(new File(dataDir));
+            writeCache(network, new File(cacheDir), cacheFile);
+            System.err.println("  Cached to " + cacheFile);
+        }
+
+        System.err.println("  Building distance tables...");
+        network.transitLayer.buildDistanceTables(null);
+        System.err.println("  Network ready");
+        return network;
+    }
+
+    /**
+     * Serialize the network to {@code cacheFile} without ever exposing a half-written file.
+     * The data is written to a sibling {@code .tmp} file and renamed into place, so an
+     * interrupted run cannot leave a truncated cache that the next run would try to load.
+     */
+    private static void writeCache(TransportNetwork network, File dir, File cacheFile) throws Exception {
+        // mkdirs() also returns false when the directory already exists, hence the second check.
+        if (!dir.mkdirs() && !dir.isDirectory()) {
+            throw new java.io.IOException("Cannot create cache directory " + dir);
+        }
+        File partial = new File(dir, cacheFile.getName() + ".tmp");
+        try {
+            KryoNetworkSerializer.write(network, partial);
+            java.nio.file.Files.move(partial.toPath(), cacheFile.toPath(),
+                java.nio.file.StandardCopyOption.REPLACE_EXISTING,
+                java.nio.file.StandardCopyOption.ATOMIC_MOVE);
+        } finally {
+            partial.delete(); // nothing left to delete after a successful move
+        }
+    }
+
+    /**
+     * Split destinations into geographic chunks that each fit within R5's grid cell limit.
+     * Sorts by latitude and splits into bands so each band's bounding box at zoom 9 is under 5M cells.
+     *
+     * @param lats destination latitudes in degrees
+     * @param lons destination longitudes in degrees, parallel to {@code lats}
+     * @return the chunks in order of ascending latitude; empty when there are no destinations
+     * @throws IllegalArgumentException if the two arrays differ in length
+     */
+    static List<DestinationChunk> buildDestinationChunks(double[] lats, double[] lons) {
+        if (lats.length != lons.length) {
+            throw new IllegalArgumentException(
+                "lats and lons differ in length: " + lats.length + " vs " + lons.length);
+        }
+        int n = lats.length;
+        List<DestinationChunk> chunks = new ArrayList<>();
+        if (n == 0) {
+            // Without points the min/max sentinels below would produce a meaningless grid width.
+            System.err.println("  No destinations to split into chunks");
+            return chunks;
+        }
+
+        // Sort indices by latitude for geographic chunking
+        Integer[] sorted = new Integer[n];
+        for (int i = 0; i < n; i++) sorted[i] = i;
+        Arrays.sort(sorted, (a, b) -> Double.compare(lats[a], lats[b]));
+
+        // Determine grid width (the full longitude span is an upper bound for every chunk)
+        double minLon = Double.MAX_VALUE, maxLon = -Double.MAX_VALUE;
+        for (double lon : lons) {
+            minLon = Math.min(minLon, lon);
+            maxLon = Math.max(maxLon, lon);
+        }
+        int gridWidth = lonToPixel(maxLon, TOTAL_PIXELS) - lonToPixel(minLon, TOTAL_PIXELS) + 1;
+        int maxHeight = MAX_GRID_CELLS / gridWidth;
+
+        // Greedily build chunks: extend each band until it would exceed maxHeight
+        int start = 0;
+        while (start < n) {
+            int end = start + 1;
+            int firstPixel = latToPixel(lats[sorted[start]], TOTAL_PIXELS);
+
+            while (end < n) {
+                int candidatePixel = latToPixel(lats[sorted[end]], TOTAL_PIXELS);
+                if (Math.abs(candidatePixel - firstPixel) + 1 > maxHeight) break;
+                end++;
+            }
+
+            chunks.add(buildChunk(lats, lons, sorted, start, end));
+            start = end;
+        }
+
+        System.err.printf("  Split into %d chunks at zoom %d (grid width %d, max height %d)%n",
+            chunks.size(), ZOOM, gridWidth, maxHeight);
+        return chunks;
+    }
+
+    /**
+     * Compute travel times from one origin to all destinations across all chunks.
+     *
+     * @param network   network to route on
+     * @param chunks    destination chunks, e.g. from {@link #buildDestinationChunks}
+     * @param originLat origin latitude in degrees
+     * @param originLon origin longitude in degrees
+     * @param mode      one of {@code "car"}, {@code "bicycle"}, {@code "walking"}, {@code "transit"}
+     * @param nDest     length of the full destinations array the chunks index into
+     * @param date      service date of the request
+     * @return one entry per destination, {@code -1} where no travel time was produced
+     * @throws IllegalArgumentException if {@code mode} is not one of the supported values
+     */
+    static short[] computeTravelTimes(
+            TransportNetwork network, List<DestinationChunk> chunks,
+            double originLat, double originLon, String mode, int nDest, LocalDate date) {
+
+        short[] times = new short[nDest];
+        Arrays.fill(times, NO_RESULT);
+
+        for (DestinationChunk chunk : chunks) {
+            RegionalTask task = buildTask(chunk, originLat, originLon, mode, date);
+            OneOriginResult result = new TravelTimeComputer(task, network).computeTravelTimes();
+
+            TravelTimeResult tt = result.travelTimes;
+            if (tt == null) continue;
+
+            int[][] values = tt.getValues();
+            if (values.length == 0) continue; // no percentile rows: nothing to copy
+
+            // Row 0 belongs to the single percentile requested in buildTask.
+            int[] row = values[0];
+            int[] indices = chunk.originalIndices();
+            int count = Math.min(indices.length, row.length);
+            for (int i = 0; i < count; i++) {
+                int value = row[i];
+                // Integer.MAX_VALUE is treated as "no travel time"; the sentinel stays in place.
+                if (value != Integer.MAX_VALUE) {
+                    // Clamp before narrowing so an oversized value can never wrap around
+                    // into a negative number (and thereby mimic the -1 sentinel).
+                    times[indices[i]] = (short) Math.min(value, Short.MAX_VALUE);
+                }
+            }
+        }
+        return times;
+    }
+
+    /**
+     * Build one chunk from the sorted index range {@code [start, end)}: collects the coordinates,
+     * remembers their original indices and derives the pixel extents of their bounding box.
+     */
+    private static DestinationChunk buildChunk(
+            double[] lats, double[] lons, Integer[] sorted, int start, int end) {
+        int size = end - start;
+        int[] originalIndices = new int[size];
+        Coordinate[] coords = new Coordinate[size];
+        double minLat = Double.MAX_VALUE, maxLat = -Double.MAX_VALUE;
+        double minLon = Double.MAX_VALUE, maxLon = -Double.MAX_VALUE;
+
+        for (int i = 0; i < size; i++) {
+            int idx = sorted[start + i];
+            originalIndices[i] = idx;
+            double lat = lats[idx], lon = lons[idx];
+            coords[i] = new Coordinate(lon, lat); // x=lon, y=lat
+            minLat = Math.min(minLat, lat);
+            maxLat = Math.max(maxLat, lat);
+            minLon = Math.min(minLon, lon);
+            maxLon = Math.max(maxLon, lon);
+        }
+
+        FreeFormPointSet pointSet = new FreeFormPointSet(coords);
+        // Pixel rows grow southwards, so the northern edge comes from the largest latitude.
+        int west = lonToPixel(minLon, TOTAL_PIXELS);
+        int north = latToPixel(maxLat, TOTAL_PIXELS);
+        int width = lonToPixel(maxLon, TOTAL_PIXELS) - west + 1;
+        int height = latToPixel(minLat, TOTAL_PIXELS) - north + 1;
+        WebMercatorExtents extents = new WebMercatorExtents(west, north, width, height, ZOOM);
+
+        return new DestinationChunk(pointSet, extents, originalIndices);
+    }
+
+    /** Assemble the R5 request for one origin and one destination chunk. */
+    private static RegionalTask buildTask(
+            DestinationChunk chunk, double originLat, double originLon, String mode, LocalDate date) {
+        RegionalTask task = new RegionalTask();
+        task.fromLat = originLat;
+        task.fromLon = originLon;
+        task.date = date;
+        task.percentiles = new int[]{50};
+        task.recordTimes = true;
+        task.destinationPointSets = new PointSet[]{chunk.pointSet};
+        task.zoom = chunk.extents.zoom;
+        task.west = chunk.extents.west;
+        task.north = chunk.extents.north;
+        task.width = chunk.extents.width;
+        task.height = chunk.extents.height;
+        task.fromTime = FROM_TIME;
+        task.toTime = TO_TIME;
+        task.maxTripDurationMinutes = MAX_TRIP_DURATION_MINUTES;
+
+        configureMode(task, mode);
+        return task;
+    }
+
+    /**
+     * Translate the textual mode into the task's access/egress/direct/transit mode sets.
+     *
+     * @throws IllegalArgumentException for any mode other than car, bicycle, walking or transit
+     */
+    private static void configureMode(RegionalTask task, String mode) {
+        switch (mode) {
+            case "car" -> setDirectMode(task, LegMode.CAR);
+            case "bicycle" -> setDirectMode(task, LegMode.BICYCLE);
+            case "walking" -> setDirectMode(task, LegMode.WALK);
+            case "transit" -> {
+                task.maxRides = MAX_RIDES;
+                task.accessModes = EnumSet.of(LegMode.WALK);
+                task.egressModes = EnumSet.of(LegMode.WALK);
+                task.directModes = EnumSet.of(LegMode.WALK);
+                task.transitModes = EnumSet.of(TransitModes.TRANSIT);
+            }
+            default -> throw new IllegalArgumentException("Unknown mode: " + mode);
+        }
+    }
+
+    /** Configure a street-only request: the same leg mode everywhere and no transit modes. */
+    private static void setDirectMode(RegionalTask task, LegMode legMode) {
+        task.accessModes = EnumSet.of(legMode);
+        task.egressModes = EnumSet.of(legMode);
+        task.directModes = EnumSet.of(legMode);
+        task.transitModes = EnumSet.noneOf(TransitModes.class);
+    }
+
+    /** Longitude in degrees to the pixel column in a world that is {@code totalPixels} wide. */
+    private static int lonToPixel(double lon, int totalPixels) {
+        return (int) Math.floor(totalPixels * (lon + 180.0) / 360.0);
+    }
+
+    /** Latitude in degrees to the pixel row (row 0 at the top) using the spherical Mercator formula. */
+    private static int latToPixel(double lat, int totalPixels) {
+        double latRad = Math.toRadians(lat);
+        return (int) Math.floor(totalPixels * (1.0 - Math.log(Math.tan(latRad) + 1.0 / Math.cos(latRad)) / Math.PI) / 2.0);
+    }
+}
--- a/uv.lock
+++ b/uv.lock
@ -140,6 +140,15 @@ css = [
    { name = "tinycss2", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
 ]

+[[package]]
+name = "blinker"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" },
+]
+
 [[package]]
 name = "branca"
 version = "0.8.2"
@ -379,6 +388,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" },
 ]

+[[package]]
+name = "fake-useragent"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/41/43/948d10bf42735709edb5ae51e23297d034086f17fc7279fef385a7acb473/fake_useragent-2.2.0.tar.gz", hash = "sha256:4e6ab6571e40cc086d788523cf9e018f618d07f9050f822ff409a4dfe17c16b2", size = 158898, upload-time = "2025-04-14T15:32:19.238Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/51/37/b3ea9cd5558ff4cb51957caca2193981c6b0ff30bd0d2630ac62505d99d0/fake_useragent-2.2.0-py3-none-any.whl", hash = "sha256:67f35ca4d847b0d298187443aaf020413746e56acd985a611908c73dba2daa24", size = 161695, upload-time = "2025-04-14T15:32:17.732Z" },
+]
+
 [[package]]
 name = "fastexcel"
 version = "0.19.0"
@ -400,6 +418,23 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl", hash = "sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463", size = 24024, upload-time = "2025-08-14T18:49:34.776Z" },
 ]

+[[package]]
+name = "flask"
+version = "3.1.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "blinker", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+    { name = "click", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+    { name = "itsdangerous", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+    { name = "jinja2", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+    { name = "markupsafe", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+    { name = "werkzeug", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/dc/6d/cfe3c0fcc5e477df242b98bfe186a4c34357b4847e87ecaef04507332dab/flask-3.1.2.tar.gz", hash = "sha256:bf656c15c80190ed628ad08cdfd3aaa35beb087855e2f494910aa3774cc4fd87", size = 720160, upload-time = "2025-08-19T21:03:21.205Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ec/f9/7f9263c5695f4bd0023734af91bedb2ff8209e8de6ead162f35d8dc762fd/flask-3.1.2-py3-none-any.whl", hash = "sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c", size = 103308, upload-time = "2025-08-19T21:03:19.499Z" },
+]
+
 [[package]]
 name = "folium"
 version = "0.20.0"
@ -593,6 +628,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042", size = 11321, upload-time = "2020-11-01T10:59:58.02Z" },
 ]

+[[package]]
+name = "itsdangerous"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" },
+]
+
 [[package]]
 name = "jedi"
 version = "0.19.2"
@ -1367,7 +1411,9 @@ name = "property-map"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "fake-useragent", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
    { name = "fastexcel", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+    { name = "flask", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
    { name = "folium", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
    { name = "httpx", extra = ["socks"], marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
    { name = "ipywidgets", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
@ -1397,8 +1443,11 @@ dev = [

 [package.metadata]
 requires-dist = [
+    { name = "fake-useragent", specifier = ">=2.2.0" },
    { name = "fastexcel", specifier = ">=0.19.0" },
+    { name = "flask" },
    { name = "folium", specifier = ">=0.20.0" },
+    { name = "httpx" },
    { name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
    { name = "ipywidgets", specifier = ">=8.0.0" },
    { name = "jupyter", specifier = ">=1.0.0" },
@ -1407,6 +1456,7 @@ requires-dist = [
    { name = "osmium", specifier = ">=4.0.0" },
    { name = "pandas", specifier = ">=2.0.0" },
    { name = "plotly", specifier = ">=6.5.2" },
+    { name = "polars" },
    { name = "polars", specifier = ">=1.37.1" },
    { name = "pyarrow", specifier = ">=15.0.0" },
    { name = "pyproj", specifier = ">=3.7.2" },
@ -2127,6 +2177,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616, upload-time = "2025-10-07T21:16:34.951Z" },
 ]

+[[package]]
+name = "werkzeug"
+version = "3.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe", marker = "python_full_version < '3.14' and sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5a/70/1469ef1d3542ae7c2c7b72bd5e3a4e6ee69d7978fa8a3af05a38eca5becf/werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67", size = 864754, upload-time = "2026-01-08T17:49:23.247Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025, upload-time = "2026-01-08T17:49:21.859Z" },
+]
+
 [[package]]
 name = "widgetsnbextension"
 version = "4.0.15"