Merge branch 'main' of https://github.com/rubyhrzhang/property-map

2026-02-01 20:04:27 +00:00 · 2026-02-01 20:04:27 +00:00 · 897dae77ac
commit 897dae77ac
parent e2ef516640 66c2a25457
104 changed files with 16454 additions and 4622 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,13 @@
 data/
 data_sources/
 .venv
 **/node_modules
 **/dist
 server-rs/target
 .git
 .task
 .claude
 __pycache__
 *.parquet
 analyses/
 *.log
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -0,0 +1,49 @@
 name: Docker
 on:
  push:
    branches: [main]
 env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
 jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=raw,value=latest
            type=sha,prefix=sha-,format=short
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,6 @@ tfl_journey_client
 **/node_modules
 **/__pycache__
 **/dist
 server-rs/target
 .task
 data
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@ -0,0 +1,31 @@
 {
    "recommendations": [
        "esbenp.prettier-vscode",
        "dbaeumer.vscode-eslint",
        "ms-toolsai.jupyter",
        "ms-python.python",
        "GitHub.copilot",
        "ms-azuretools.vscode-docker",
        "redhat.vscode-yaml",
        "1yib.rust-bundle",
        "alexcvzz.vscode-sqlite",
        "esbenp.prettier-vscode",
        "dbaeumer.vscode-eslint",
        "ms-python.python",
        "ms-toolsai.jupyter",
        "ms-azuretools.vscode-docker",
        "redhat.vscode-yaml",
        "tomoki1207.pdf",
        "qwtel.sqlite-viewer",
        "alexcvzz.vscode-sqlite",
        "rust-lang.rust-analyzer",
        "pkief.material-icon-theme",
        "detachhead.basedpyright",
        "editorconfig.editorconfig",
        "davidanson.vscode-markdownlint",
        "charliermarsh.ruff",
        "timonwong.shellcheck",
        "tonybaloney.vscode-pets",
        "vadimcn.vscode-lldb"
    ]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -2,6 +2,9 @@
    "files.exclude": {
        "*.venv": true,
        "**/__pycache__": true,
-        "**/node_modules": true
+        "**/node_modules": true,
        "**/.ruff_cache":true,
        "**/.pytest_cache":true,
        "**/target":true
    }
-}
+}
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -2,68 +2,228 @@
 This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
 NEVER EVER RUN GIT COMMANDS!!
 ## Project Overview
-Property Map is a full-stack geospatial web application that visualizes UK property price data aggregated by H3 hexagonal spatial indices. It combines Land Registry price data with postcode geolocation to create an interactive map for exploring property markets.
+Property Map is a full-stack geospatial application for visualizing UK property data on an interactive map. It combines Land Registry price-paid data, EPC energy certificates, postcode geolocation, TFL journey times, Index of Deprivation scores, crime statistics, ethnicity data, broadband speeds, school ratings, road noise, and OpenStreetMap POIs into a single wide parquet file, then serves aggregated H3 hexagon statistics and POI data via a Rust backend.
 ## Commands
-All commands use [Task](https://taskfile.dev) runner. Install with: `curl -1sLf 'https://dl.cloudsmith.io/public/task/task/setup.deb.sh' | sudo -E bash`
+All commands use [Task](https://taskfile.dev) runner. Python uses `uv run`. Frontend uses `npm run` from `frontend/`.
 ```bash
-# Initial setup (downloads ~GB of data, runs pipeline)
+# Development servers
-task prepare
+task dev:server           # Rust backend on :8001 (cargo run, debug build; use `task dev:server:release` for --release)
 task dev:frontend         # Webpack dev server on :3030 (proxies /api to :8001)
-# Development (run in separate terminals)
+# Data pipeline
-task server      # FastAPI backend on :8001
+task prepare              # Build wide.parquet from all pre-downloaded sources
 task frontend    # Webpack dev server on :3030 (proxies /api to :8001)
-# Code quality
+# Quality
-task lint        # Lint Python (ruff) + TypeScript (ESLint + Prettier)
+task lint                 # Lint all: Python (ruff) + TypeScript (ESLint+Prettier) + Rust (clippy+fmt)
-task format      # Auto-fix formatting
+task format               # Auto-fix formatting for all languages
-task typecheck   # TypeScript type checking
+task test                 # Python tests (fuzzy join, haversine, POI counts)
-task check       # All checks (lint + typecheck + build)
+task check                # Full validation: lint + build + test
-# Production
+# Building
-task build       # Build frontend
+task build:frontend       # TypeScript typecheck + webpack production build
-task prod        # Serve built frontend via FastAPI
+task build:server         # cargo build --release (runs in server-rs/)
 # Granular lint/format
 task lint:python          # uv run ruff check .
 task lint:frontend        # eslint + prettier --check
 task lint:rust            # cargo clippy -- -D warnings && cargo fmt --check
 task format:python        # ruff check --fix && ruff format
 task format:frontend      # eslint --fix + prettier --write
 task format:rust          # cargo fmt --all
 ```
 Running individual tests:
 ```bash
 uv run pytest pipeline/utils/test_haversine.py       # Single test file
 uv run pytest pipeline/utils/test_haversine.py -k "test_name"  # Single test
 ```
 ## Architecture
 ### Data Flow
 ```
-frontend/          React + TypeScript SPA (deck.gl/MapLibre for visualization)
+Raw sources → [Download scripts] → data/*.parquet
-  src/App.tsx      Main component with filters and map state
+  → [Fuzzy join EPC ↔ Price-Paid] → epc_pp.parquet
-  src/components/  Map.tsx (deck.gl H3HexagonLayer), Filters UI
+  → [Merge all datasets] → wide.parquet
-
+  → [Rust server loads into memory + precomputes H3 + spatial grid]
-server/            FastAPI backend
+  → [Frontend renders deck.gl H3HexagonLayer over MapLibre GL]
  main.py          App setup, CORS, static file mounting
  routes/hexagons.py  GET /api/hexagons - returns aggregated price data
 pipeline/          Data processing (Polars + H3)
  config.py        Central config (H3 resolutions 6-11, year/price ranges)
  sources/         Postcode loading, property price joins
  processors/      H3 aggregation (count, avg/median/min/max by cell+year)
 tfl_journey_client/  Generated TFL API client (local package)
 ```
-## Data Flow
+### Data Pipeline (`pipeline/`)
-1. **Download**: Land Registry prices + ArcGIS postcode→lat/lon mappings → `data_sources/`
+Python + Polars. Two phases:
 2. **Pipeline**: Join data, compute H3 indices, aggregate stats → `data_sources/processed/aggregates/*.parquet`
 3. **Serve**: Load parquet files into memory, filter by bounds/year/price, return as GeoJSON-like response
 4. **Visualize**: Frontend fetches on viewport change, renders hexagons colored by average price
-## Tech Stack
+1. **Download** (`pipeline/download/`) — Each script fetches one raw dataset into `data/`
 2. **Transform** (`pipeline/transform/`) — Joins and derives features:
   - `join_epc_pp.py` — Fuzzy-joins EPC ↔ price-paid by address within postcode buckets
   - `merge.py` — **Main pipeline**: joins all datasets → `wide.parquet` with human-readable column names
   - `transform_poi.py` — Filters POIs, maps to friendly names + emoji (exhaustive category validation)
   - `poi_proximity.py` — Counts POIs within 2km per postcode using 0.05° spatial grid
   - `crime.py` — Aggregates crime CSVs into yearly averages by LSOA
- **Frontend**: React 18, TypeScript, Webpack, TailwindCSS, deck.gl, MapLibre GL
+**Critical: column renaming in `merge.py`** — The pipeline renames columns from snake_case to human-readable names before writing `wide.parquet`. The Rust server auto-discovers features from whatever column names exist in the parquet. Key renames:
- **Backend**: Python 3.12, FastAPI, Polars, H3
+- `pp_address` → `Address per Property Register`
- **Package managers**: `uv` (Python), `npm` (frontend)
+- `postcode` → `Postcode`
 - `latest_price` → `Last known price`
 - `duration` → `Leashold/Freehold`
 - `total_floor_area` → `Total floor area (sqm)`
 - `current_energy_rating` → `Current energy rating`
 The server and frontend must handle these human-readable names. See the full rename map in `merge.py`.
 ### Backend (`server-rs/`)
 Rust + Axum. Loads parquet into memory at startup.
 **Structure:**
 - `data/property.rs` — Loads `wide.parquet`, auto-discovers numeric + enum features, computes histograms, sorts rows by spatial locality, precomputes H3 cells (resolutions 4–12)
 - `data/poi.rs` — Loads `filtered_uk_pois.parquet`
 - `index.rs` — `GridIndex`: 0.01° spatial grid for O(1) cell lookup
 - `filter.rs` — Parses filter strings and checks rows. Format: `name:min:max` (numeric), `name:val1|val2` (enum)
 - `routes/` — One file per endpoint
 - `consts.rs` — Key constants (histogram bins, H3 range, max enum cardinality, excluded columns)
 **API endpoints:**
 - `GET /api/features` — Feature metadata with histograms and 2nd/98th percentiles
 - `GET /api/hexagons?resolution=&bounds=&filters=` — H3 aggregates (min/max per feature per hex)
 - `GET /api/hexagon-properties?h3=&resolution=&filters=&limit=&offset=` — Paginated properties within a hexagon
 - `GET /api/pois?bounds=&categories=` — POIs by bounds (max 5000)
 - `GET /api/poi-categories` — Available POI category names
 Serves `frontend/dist/` as static fallback in production.
 **Data representation:**
 - Numeric features: row-major flat `Vec<f64>`, NaN = null
 - Enum features: `Vec<u8>` indices into value list, 255 = null
 - String fields (address, postcode): `Vec<String>`, empty = null
 - The server accepts the parquet path as a CLI argument (defaults to `data_sources/processed/wide.parquet`)
 ### Frontend (`frontend/`)
 React 18 + TypeScript. deck.gl `H3HexagonLayer` over MapLibre GL. TailwindCSS. No state management library — pure React hooks.
 **Key patterns:**
 - `App.tsx` manages all state, API fetching (150ms debounce), and URL state sync (300ms debounce)
 - URL encodes view/filters/POI categories/active tab as query params for shareable links
 - AbortControllers cancel in-flight requests on new queries
 - Zoom → H3 resolution: `<7→7, <9.5→8, <11→9, <13→10, ≥13→11`
 - Bounds quantized to 0.01° to match backend caching
 - Properties pane uses feature names from API response (human-readable), not hardcoded field names
 - Proxy: dev server on :3030 proxies `/api` to :8001; also handles VS Code `/proxy/PORT` patterns
 ## Frontend Design Guide (STRICT — must be followed for all UI changes)
 The frontend uses Tailwind's `darkMode: 'class'` strategy. The `dark` class is toggled on `<html>`. Every visible element must have both light and dark styles. **Never add a light-only color class without its `dark:` counterpart.** Run `task build:frontend` after any UI change to verify.
 ### Theme System
 - **State**: `App.tsx` owns a `theme` state (`'light' | 'dark' | 'system'`), persisted in `localStorage` under the key `theme`, default `'system'`.
 - **Effective theme**: When `'system'`, resolved via `window.matchMedia('(prefers-color-scheme: dark)')`. A `change` listener re-renders on OS preference flip.
 - **Toggle cycle**: light → dark → system → light. Three-way, not binary.
 - **Flash prevention**: `index.html` contains an inline `<script>` that applies the `dark` class before first paint. If the localStorage/matchMedia logic in that script changes, update it to match `App.tsx`.
 - **Prop plumbing**: `effectiveTheme` (`'light' | 'dark'`) is passed as a prop to `<Map>` and `<HomePage>`. Components that need the resolved theme must receive it as a prop — do not read localStorage or matchMedia inside child components.
 ### Color Token Reference
 Every UI element must use the correct token from this table. Do not invent new pairings.
 | Role | Light class | Dark class | Hex (dark) |
 |------|------------|------------|------------|
 | **Page / pane background** | `bg-warm-50` or `bg-white` | `dark:bg-warm-900` | #1c1917 |
 | **Card / elevated surface** | `bg-white` | `dark:bg-warm-800` | #292524 |
 | **Inset / recessed surface** | `bg-warm-100` or `bg-warm-50` | `dark:bg-warm-800` | #292524 |
 | **Input / select background** | `bg-white` | `dark:bg-warm-800` or `dark:bg-warm-900` | |
 | **Primary border** | `border-warm-200` | `dark:border-warm-700` | #44403c |
 | **Subtle border (dividers)** | `border-warm-100` | `dark:border-warm-800` | #292524 |
 | **Primary text (headings)** | `text-navy-950` or implicit dark | `dark:text-warm-100` | #f5f5f4 |
 | **Body text** | `text-warm-700` | `dark:text-warm-300` | #d6d3d1 |
 | **Secondary text (labels, hints)** | `text-warm-500` or `text-warm-600` | `dark:text-warm-400` | #a8a29e |
 | **Disabled / placeholder text** | `text-warm-400` / `placeholder-warm-400` | `dark:text-warm-500` / `dark:placeholder-warm-500` | #78716c |
 | **Accent text (links, actions)** | `text-teal-600` | `dark:text-teal-400` | #1de4c3 |
 | **Accent hover text** | `hover:text-teal-800` | `dark:hover:text-teal-300` | #51f7d9 |
 | **Accent background (highlights)** | `bg-teal-50` | `dark:bg-teal-900/30` | |
 | **Active ring / focus ring** | `ring-teal-400` | same — works in both | |
 | **Price / key metric text** | `text-teal-700` | `dark:text-teal-400` | |
 | **Remove / close button** | `text-warm-400 hover:text-warm-700` | `dark:hover:text-warm-300` | |
 | **Checkbox accent** | `accent-teal-600` | same — works in both | |
 | **Header (unchanged both modes)** | `bg-navy-900 text-white` | same | |
 ### Mapping Rules for Specific Contexts
 **Sidebars (Filters, POIPane, PropertiesPane, right-pane tabs):**
 - Container: `bg-white dark:bg-warm-900`
 - Inner cards / dropdown menus: `bg-white dark:bg-warm-800`
 - Borders: `border-warm-200 dark:border-warm-700`
 - Tab text (active): add `dark:text-warm-100`
 - Tab text (inactive): `text-warm-600 dark:text-warm-400`
 **Map overlays (PostcodeSearch, MapLegend, POI popup, loading indicator):**
 - Background: `bg-white dark:bg-warm-800`
 - Text: `dark:text-warm-200`
 - Semi-transparent variants: use `/90` opacity suffix (e.g. `dark:bg-warm-800/90`)
 - Deck.gl tooltip (inline styles, not Tailwind): use `#292524` bg / `#e7e5e4` text / `rgba(0,0,0,0.5)` shadow in dark.
 - Deck.gl postcode labels (RGB arrays): `[220,220,220,220]` text / `[30,30,30,200]` outline in dark; inverse in light.
 **Map basemaps:**
 - Light: `https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json`
 - Dark: `https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json`
 - `handleMapLoad` must only apply label/water tweaks in light mode. Dark Matter has good defaults.
 **HomePage (landing page):**
 - Page bg: `bg-warm-50 dark:bg-warm-900`
 - Cards: `bg-white dark:bg-warm-800` with `border-warm-200 dark:border-warm-700`
 - Backdrop-blur panels: use `/60` or `/40` opacity on both `bg-warm-50` and `dark:bg-warm-900`
 - HexCanvas: reads `isDark` ref; uses dimmer fill (`#058172`) and stroke (`#0a665b`) at 60% opacity multiplier.
 - All headings: `dark:text-warm-100`. All body: `dark:text-warm-300` or `dark:text-warm-400`.
 **DataSourcesPage:**
 - Same card pattern as above. Footer is already dark (`bg-navy-900`) — no changes needed.
 - License badges: `bg-warm-100 dark:bg-warm-700 text-warm-600 dark:text-warm-300`
 - Links: `text-teal-600 dark:text-teal-400`
 **DataSources floating button (on map):**
 - `bg-white/90 dark:bg-warm-800/90` with `text-teal-600 dark:text-teal-400`
 ### Rules for New Components
 1. **Every `bg-white` needs `dark:bg-warm-800` or `dark:bg-warm-900`.** Pane-level = warm-900, card-level = warm-800.
 2. **Every `border-warm-200` needs `dark:border-warm-700`.**
 3. **Every `text-warm-*` needs a `dark:text-warm-*` counterpart.** Follow the token table — don't guess.
 4. **Every `text-teal-600` needs `dark:text-teal-400`.** Every `hover:text-teal-800` needs `dark:hover:text-teal-300`.
 5. **Every `bg-teal-50` needs `dark:bg-teal-900/30`.**
 6. **Every `hover:bg-warm-50` needs `dark:hover:bg-warm-700` or `dark:hover:bg-warm-800`.**
 7. **Inputs and selects**: always add `dark:bg-warm-800 dark:text-warm-200 dark:border-warm-700`. Placeholders get `dark:placeholder-warm-500`.
 8. **Checkboxes**: always include `accent-teal-600 rounded`.
 9. **Do not use Tailwind `dark:` classes inside deck.gl layers or canvas code.** Use the `theme` prop / ref and conditional JS values.
 10. **Do not add `transition-*` classes for theme switching.** The global CSS rule in `index.css` handles transitions for `background-color`, `border-color`, and `color` on all standard HTML elements. Adding per-element transition classes will conflict.
 11. **Never hardcode hex colors in JSX `style=` props for themed elements** (except deck.gl tooltip and canvas, which can't use Tailwind). Use the Tailwind classes from the token table instead.
 12. **The header (`bg-navy-900`) is identical in both themes.** Do not add dark variants to it.
 ### Verification Checklist (for any UI PR)
 - [ ] `task build:frontend` passes with no errors
 - [ ] Every new `bg-*`, `text-*`, `border-*` class has a `dark:` counterpart (search your diff)
 - [ ] Toggle through all three modes (light → dark → system) with no flash
 - [ ] Map basemap switches when theme changes
 - [ ] Sidebars, dropdowns, and popups are readable in both modes
 - [ ] HomePage and DataSourcesPage adapt correctly
 ## Key Implementation Details
- Backend caches dataframes in memory and uses LRU cache on queries
+- **Spatial sort**: Rows sorted by 0.01° grid cell at load time for cache-friendly sequential access
- Bounds rounded to 0.01° precision to improve cache hits
+- **Row-major layout**: `feature_data[row * num_features + feat_idx]` — all features for one property are contiguous
- Results capped at 50,000 hexagons per request (truncated flag in response)
+- **H3 precomputation**: Resolutions 4–12 computed in parallel (rayon) at startup
- Frontend debounces API calls on map movement
+- **Histogram percentiles without sorting**: O(n) two-pass algorithm — build histogram, interpolate percentiles
 - **Direct JSON writing**: Hexagon endpoint writes JSON via string buffer, avoids serde_json::Value allocations
 - **POI transform validation**: Fails if any OSM category is unmapped — guarantees exhaustive coverage
 - **Fuzzy join**: Groups by postcode, uses `thefuzz.token_sort_ratio` with numeric token compatibility, greedy assignment from highest score
 - **Filter bounds format**: `south,west,north,east` (not standard bbox order)
 - **POI proximity**: Uses 0.05° grid (~5km cells) to reduce candidates before haversine distance check
--- a/25
+++ b/25
@ -0,0 +1,25 @@
 # Stage 1: Build frontend
 FROM node:20-slim AS frontend
 WORKDIR /app/frontend
 COPY frontend/package.json frontend/package-lock.json ./
 RUN npm ci
 COPY frontend/ ./
 RUN npm run build
 # Stage 2: Build Rust server
 FROM rust:1.83-bookworm AS server
 WORKDIR /app
 COPY server-rs/ server-rs/
 WORKDIR /app/server-rs
 RUN cargo build --release
 # Stage 3: Runtime
 FROM debian:bookworm-slim
 RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY --from=server /app/server-rs/target/release/property-map-server ./
 COPY --from=frontend /app/frontend/dist ./dist/
 EXPOSE 8001
 ENTRYPOINT ["./property-map-server"]
 CMD ["--data", "/data/wide.parquet", "--pois", "/data/filtered_uk_pois.parquet"]
--- a/Journey.yaml
+++ b/Journey.yaml
--- a/README.md
+++ b/README.md
@ -69,3 +69,11 @@ Nice to haves?
  - [Local Authority (Upper Tier)](https://communitiesopendata-communities.hub.arcgis.com/datasets/6e8edb2974da4834bbafa09644a5b02d_0/explore?location=52.684195%2C-2.489482%2C7.17)
 - [Open Geography](https://geoportal.statistics.gov.uk/)
 - [CommunitiesOpenData](https://communitiesopendata-communities.hub.arcgis.com/)
 - [PlanetOSM](https://planet.openstreetmap.org/) for open street map POI
 - [TFL api](https://api-portal.tfl.gov.uk/signin)
 - [EPC](https://epc.opendatacommunities.org/login) - <https://epc.opendatacommunities.org/downloads/domestic>
 rightmove:
 curl '<https://www.rightmove.co.uk/api/property-search/listing/search?searchLocation=E14&useLocationIdentifier=true&locationIdentifier=OUTCODE%5E749&buy=For+sale&radius=20.0&_includeSSTC=on&index=0&sortType=2&channel=BUY&transactionType=BUY>'
 curl '<https://www.onthemarket.com/async/search/properties-v2/?search-type=for-sale&location-id=e13&view=map-list>'
--- a/Taskfile.data.yml
+++ b/Taskfile.data.yml
@ -0,0 +1,191 @@
 version: '3'
 vars:
  DATA_DIR: /bulk/property-data
  ARCGIS_OUTPUT: "{{.DATA_DIR}}/arcgis_data.parquet"
  PRICE_PAID_OUTPUT: "{{.DATA_DIR}}/price-paid-complete.parquet"
  IOD_OUTPUT: "{{.DATA_DIR}}/IoD2025_Scores.parquet"
  POIS_RAW_OUTPUT: "{{.DATA_DIR}}/uk_pois.parquet"
  POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
  POI_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/poi_proximity.parquet"
  EPC_PP_OUTPUT: "{{.DATA_DIR}}/epc_pp.parquet"
  WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
  EPC: "{{.DATA_DIR}}/certificates.csv"
  JOURNEY_TIMES: "./data_sources/processed/journey_times_bank_checkpoint.parquet"
  ETHNICITY_OUTPUT: "{{.DATA_DIR}}/ethnicity_by_la.parquet"
  CRIME_DIR: "{{.DATA_DIR}}/crime"
  CRIME_OUTPUT: "{{.DATA_DIR}}/crime_by_lsoa.parquet"
  NOISE_OUTPUT: "{{.DATA_DIR}}/road_noise.parquet"
  OFSTED_OUTPUT: "{{.DATA_DIR}}/ofsted.parquet"
  NAPTAN_OUTPUT: "{{.DATA_DIR}}/naptan.parquet"
  BROADBAND_OUTPUT: "{{.DATA_DIR}}/broadband.parquet"
  SCHOOL_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/school_proximity.parquet"
 tasks:
  prompt:epc:
    desc: Prompt user to download EPC dataset (requires registration)
    status:
      - test -f {{.EPC}}
    cmds:
      - |
        echo ""
        echo "=== EPC dataset not found ==="
        echo "The EPC certificates file is required: {{.EPC}}"
        echo ""
        echo "To obtain it, register at https://epc.opendatacommunities.org/login"
        echo ""
        exit 1
  prompt:journey-times:
    desc: Prompt user to obtain TFL journey times if missing (requires API key registration)
    status:
      - test -f {{.JOURNEY_TIMES}}
    deps:
      - download:arcgis
    cmds:
      - |
        echo ""
        echo "=== TFL journey times not found ==="
        echo "Register for a TFL API key at https://api-portal.tfl.gov.uk/signin"
        echo "Then set the TFL_API_KEY environment variable and re-run this task."
        echo ""
        exit 1
  download:arcgis:
    desc: Download and convert ArcGIS postcode data
    status:
      - test -f {{.ARCGIS_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.arcgis --output {{.ARCGIS_OUTPUT}}
  download:price-paid:
    desc: Download and convert Land Registry price-paid data
    status:
      - test -f {{.PRICE_PAID_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.price_paid --output {{.PRICE_PAID_OUTPUT}}
  download:deprivation:
    desc: Download and convert Index of Deprivation data
    status:
      - test -f {{.IOD_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.deprivation_data --output {{.IOD_OUTPUT}}
  download:ethnicity:
    desc: Download ethnicity by local authority data
    status:
      - test -f {{.ETHNICITY_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.ethnicity --output {{.ETHNICITY_OUTPUT}}
  download:naptan:
    desc: Download NaPTAN station data
    status:
      - test -f {{.NAPTAN_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.naptan --output {{.NAPTAN_OUTPUT}}
  download:pois:
    desc: Download and extract POIs from OpenStreetMap
    status:
      - test -f {{.POIS_RAW_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.pois --output {{.POIS_RAW_OUTPUT}}
  download:ofsted:
    desc: Download Ofsted school inspection outcomes
    status:
      - test -f {{.OFSTED_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.ofsted --output {{.OFSTED_OUTPUT}}
  download:broadband:
    desc: Download Ofcom broadband performance data
    status:
      - test -f {{.BROADBAND_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.broadband --output {{.BROADBAND_OUTPUT}}
  download:noise:
    desc: Download Defra noise data (road, rail, airport) sampled at postcode centroids
    deps:
      - download:arcgis
    status:
      - test -f {{.NOISE_OUTPUT}}
    cmds:
      - uv run python -m pipeline.download.noise --arcgis {{.ARCGIS_OUTPUT}} --output {{.NOISE_OUTPUT}}
  transform:pois:
    desc: Transform raw POIs to filtered version with friendly names
    deps:
      - download:pois
      - download:naptan
    status:
      - test -f {{.POIS_FILTERED_OUTPUT}}
    cmds:
      - uv run python -m pipeline.transform.transform_poi --input {{.POIS_RAW_OUTPUT}} --naptan {{.NAPTAN_OUTPUT}} --output {{.POIS_FILTERED_OUTPUT}}
  transform:epc-pp:
    desc: Fuzzy join EPC and Price Paid data
    deps:
      - download:price-paid
      - prompt:epc
    status:
      - test -f {{.EPC_PP_OUTPUT}}
    cmds:
      - uv run python -m pipeline.transform.join_epc_pp --epc {{.EPC}} --price-paid {{.PRICE_PAID_OUTPUT}} --output {{.EPC_PP_OUTPUT}}
  transform:crime:
    desc: Transform crime CSVs into yearly averages by LSOA
    status:
      - test -f {{.CRIME_OUTPUT}}
    cmds:
      - uv run python -m pipeline.transform.crime --input {{.CRIME_DIR}} --output {{.CRIME_OUTPUT}}
  transform:poi-proximity:
    desc: Compute POI proximity counts per postcode
    deps:
      - download:arcgis
      - transform:pois
    status:
      - test -f {{.POI_PROXIMITY_OUTPUT}}
    cmds:
      - uv run python -m pipeline.transform.poi_proximity --arcgis {{.ARCGIS_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}} --output {{.POI_PROXIMITY_OUTPUT}}
  transform:school-proximity:
    desc: Compute good+ school proximity counts per postcode
    deps:
      - download:ofsted
      - download:arcgis
    status:
      - test -f {{.SCHOOL_PROXIMITY_OUTPUT}}
    cmds:
      - uv run python -m pipeline.transform.school_proximity --ofsted {{.OFSTED_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --output {{.SCHOOL_PROXIMITY_OUTPUT}}
  download:journey-times:
    desc: Fetch TfL journey times for all postcodes
    deps:
      - download:arcgis
    status:
      - test -f {{.JOURNEY_TIMES}}
    cmds:
      - uv run python -m pipeline.journey_times
  prepare:
    desc: Build wide property dataframe with all joins
    deps:
      - transform:epc-pp
      - download:arcgis
      - download:deprivation
      - download:ethnicity
      - download:broadband
      - download:noise
      - transform:crime
      - transform:poi-proximity
      - transform:school-proximity
      - prompt:journey-times
    status:
      - test -f {{.WIDE_OUTPUT}}
    cmds:
      - uv run python -m pipeline.transform.merge --epc-pp {{.EPC_PP_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --iod {{.IOD_OUTPUT}} --poi-proximity {{.POI_PROXIMITY_OUTPUT}} --journey-times {{.JOURNEY_TIMES}} --ethnicity {{.ETHNICITY_OUTPUT}} --crime {{.CRIME_OUTPUT}} --noise {{.NOISE_OUTPUT}} --school-proximity {{.SCHOOL_PROXIMITY_OUTPUT}} --broadband {{.BROADBAND_OUTPUT}} --output {{.WIDE_OUTPUT}}
--- a/Taskfile.yml
+++ b/Taskfile.yml
@ -1,66 +1,79 @@
 version: '3'
 includes:
  data:
    taskfile: ./Taskfile.data.yml
    flatten: true
 vars:
  DATA_DIR: /bulk/property-data
  WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
  POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
 tasks:
  install:
-    desc: Install dependencies, generate client, and download data
+    desc: Install dependencies
    cmds:
      - uv run generate_tfl_client.py
      - uv sync
      - cd frontend && npm install
-  download:
+  test:
-    desc: Download data
+    cmds:
      - uv run -m pipeline.utils.test_fuzzy_join
      - uv run pytest pipeline/utils/test_haversine.py
      - uv run pytest pipeline/utils/test_poi_counts.py
  test:server:
    desc: Run Rust backend tests
    dir: server-rs
    cmds:
      - cargo test
  dev:server:
    desc: Run Rust backend on port 8001 (debug build, fast compile)
    dir: server-rs
    cmds:
      - cargo run -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
  dev:server:release:
    desc: Run Rust backend on port 8001 (release build)
    dir: server-rs
    cmds:
      - cargo run --release -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
  dev:frontend:
    desc: Run frontend dev server on port 3030 (proxies /api to :8001)
    dir: frontend
    deps:
      - install
    cmds:
      - uv run python download_land_registry.py
      - uv run python download_arcgis_data.py
      - uv run python download_pois.py
  pipeline:
    desc: Run data processing pipeline
    deps:
      - download
    cmds:
      - uv run python -m pipeline.run
  prepare:
    desc: Prepare the application (install, download data, run pipeline)
    deps:
      - pipeline
  server:
    desc: Run FastAPI backend on port 8001
    cmds:
      - uv run fastapi dev server/main.py --port 8001
  frontend:
    desc: Run frontend dev server on port 3030 (proxies /api to :8001)
    dir: frontend
    cmds:
      - npm run dev
-  build:
+  build:server:
    desc: Build server for production
    dir: server-rs
    cmds:
      - cargo build --release
  build:frontend:
    desc: Build frontend for production
    dir: frontend
    cmds:
      - npm run typecheck
      - npm run build
  prod:
    desc: Run production server (serves built frontend)
    cmds:
      - uv run fastapi run server/main.py --port 8001
  lint:
-    desc: Lint all code (Python and TypeScript)
+    desc: Lint all code (Python, TypeScript, and Rust)
    cmds:
      - task: lint:python
      - task: lint:frontend
      - task: lint:rust
  lint:python:
-    desc: Lint Python code with ruff
+    desc: Lint Python code with ruff and check for unused dependencies
    cmds:
      - uv run ruff check .
      - uv run deptry .
  lint:frontend:
    desc: Lint frontend TypeScript code
@ -69,11 +82,20 @@ tasks:
      - npm run lint
      - npm run format:check
  lint:rust:
    desc: Lint Rust code with clippy, check formatting, and detect unused dependencies
    dir: server-rs
    cmds:
      - cargo clippy -- -D warnings
      - cargo fmt --check
      - cargo machete
  format:
-    desc: Format all code (Python and TypeScript)
+    desc: Format all code (Python, TypeScript, and Rust)
    cmds:
      - task: format:python
      - task: format:frontend
      - task: format:rust
  format:python:
    desc: Format Python code with ruff
@ -88,15 +110,17 @@ tasks:
      - npm run lint:fix
      - npm run format
  format:rust:
    desc: Format Rust code with cargo fmt
    dir: server-rs
    cmds:
      - cargo fmt --all
  check:
    desc: Run all checks (lint, build, test)
    cmds:
      - task: lint
-      - task: typecheck
+      - task: build:server
-      - task: build
+      - task: build:frontend
-
+      - task: test
-  typecheck:
+      - task: test:server
    desc: Type check frontend TypeScript code
    dir: frontend
    cmds:
      - npm run typecheck
--- a/analyses/epc_analysis.ipynb
+++ b/analyses/epc_analysis.ipynb
--- a/analyses/journey_times_analysis.ipynb
+++ b/analyses/journey_times_analysis.ipynb
--- a/analyses/property_analysis.ipynb
+++ b/analyses/property_analysis.ipynb
--- a/analyses/wide.ipynb
+++ b/analyses/wide.ipynb
--- a/download_arcgis_data.py
+++ b/download_arcgis_data.py
@ -1,129 +0,0 @@
 #!/usr/bin/env python3
 """Download ArcGIS data and convert to Parquet."""
 # Run it with:
 #   uv run download_arcgis_data.py
 import time
 import zipfile
 import httpx
 import polars as pl
 from pathlib import Path
 from tqdm import tqdm
 URL = "https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data"
 BASE_DATA_PATH = Path("./data_sources")
 BASE_DATA_PATH.mkdir(exist_ok=True)
 DOWNLOAD_PATH = BASE_DATA_PATH / "arcgis_data.zip"
 EXTRACT_PATH = BASE_DATA_PATH / "arcgis_extracted"
 PARQUET_PATH = BASE_DATA_PATH / "arcgis_data.parquet"
 MAX_RETRIES = 3
 def download_with_progress(url: str, output_path: Path) -> None:
    """Download ``url`` to ``output_path`` with a progress bar and retries.

    The response body is streamed into a sibling ``<name>.part`` file and only
    moved onto ``output_path`` after the whole body has been written. A failed
    or interrupted transfer therefore never leaves a truncated file at
    ``output_path``, which matters because ``main`` treats an existing file as
    a finished download.

    Connection errors and read timeouts are retried up to ``MAX_RETRIES``
    attempts, sleeping ``2**attempt`` seconds between attempts; the final
    failure is re-raised. Any other exception propagates immediately.
    """
    partial_path = output_path.with_name(output_path.name + ".part")
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            with httpx.stream(
                "GET",
                url,
                follow_redirects=True,
                timeout=httpx.Timeout(30.0, read=None),
            ) as response:
                response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]
                total = int(response.headers.get("content-length", 0))
                with (
                    open(partial_path, "wb") as f,
                    tqdm(
                        total=total,
                        unit="B",
                        unit_scale=True,
                        unit_divisor=1024,
                        desc="Downloading",
                    ) as pbar,
                ):
                    for chunk in response.iter_bytes(chunk_size=8192):
                        f.write(chunk)
                        pbar.update(len(chunk))
            # The partial file is closed at this point, so it can be moved
            # into its final place in one step.
            partial_path.replace(output_path)
            return  # Success
        except (httpx.ConnectError, httpx.ReadTimeout) as e:
            if attempt < MAX_RETRIES:
                wait = 2**attempt
                print(f"Attempt {attempt} failed: {e}. Retrying in {wait}s...")
                time.sleep(wait)
            else:
                raise
        finally:
            # No-op after a successful move; otherwise discards the leftovers
            # of a failed attempt.
            partial_path.unlink(missing_ok=True)
 def extract_zip(zip_path: Path, extract_path: Path) -> list[Path]:
    """Extract ``zip_path`` into ``extract_path`` and return the extracted files.

    ``extract_path`` is created first, including any missing parents.
    Directory entries stored in the archive are created on disk but left out
    of the returned list, so its length is the number of files extracted.
    """
    print("Extracting ZIP file...")
    extract_path.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(extract_path)
        return [
            extract_path / info.filename
            for info in zf.infolist()
            if not info.is_dir()
        ]
 def find_data_file(extract_path: Path) -> Path:
    """Locate the main data file somewhere below ``extract_path``.

    File patterns are tried in priority order (CSV, XLSX, XLS, JSON, GeoJSON)
    using a recursive search. The first pattern with any match decides the
    result; if it matched several files, the largest one on disk is returned.

    Raises:
        FileNotFoundError: if no file matches any of the patterns.
    """

    def size_on_disk(candidate: Path) -> int:
        return candidate.stat().st_size

    patterns = ("*.csv", "*.xlsx", "*.xls", "*.json", "*.geojson")
    for pattern in patterns:
        if matches := list(extract_path.rglob(pattern)):
            return max(matches, key=size_on_disk)
    raise FileNotFoundError(f"No data file found in {extract_path}")
 def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
    """Load ``data_path`` with Polars and save it as ZSTD-compressed Parquet.

    The reader is picked from the (case-insensitive) file extension: CSV,
    Excel (``.xlsx``/``.xls``) or JSON (``.json``/``.geojson``). A short
    summary of the result is printed afterwards.

    Raises:
        ValueError: if the extension is not one of the supported formats.
    """
    print(f"Converting {data_path.name} to Parquet...")
    extension = data_path.suffix.lower()
    if extension in (".xlsx", ".xls"):
        frame = pl.read_excel(data_path)
    elif extension in (".json", ".geojson"):
        frame = pl.read_json(data_path)
    elif extension == ".csv":
        frame = pl.read_csv(data_path, try_parse_dates=True)
    else:
        raise ValueError(f"Unsupported file format: {extension}")
    frame.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
    print(f"Rows: {frame.height:,}")
    print(f"Columns: {frame.columns}")
    # Report the input and output sizes side by side, in MiB.
    for label, path in (("Original", data_path), ("Parquet", parquet_path)):
        print(f"{label} size: {path.stat().st_size / 1024**2:.1f} MB")
 def main() -> None:
    """Download the ArcGIS archive (unless cached) and convert it to Parquet.

    Nothing is done when the Parquet output already exists. An existing
    download is reused. A ZIP download is extracted and its main data file
    converted; anything else is converted directly.
    """
    if PARQUET_PATH.exists():
        print(f"Parquet already exists at {PARQUET_PATH}, skipping")
        return
    if DOWNLOAD_PATH.exists():
        print(f"File already exists at {DOWNLOAD_PATH}, skipping download")
    else:
        download_with_progress(URL, DOWNLOAD_PATH)
    if not zipfile.is_zipfile(DOWNLOAD_PATH):
        # Not an archive: the download itself is the data file.
        source_file = DOWNLOAD_PATH
    else:
        extracted = extract_zip(DOWNLOAD_PATH, EXTRACT_PATH)
        print(f"Extracted {len(extracted)} files")
        source_file = find_data_file(EXTRACT_PATH)
    convert_to_parquet(source_file, PARQUET_PATH)
 if __name__ == "__main__":
    main()
--- a/download_deprivation_data.py
+++ b/download_deprivation_data.py
@ -1,61 +0,0 @@
 #!/usr/bin/env python3
 """Download IoD2025 Deprivation Scores and convert to Parquet."""
 import httpx
 import polars as pl
 from pathlib import Path
 URL = "https://assets.publishing.service.gov.uk/media/691ded34513046b952c500bd/File_5_IoD2025_Scores_for_the_Indices_of_Deprivation.xlsx"
 BASE_DATA_PATH = Path("./data_sources")
 BASE_DATA_PATH.mkdir(exist_ok=True)
 XLSX_PATH = BASE_DATA_PATH / "IoD2025_Scores.xlsx"
 PARQUET_PATH = BASE_DATA_PATH / "IoD2025_Scores.parquet"
 def download_file(url: str, output_path: Path) -> None:
    """Download ``url`` to ``output_path``, printing progress as it goes.

    The body is streamed into a sibling ``<name>.part`` file that is moved
    onto ``output_path`` only after the transfer completes. A failed or
    interrupted download therefore leaves no truncated file behind, which
    matters because ``main`` skips the download when the target exists.

    Progress is printed only when the server reports a content length.
    HTTP error statuses and transport errors propagate to the caller.
    """
    print(f"Downloading from {url}...")
    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        with httpx.stream("GET", url, follow_redirects=True, timeout=60) as response:
            response.raise_for_status()
            total = int(response.headers.get("content-length", 0))
            downloaded = 0
            with open(partial_path, "wb") as f:
                for chunk in response.iter_bytes(chunk_size=8192):
                    f.write(chunk)
                    downloaded += len(chunk)
                    if total:
                        print(f"\rDownloaded {downloaded / 1024 / 1024:.1f} MB / {total / 1024 / 1024:.1f} MB", end="")
        # The partial file is closed here, so it can be moved into place.
        partial_path.replace(output_path)
    finally:
        # No-op after a successful move; otherwise removes the partial file.
        partial_path.unlink(missing_ok=True)
    print(f"\nSaved to {output_path}")
 def convert_to_parquet(xlsx_path: Path, parquet_path: Path) -> None:
    """Save the workbook's second sheet as a ZSTD-compressed Parquet file.

    Prints the table's shape and columns, then the size of both files.
    """
    print("Reading Excel file (sheet 2)...")
    # sheet_id is 1-indexed, so 2 selects the second sheet (IoD2025 Scores).
    scores = pl.read_excel(xlsx_path, sheet_id=2)
    print(f"Shape: {scores.shape}")
    print(f"Columns: {scores.columns}")
    scores.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
    for label, path in (("Excel", xlsx_path), ("Parquet", parquet_path)):
        print(f"{label} size: {path.stat().st_size / 1024 / 1024:.1f} MB")
 def main() -> None:
    """Fetch the IoD2025 workbook if it is not cached, then convert it."""
    if XLSX_PATH.exists():
        print(f"Excel file already exists at {XLSX_PATH}, skipping download")
    else:
        download_file(URL, XLSX_PATH)
    convert_to_parquet(XLSX_PATH, PARQUET_PATH)
 if __name__ == "__main__":
    main()
--- a/download_land_registry.py
+++ b/download_land_registry.py
@ -1,114 +0,0 @@
 #!/usr/bin/env python3
 """Download Land Registry price paid data and convert to Parquet."""
 # Run it with:
 #   uv run download_land_registry.py
 # Note: the download needs unrestricted network access, so run it on a machine that has it. The ~5 GB CSV should compress to roughly 1 GB in Parquet format with ZSTD compression.
 import time
 import httpx
 import polars as pl
 from pathlib import Path
 from tqdm import tqdm
 URL = "http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv"
 BASE_DATA_PATH = Path("./data_sources")
 BASE_DATA_PATH.mkdir(exist_ok=True)
 CSV_PATH = BASE_DATA_PATH / "pp-complete.csv"
 PARQUET_PATH = BASE_DATA_PATH / "pp-complete.parquet"
 MAX_RETRIES = 3
 def download_with_progress(url: str, output_path: Path) -> None:
    """Download ``url`` to ``output_path`` with a progress bar and retries.

    The response body is streamed into a sibling ``<name>.part`` file and only
    moved onto ``output_path`` after the whole body has been written. A failed
    or interrupted transfer therefore never leaves a truncated file at
    ``output_path``, which matters because ``main`` treats an existing CSV as
    a finished download.

    Connection errors and read timeouts are retried up to ``MAX_RETRIES``
    attempts, sleeping ``2**attempt`` seconds between attempts; the final
    failure is re-raised. Any other exception propagates immediately.
    """
    partial_path = output_path.with_name(output_path.name + ".part")
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            with httpx.stream(
                "GET",
                url,
                follow_redirects=True,
                timeout=httpx.Timeout(30.0, read=None),
            ) as response:
                response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]
                total = int(response.headers.get("content-length", 0))
                with (
                    open(partial_path, "wb") as f,
                    tqdm(
                        total=total,
                        unit="B",
                        unit_scale=True,
                        unit_divisor=1024,
                        desc="Downloading",
                    ) as pbar,
                ):
                    for chunk in response.iter_bytes(chunk_size=8192):
                        f.write(chunk)
                        pbar.update(len(chunk))
            # The partial file is closed at this point, so it can be moved
            # into its final place in one step.
            partial_path.replace(output_path)
            return  # Success
        except (httpx.ConnectError, httpx.ReadTimeout) as e:
            if attempt < MAX_RETRIES:
                wait = 2**attempt
                print(f"Attempt {attempt} failed: {e}. Retrying in {wait}s...")
                time.sleep(wait)
            else:
                raise
        finally:
            # No-op after a successful move; otherwise discards the leftovers
            # of a failed attempt.
            partial_path.unlink(missing_ok=True)
 def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
    """Read the price paid CSV with Polars and save it as ZSTD Parquet.

    The CSV is read without a header row and the column names below are
    supplied instead. Row count and both file sizes are printed afterwards.
    """
    print("Converting to Parquet...")
    # Land Registry CSV columns; see
    # https://www.gov.uk/guidance/about-the-price-paid-data
    column_names = [
        "transaction_id",
        "price",
        "date_of_transfer",
        "postcode",
        "property_type",
        "old_new",
        "duration",
        "paon",
        "saon",
        "street",
        "locality",
        "town_city",
        "district",
        "county",
        "ppd_category",
        "record_status",
    ]
    sales = pl.read_csv(
        csv_path,
        has_header=False,
        new_columns=column_names,
        try_parse_dates=True,
    )
    sales.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
    print(f"Rows: {sales.height:,}")
    for label, path in (("CSV", csv_path), ("Parquet", parquet_path)):
        print(f"{label} size: {path.stat().st_size / 1024**2:.1f} MB")
 def main() -> None:
    """Download the price paid CSV (unless cached) and convert it to Parquet.

    Nothing is done when the Parquet output already exists.
    """
    if PARQUET_PATH.exists():
        print(f"Parquet already exists at {PARQUET_PATH}, skipping")
        return
    if CSV_PATH.exists():
        print(f"CSV already exists at {CSV_PATH}, skipping download")
    else:
        download_with_progress(URL, CSV_PATH)
    convert_to_parquet(CSV_PATH, PARQUET_PATH)
 if __name__ == "__main__":
    main()
--- a/download_pois.py
+++ b/download_pois.py
@ -1,54 +0,0 @@
 """Download POI data for the UK from Overture Maps."""
 from pathlib import Path
 import overturemaps
 import pyarrow as pa
 import pyarrow.parquet as pq
 from tqdm import tqdm
 # UK bounding box (west, south, east, north)
 UK_BBOX = (-8.65, 49.86, 1.77, 60.86)
 OUTPUT_DIR = Path("data_sources")
 OUTPUT_FILE = OUTPUT_DIR / "uk_pois.parquet"
 def main() -> None:
    """Download "place" records inside ``UK_BBOX`` and write them to Parquet.

    Nothing is downloaded when ``OUTPUT_FILE`` already exists. All record
    batches are collected in memory, combined into one table and written in
    a single call.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    if OUTPUT_FILE.exists():
        print(f"POI file already exists: {OUTPUT_FILE}")
        print("Delete it manually to re-download.")
        return
    print("Downloading UK POI data from Overture Maps...")
    print(f"Bounding box: {UK_BBOX}")
    print("This may take several minutes...")
    reader = overturemaps.record_batch_reader("place", bbox=UK_BBOX)
    # Read all batches, keeping a running row count so the progress bar does
    # not have to re-sum every batch on each iteration.
    batches = []
    total_rows = 0
    with tqdm(desc="Downloading batches", unit=" batches") as pbar:
        for batch in reader:
            batches.append(batch)
            total_rows += batch.num_rows
            pbar.update(1)
            pbar.set_postfix(rows=total_rows)
    if not batches:
        print("No data found in bounding box!")
        return
    # Combine batches into a table and write
    table = pa.Table.from_batches(batches, schema=reader.schema)
    print(f"\nWriting {table.num_rows:,} POIs to {OUTPUT_FILE}...")
    pq.write_table(table, OUTPUT_FILE)
    print(f"Download complete: {OUTPUT_FILE}")
    print(f"File size: {OUTPUT_FILE.stat().st_size / 1024 / 1024:.1f} MB")
 if __name__ == "__main__":
    main()
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@ -11,6 +11,7 @@
        "@deck.gl/core": "^9.0.0",
        "@deck.gl/geo-layers": "^9.0.0",
        "@deck.gl/layers": "^9.0.0",
        "@deck.gl/mapbox": "^9.2.6",
        "@deck.gl/react": "^9.0.0",
        "@radix-ui/react-select": "^2.0.0",
        "@radix-ui/react-slider": "^1.1.0",
@ -181,6 +182,22 @@
        "@luma.gl/engine": "~9.2.6"
      }
    },
    "node_modules/@deck.gl/mapbox": {
      "version": "9.2.6",
      "resolved": "https://registry.npmjs.org/@deck.gl/mapbox/-/mapbox-9.2.6.tgz",
      "integrity": "sha512-gyqCHZwiZS8LOYY6LILQQp5YCCf++VFk/wRoGskZvhb/kdEPX2Onv8iV8pXe0h9UyMLO6Mj0wl3HlJWg2ILkrg==",
      "license": "MIT",
      "dependencies": {
        "@luma.gl/constants": "^9.2.6",
        "@math.gl/web-mercator": "^4.1.0"
      },
      "peerDependencies": {
        "@deck.gl/core": "~9.2.0",
        "@luma.gl/constants": "~9.2.6",
        "@luma.gl/core": "~9.2.6",
        "@math.gl/web-mercator": "^4.1.0"
      }
    },
    "node_modules/@deck.gl/mesh-layers": {
      "version": "9.2.6",
      "resolved": "https://registry.npmjs.org/@deck.gl/mesh-layers/-/mesh-layers-9.2.6.tgz",
--- a/frontend/package.json
+++ b/frontend/package.json
@ -11,41 +11,42 @@
    "format:check": "prettier --check \"src/**/*.{ts,tsx,css}\""
  },
  "dependencies": {
    "react": "^18.2.0",
    "react-dom": "^18.2.0",
    "@deck.gl/core": "^9.0.0",
    "@deck.gl/layers": "^9.0.0",
    "@deck.gl/geo-layers": "^9.0.0",
    "@deck.gl/layers": "^9.0.0",
    "@deck.gl/mapbox": "^9.2.6",
    "@deck.gl/react": "^9.0.0",
    "maplibre-gl": "^4.0.0",
    "react-map-gl": "^7.1.0",
    "@radix-ui/react-slider": "^1.1.0",
    "@radix-ui/react-select": "^2.0.0",
    "@radix-ui/react-slider": "^1.1.0",
    "class-variance-authority": "^0.7.0",
    "clsx": "^2.1.0",
    "maplibre-gl": "^4.0.0",
    "react": "^18.2.0",
    "react-dom": "^18.2.0",
    "react-map-gl": "^7.1.0",
    "tailwind-merge": "^2.2.0",
    "tailwindcss-animate": "^1.0.7"
  },
  "devDependencies": {
    "webpack": "^5.90.0",
    "webpack-cli": "^5.1.0",
    "webpack-dev-server": "^5.0.0",
    "html-webpack-plugin": "^5.6.0",
    "css-loader": "^7.0.0",
    "style-loader": "^4.0.0",
    "postcss-loader": "^8.0.0",
    "ts-loader": "^9.5.0",
    "typescript": "^5.4.0",
    "@types/react": "^18.2.0",
    "@types/react-dom": "^18.2.0",
    "tailwindcss": "^3.4.0",
    "autoprefixer": "^10.4.0",
    "postcss": "^8.4.0",
    "eslint": "^8.57.0",
    "@typescript-eslint/eslint-plugin": "^7.0.0",
    "@typescript-eslint/parser": "^7.0.0",
    "autoprefixer": "^10.4.0",
    "css-loader": "^7.0.0",
    "eslint": "^8.57.0",
    "eslint-plugin-react": "^7.34.0",
    "eslint-plugin-react-hooks": "^4.6.0",
-    "prettier": "^3.2.0"
+    "html-webpack-plugin": "^5.6.0",
    "postcss": "^8.4.0",
    "postcss-loader": "^8.0.0",
    "prettier": "^3.2.0",
    "style-loader": "^4.0.0",
    "tailwindcss": "^3.4.0",
    "ts-loader": "^9.5.0",
    "typescript": "^5.4.0",
    "webpack": "^5.90.0",
    "webpack-cli": "^5.1.0",
    "webpack-dev-server": "^5.0.0"
  }
-}
+}
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
--- a/frontend/src/components/AreaPane.tsx
+++ b/frontend/src/components/AreaPane.tsx
@ -0,0 +1,243 @@
 import { useMemo } from 'react';
 import type { FeatureMeta, HexagonStatsResponse } from '../types';
 /** Props accepted by the AreaPane component. */
 interface AreaPaneProps {
  /** Statistics to display; while null, no counts or charts are rendered. */
  stats: HexagonStatsResponse | null;
  /** Feature metadata; its `group` field drives how the statistics are grouped. */
  globalFeatures: FeatureMeta[];
  /** When true and `stats` is null, a "Loading..." placeholder is shown. */
  loading: boolean;
  /** When null, the pane shows a "Click a hexagon" prompt instead of statistics. */
  hexagonId: string | null;
  /** When true, a "Preview" badge is shown next to the heading. */
  isHoveredPreview: boolean;
  /** Current state of the live-preview toggle button. */
  hoverMode: boolean;
  /** Called with the negated `hoverMode` when the toggle button is clicked. */
  onHoverModeChange: (enabled: boolean) => void;
  /** Called when the "View N Properties" button is clicked. */
  onViewProperties: () => void;
  /** Called when the close button is clicked. */
  onClose: () => void;
 }
 /**
  * Format a number compactly for display: millions as "1.2M", thousands as
  * "12k", other integers via toLocaleString, and fractions to one decimal.
  */
 function formatValue(value: number): string {
  const magnitude = Math.abs(value);
  if (magnitude >= 1_000_000) {
    return `${(value / 1_000_000).toFixed(1)}M`;
  }
  if (magnitude >= 1_000) {
    return `${(value / 1_000).toFixed(0)}k`;
  }
  return Number.isInteger(value) ? value.toLocaleString() : value.toFixed(1);
 }
 // Group features by their `group` field. Groups appear in the order their
 // first feature appears, and features keep their input order within a group.
 // Features with an empty or missing group are collected under "Other".
 function groupFeatures(
  globalFeatures: FeatureMeta[]
 ): { name: string; features: FeatureMeta[] }[] {
  // Keyed by group name so each feature finds its group with one lookup
  // instead of scanning the list of groups built so far.
  const groupsByName = new Map<string, { name: string; features: FeatureMeta[] }>();
  for (const feature of globalFeatures) {
    const groupName = feature.group || 'Other';
    let group = groupsByName.get(groupName);
    if (!group) {
      group = { name: groupName, features: [] };
      groupsByName.set(groupName, group);
    }
    group.features.push(feature);
  }
  // A Map iterates in insertion order, which is first-seen group order.
  return Array.from(groupsByName.values());
 }
 /**
  * Compact bar chart of histogram bin counts. Renders nothing when `maxCount`
  * is 0. Adjacent bins are summed into buckets of equal width so that long
  * histograms are drawn with roughly 20 bars.
  */
 function MiniHistogram({ counts, maxCount }: { counts: number[]; maxCount: number }) {
  if (maxCount === 0) return null;
  // Downsample to ~20 bars for display
  const targetBars = 20;
  const bucketSize = Math.max(1, Math.floor(counts.length / targetBars));
  const bucketCount = Math.ceil(counts.length / bucketSize);
  const bars: number[] = Array.from({ length: bucketCount }, (_, bucket) =>
    counts
      .slice(bucket * bucketSize, (bucket + 1) * bucketSize)
      .reduce((total, binCount) => total + binCount, 0)
  );
  // Floor of 1 keeps the height division safe when every bucket is empty.
  const tallest = Math.max(...bars, 1);
  return (
    <div className="flex items-end gap-px h-8 mt-1">
      {bars.map((count, index) => (
        <div
          key={index}
          className="flex-1 bg-teal-500 dark:bg-teal-400 rounded-t-sm min-w-[2px]"
          style={{ height: `${(count / tallest) * 100}%`, opacity: count > 0 ? 1 : 0.1 }}
        />
      ))}
    </div>
  );
 }
 /**
  * Horizontal bar chart of category counts, largest first. Each bar's width
  * is relative to the largest count (with a floor of 1).
  */
 function EnumBarChart({ counts }: { counts: Record<string, number> }) {
  const rows = Object.entries(counts);
  rows.sort((left, right) => right[1] - left[1]);
  const largest = rows.reduce((best, [, count]) => Math.max(best, count), 1);
  return (
    <div className="space-y-1 mt-1">
      {rows.map(([label, count]) => (
        <div key={label} className="flex items-center gap-2 text-xs">
          <span className="w-16 truncate text-warm-500 dark:text-warm-400 text-right shrink-0">
            {label}
          </span>
          <div className="flex-1 h-3 bg-warm-100 dark:bg-navy-700 rounded overflow-hidden">
            <div
              className="h-full bg-teal-500 dark:bg-teal-400 rounded"
              style={{ width: `${(count / largest) * 100}%` }}
            />
          </div>
          <span className="w-8 text-warm-500 dark:text-warm-400 text-right shrink-0">{count}</span>
        </div>
      ))}
    </div>
  );
 }
 /**
  * Side pane showing statistics for one hexagon.
  *
  * With no `hexagonId` it renders a prompt to click a hexagon. Otherwise it
  * renders a header (title, optional "Preview" badge, live-preview toggle,
  * close button, property count and "View Properties" button) followed by the
  * statistics, grouped by each feature's group. Numeric features show their
  * mean, min, max and a mini histogram; enum features show a bar chart.
  */
 export default function AreaPane({
  stats,
  globalFeatures,
  loading,
  hexagonId,
  isHoveredPreview,
  hoverMode,
  onHoverModeChange,
  onViewProperties,
  onClose,
 }: AreaPaneProps) {
  // Recomputed only when the feature metadata changes.
  const featureGroups = useMemo(() => groupFeatures(globalFeatures), [globalFeatures]);
  // Build lookup maps from stats (feature name -> stats entry); both maps are
  // empty while stats is null.
  const numericByName = useMemo(() => {
    if (!stats) return new Map();
    return new Map(stats.numeric_features.map((feature) => [feature.name, feature]));
  }, [stats]);
  const enumByName = useMemo(() => {
    if (!stats) return new Map();
    return new Map(stats.enum_features.map((feature) => [feature.name, feature]));
  }, [stats]);
  // No hexagon selected: show the prompt instead of the statistics layout.
  if (!hexagonId) {
    return (
      <div className="flex items-center justify-center h-full text-warm-500 dark:text-warm-400 px-4 text-center text-sm">
        Click a hexagon to view area statistics
      </div>
    );
  }
  return (
    <div className="flex flex-col h-full">
      {/* Header */}
      <div className="p-3 border-b border-warm-200 dark:border-navy-700">
        <div className="flex justify-between items-center">
          <div className="flex items-center gap-2">
            <h2 className="text-sm font-semibold dark:text-warm-100">Area Statistics</h2>
            {isHoveredPreview && (
              <span className="text-xs px-1.5 py-0.5 rounded bg-teal-50 dark:bg-teal-900/30 text-teal-600 dark:text-teal-400">
                Preview
              </span>
            )}
          </div>
          <div className="flex items-center gap-1">
            <button
              onClick={() => onHoverModeChange(!hoverMode)}
              className={`p-1 rounded ${
                hoverMode
                  ? 'text-teal-600 dark:text-teal-400 bg-teal-50 dark:bg-teal-900/30'
                  : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'
              }`}
              title={hoverMode ? 'Live preview on (click to lock)' : 'Live preview off (click to enable)'}
            >
              <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                <path strokeLinecap="round" strokeLinejoin="round" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z" />
                <path strokeLinecap="round" strokeLinejoin="round" d="M2.458 12C3.732 7.943 7.523 5 12 5c4.478 0 8.268 2.943 9.542 7-1.274 4.057-5.064 7-9.542 7-4.477 0-8.268-2.943-9.542-7z" />
              </svg>
            </button>
            <button
              onClick={onClose}
              className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-1"
            >
              <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
              </svg>
            </button>
          </div>
        </div>
        {stats && (
          <p className="text-sm text-warm-600 dark:text-warm-400 mt-1">
            {stats.count.toLocaleString()} properties
          </p>
        )}
        {stats && (
          <button
            onClick={onViewProperties}
            className="mt-2 w-full text-sm py-1.5 rounded bg-teal-600 hover:bg-teal-700 text-white font-medium"
          >
            View {stats.count.toLocaleString()} Properties
          </button>
        )}
      </div>
      {/* Stats content */}
      <div className="flex-1 overflow-y-auto">
        {loading && !stats ? (
          <div className="p-4 text-warm-500 dark:text-warm-400 text-sm">Loading...</div>
        ) : stats ? (
          <div className="p-3 space-y-4">
            {featureGroups.map((group) => {
              // Check if any feature in this group has data; groups without
              // any are omitted entirely.
              const hasData = group.features.some(
                (feature) => numericByName.has(feature.name) || enumByName.has(feature.name)
              );
              if (!hasData) return null;
              return (
                <div key={group.name}>
                  <h3 className="text-xs font-semibold text-warm-500 dark:text-warm-400 uppercase tracking-wider mb-2">
                    {group.name}
                  </h3>
                  <div className="space-y-3">
                    {group.features.map((feature) => {
                      // A feature is rendered as numeric if it has numeric
                      // stats, otherwise as an enum; features with neither
                      // are skipped.
                      const numericStats = numericByName.get(feature.name);
                      const enumStats = enumByName.get(feature.name);
                      if (numericStats) {
                        // Largest bin count; MiniHistogram renders nothing
                        // when this is 0.
                        const maxCount = Math.max(...numericStats.histogram.counts);
                        return (
                          <div key={feature.name} className="bg-warm-50 dark:bg-navy-800 rounded p-2">
                            <div className="flex justify-between items-baseline">
                              <span className="text-xs text-warm-700 dark:text-warm-300 truncate mr-2">
                                {feature.name}
                              </span>
                              <span className="text-xs font-semibold text-teal-700 dark:text-teal-400 whitespace-nowrap">
                                {formatValue(numericStats.mean)}
                              </span>
                            </div>
                            <div className="flex justify-between text-[10px] text-warm-400 dark:text-warm-500 mt-0.5">
                              <span>{formatValue(numericStats.min)}</span>
                              <span>{formatValue(numericStats.max)}</span>
                            </div>
                            <MiniHistogram counts={numericStats.histogram.counts} maxCount={maxCount} />
                          </div>
                        );
                      }
                      if (enumStats) {
                        return (
                          <div key={feature.name} className="bg-warm-50 dark:bg-navy-800 rounded p-2">
                            <span className="text-xs text-warm-700 dark:text-warm-300">
                              {feature.name}
                            </span>
                            <EnumBarChart counts={enumStats.counts} />
                          </div>
                        );
                      }
                      return null;
                    })}
                  </div>
                </div>
              );
            })}
          </div>
        ) : null}
      </div>
    </div>
  );
 }
--- a/frontend/src/components/DataSources.tsx
+++ b/frontend/src/components/DataSources.tsx
@ -0,0 +1,10 @@
 /** Button pinned to the bottom-right corner that calls `onNavigate` when clicked. */
 export default function DataSources(props: { onNavigate: () => void }) {
  const { onNavigate } = props;
  const buttonClasses =
    'absolute bottom-2 right-2 bg-white/90 dark:bg-navy-800/90 backdrop-blur-sm px-3 py-2 rounded shadow-lg text-xs text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline font-semibold transition-colors';
  return (
    <button onClick={onNavigate} className={buttonClasses}>
      Data Sources
    </button>
  );
 }
--- a/frontend/src/components/DataSourcesPage.tsx
+++ b/frontend/src/components/DataSourcesPage.tsx
@ -0,0 +1,214 @@
 import { useEffect, useState, useRef } from 'react';
 const DATA_SOURCES = [
  {
    id: 'price-paid',
    name: 'Price Paid Data',
    origin: 'HM Land Registry',
    use: 'Complete historical property sale prices for England and Wales. Used for the last known sale price of each property.',
    url: 'https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'epc',
    name: 'Energy Performance Certificates (EPC)',
    origin: 'Ministry of Housing, Communities & Local Government',
    use: 'Domestic Energy Performance Certificates providing floor area, number of rooms, construction age, energy ratings, property type, and built form. Fuzzy-joined with Price Paid records by address within postcode buckets.',
    url: 'https://epc.opendatacommunities.org/downloads/domestic',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'nspl',
    name: 'National Statistics Postcode Lookup (NSPL)',
    origin: 'ONS / ArcGIS',
    use: 'Maps postcodes to latitude/longitude, LSOA, and Output Area codes for geolocation and joining area-level datasets.',
    url: 'https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'iod',
    name: 'English Indices of Deprivation 2025',
    origin: 'Ministry of Housing, Communities & Local Government',
    use: 'Relative deprivation scores for 33,755 LSOAs across domains: Income, Employment, Education, Health, Crime, Living Environment, and sub-domains. Joined to properties via LSOA code.',
    url: 'https://www.gov.uk/government/statistics/english-indices-of-deprivation-2025',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'ethnicity',
    name: 'Population by Ethnicity (2021 Census)',
    origin: 'ONS',
    use: 'Population percentages by ethnic group (Asian, Black, Mixed, White, Other) per Local Authority. Joined via Local Authority District code.',
    url: 'https://www.ethnicity-facts-figures.service.gov.uk/uk-population-by-ethnicity/national-and-regional-populations/regional-ethnic-diversity/latest/#download-the-data',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'crime',
    name: 'Street-level Crime Data',
    origin: 'data.police.uk',
    use: 'Street-level crime data from 2023 to 2025, aggregated into yearly averages by LSOA and crime type (violence, burglary, anti-social behaviour, drugs, vehicle crime, etc.).',
    url: 'https://data.police.uk/data/',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'tfl-journey-times',
    name: 'TfL Journey Times',
    origin: 'Transport for London',
    use: "Journey time calculations from postcodes to central London destinations (Bank, Waterloo, King's Cross, etc.) via public transport and cycling.",
    url: 'https://api-portal.tfl.gov.uk/',
    license: 'Powered by TfL Open Data',
  },
  {
    id: 'osm-pois',
    name: 'OpenStreetMap POIs',
    origin: 'OpenStreetMap contributors / Geofabrik',
    use: 'Points of interest extracted from the Great Britain PBF extract. Covers amenities, shops, healthcare, leisure, tourism, and more. Filtered and remapped to friendly category names.',
    url: 'https://download.geofabrik.de/europe/great-britain-latest.osm.pbf',
    license: 'Open Data Commons Open Database License (ODbL)',
  },
  {
    id: 'naptan',
    name: 'NaPTAN (Public Transport Stops)',
    origin: 'Department for Transport',
    use: 'National Public Transport Access Nodes providing station and stop locations (rail, bus, metro/tram, ferry, airport), merged into the POI dataset.',
    url: 'https://naptan.dft.gov.uk/naptan/schema/2.4/doc/NaPTANSchemaGuide-2.4-v0.57.pdf',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'noise',
    name: 'Defra Noise Mapping',
    origin: 'Defra / Environment Agency',
    use: 'Strategic noise mapping Round 4 (2022) for road, rail, and airport sources. Lden (day-evening-night 24h weighted average) at 10m grid resolution, modelled at 4m above ground. Sampled at postcode centroids via WCS GeoTIFF tiles.',
    url: 'https://environment.data.gov.uk/spatialdata/road-noise-all-metrics-england-round-4/wcs',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'ofsted',
    name: 'Ofsted School Inspections',
    origin: 'Ofsted',
    use: 'Latest inspection outcomes for state-funded schools (as at April 2025). Averaged per postcode to give a local school quality score (1=Outstanding to 4=Inadequate).',
    url: 'https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'broadband',
    name: 'Ofcom Broadband Performance',
    origin: 'Ofcom',
    use: 'Fixed broadband coverage and speeds by Output Area from Connected Nations 2025. Includes max download/upload speeds across different speed tiers.',
    url: 'https://www.ofcom.org.uk/phones-and-broadband/coverage-and-speeds/connected-nations-20252/data-downloads-2025',
    license: 'Open Government Licence v3.0',
  },
 ];
 /**
  * Page listing every entry of DATA_SOURCES as a card, followed by an
  * attribution footer.
  *
  * When the URL hash matches a data source id, that card is highlighted and
  * scrolled into view; this is re-evaluated on every `hashchange` event.
  */
 export default function DataSourcesPage() {
  const [highlightedId, setHighlightedId] = useState<string | null>(null);
  // Card DOM nodes keyed by data source id, used as scroll targets.
  const cardRefs = useRef<Record<string, HTMLDivElement | null>>({});
  useEffect(() => {
    // Pending delayed scroll. It is tracked so that a newer hash change or an
    // unmount can cancel it instead of leaving a stale timer running.
    let scrollTimer: ReturnType<typeof setTimeout> | undefined;
    function handleHash() {
      clearTimeout(scrollTimer);
      scrollTimer = undefined;
      const hash = window.location.hash.replace('#', '');
      if (hash && DATA_SOURCES.some((s) => s.id === hash)) {
        setHighlightedId(hash);
        // Scroll after a brief delay to allow render
        scrollTimer = setTimeout(() => {
          cardRefs.current[hash]?.scrollIntoView({ behavior: 'smooth', block: 'center' });
        }, 100);
      } else {
        setHighlightedId(null);
      }
    }
    handleHash();
    window.addEventListener('hashchange', handleHash);
    return () => {
      window.removeEventListener('hashchange', handleHash);
      clearTimeout(scrollTimer);
    };
  }, []);
  return (
    <div className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950 flex flex-col">
      <div className="flex-1">
        <div className="max-w-5xl mx-auto px-6 py-8">
          <h1 className="text-2xl font-bold text-warm-900 dark:text-warm-100 mb-2">Data Sources</h1>
          <p className="text-warm-600 dark:text-warm-400 mb-6">
            This application combines {DATA_SOURCES.length} open datasets covering property prices,
            energy performance, transport, demographics, crime, environment, and more.
          </p>
          <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
            {DATA_SOURCES.map((source) => (
              <div
                key={source.id}
                id={source.id}
                ref={(el) => { cardRefs.current[source.id] = el; }}
                className={`bg-white dark:bg-navy-800 rounded-lg border p-5 ${
                  highlightedId === source.id
                    ? 'border-teal-400 ring-2 ring-teal-400'
                    : 'border-warm-200 dark:border-navy-700'
                }`}
              >
                <div className="flex items-start justify-between gap-4 mb-2">
                  <h2 className="text-lg font-semibold text-warm-900 dark:text-warm-100">{source.name}</h2>
                  <span className="shrink-0 text-xs bg-warm-100 dark:bg-navy-700 text-warm-600 dark:text-warm-300 px-2 py-1 rounded">
                    {source.license}
                  </span>
                </div>
                <p className="text-sm text-warm-500 dark:text-warm-400 mb-2">Source: {source.origin}</p>
                <p className="text-sm text-warm-700 dark:text-warm-300 mb-3">{source.use}</p>
                <a
                  href={source.url}
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline break-all"
                >
                  {source.url}
                </a>
              </div>
            ))}
          </div>
        </div>
      </div>
      <footer className="bg-navy-900 text-warm-400 px-6 py-6">
        <div className="max-w-5xl mx-auto">
          <h2 className="text-sm font-semibold text-warm-300 uppercase tracking-wide mb-3">
            Attribution
          </h2>
          <ul className="space-y-1.5 text-sm">
            <li>Contains HM Land Registry data &copy; Crown copyright and database right 2025.</li>
            <li>
              Contains public sector information licensed under the{' '}
              <a
                href="https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
                target="_blank"
                rel="noopener noreferrer"
                className="text-teal-400 hover:text-teal-300 hover:underline"
              >
                Open Government Licence v3.0
              </a>
              .
            </li>
            <li>Contains OS data &copy; Crown copyright and database rights 2025.</li>
            <li>Powered by TfL Open Data.</li>
            <li>
              Contains data from{' '}
              <a
                href="https://www.openstreetmap.org/copyright"
                target="_blank"
                rel="noopener noreferrer"
                className="text-teal-400 hover:text-teal-300 hover:underline"
              >
                &copy; OpenStreetMap contributors
              </a>
              , available under the{' '}
              <a
                href="https://opendatacommons.org/licenses/odbl/"
                target="_blank"
                rel="noopener noreferrer"
                className="text-teal-400 hover:text-teal-300 hover:underline"
              >
                Open Data Commons Open Database License (ODbL)
              </a>
              .
            </li>
          </ul>
        </div>
      </footer>
    </div>
  );
 }
--- a/frontend/src/components/FAQPage.tsx
+++ b/frontend/src/components/FAQPage.tsx
@ -0,0 +1,119 @@
 import { useState } from 'react';
 /** One question/answer pair displayed on the FAQ page. */
 interface FAQItem {
  // Text shown on the card's toggle button.
  question: string;
  // Plain-text body rendered in a paragraph while the card is open.
  answer: string;
 }
 // Static FAQ content. FAQPage renders these entries in array order, one
 // collapsible FAQItemCard per entry.
 const FAQ_ITEMS: FAQItem[] = [
  {
    question: 'What is this application?',
    answer:
      'Narrowit is an interactive map that visualises property-level data across England and Wales. It combines Land Registry sale prices, EPC energy certificates, TfL journey times, deprivation indices, crime statistics, broadband speeds, school ratings, road noise levels, ethnicity demographics, and OpenStreetMap points of interest into a single explorable view.',
  },
  {
    question: 'Where does the data come from?',
    answer:
      'All data comes from open government and community sources. Property prices are from HM Land Registry, energy certificates from MHCLG, transport times from TfL, deprivation scores from the English Indices of Deprivation 2025, crime data from data.police.uk, school ratings from Ofsted, broadband from Ofcom, noise from Defra, ethnicity from the 2021 Census, and points of interest from OpenStreetMap. See the Data Sources page for full details and links.',
  },
  {
    question: 'What are the coloured hexagons on the map?',
    answer:
      'The map uses H3 hexagons to aggregate property data at different zoom levels. Each hexagon summarises the properties within it. The colour represents the value of whichever feature you have pinned or are actively filtering — for example, average price or energy rating. Zoom in to see smaller, more detailed hexagons; zoom out for a broader overview.',
  },
  {
    question: 'How do filters work?',
    answer:
      'Use the Filters panel on the left to narrow down properties. Add a filter by clicking a feature name, then drag the range slider to set minimum and maximum values. For categorical features like property type, select or deselect individual values. Only hexagons containing properties that match all active filters are shown. Filters are combined with AND logic — every property must satisfy every filter.',
  },
  {
    question: 'What does the eye icon do on a filter?',
    answer:
      'The eye icon pins a feature as the colour source for the hexagon layer. When pinned, hexagons are coloured by that feature\'s value range even when you are not actively dragging its slider. This lets you visualise one feature while filtering on others. Click the eye icon again to unpin.',
  },
  {
    question: 'How fresh is the data?',
    answer:
      'Property prices cover all Land Registry transactions up to the most recent quarterly release. EPC data includes certificates issued up to the latest available download. Crime data spans 2023–2025 as yearly averages. TfL journey times are computed from current timetables. Deprivation indices are from the 2025 release. School ratings reflect the latest Ofsted inspections as at April 2025. Broadband data is from Ofcom Connected Nations 2025.',
  },
  {
    question: 'How are EPC records matched to Land Registry sales?',
    answer:
      'EPC and Land Registry records don\'t share a common identifier, so they are fuzzy-joined by address within each postcode bucket. The pipeline uses token-sorted string similarity with special handling for numeric tokens (house numbers, flat numbers). Matches are assigned greedily from highest similarity score downward so each record is used at most once.',
  },
  {
    question: 'What are Points of Interest (POIs)?',
    answer:
      'POIs are places like cafes, schools, supermarkets, GP surgeries, parks, and train stations extracted from OpenStreetMap and the NaPTAN public transport dataset. Use the POI panel on the right to toggle categories on and off. POIs appear as markers on the map when you are zoomed in far enough.',
  },
  {
    question: 'Can I share a specific view with someone?',
    answer:
      'Yes. The URL updates automatically as you pan, zoom, and change filters. Click the Share button in the header to copy the current URL to your clipboard. Anyone who opens that link will see the same view, filters, and active POI categories.',
  },
  {
    question: 'How do I see individual properties?',
    answer:
      'Click on a hexagon to open the Properties panel on the right. It lists all matching properties within that hexagon, showing address, price, and key features. Use "Load more" at the bottom to paginate through large hexagons.',
  },
  {
    question: 'Why are some hexagons grey?',
    answer:
      'Grey hexagons contain properties that have data but fall outside the range of your currently pinned or active feature. This gives you a sense of where properties exist even when their values are outside your selected range.',
  },
  {
    question: 'Does this work on mobile?',
    answer:
      'The app is designed for desktop browsers where you have enough screen space for the map, filter panel, and POI/properties panel side by side. It will load on mobile but the experience is best on a larger screen.',
  },
 ];
 /**
  * Collapsible card for a single FAQ entry.
  *
  * The question is always visible as a full-width toggle button; the answer is
  * only mounted while the card is open. Every card keeps its own open/closed
  * state and starts closed.
  *
  * Accessibility: the button reports its state through `aria-expanded`, and the
  * chevron is marked `aria-hidden` because it only mirrors that state visually.
  *
  * @param item - The question/answer pair to display.
  */
 function FAQItemCard({ item }: { item: FAQItem }) {
  const [open, setOpen] = useState(false);
  return (
    <div className="bg-white dark:bg-navy-800 rounded-lg border border-warm-200 dark:border-navy-700">
      <button
        type="button"
        aria-expanded={open}
        className="w-full text-left px-5 py-4 flex items-center justify-between gap-4"
        onClick={() => setOpen((wasOpen) => !wasOpen)}
      >
        <span className="font-medium text-warm-900 dark:text-warm-100">{item.question}</span>
        <svg
          aria-hidden="true"
          className={`w-5 h-5 shrink-0 text-warm-400 dark:text-warm-500 transform ${open ? 'rotate-180' : ''}`}
          fill="none"
          stroke="currentColor"
          viewBox="0 0 24 24"
          strokeWidth={2}
        >
          <path strokeLinecap="round" strokeLinejoin="round" d="M19 9l-7 7-7-7" />
        </svg>
      </button>
      {open && (
        <div className="px-5 pb-4">
          <p className="text-sm text-warm-700 dark:text-warm-300 leading-relaxed">{item.answer}</p>
        </div>
      )}
    </div>
  );
 }
 /**
  * Scrollable FAQ page: a heading, a short intro paragraph, and one
  * collapsible card per entry in FAQ_ITEMS (rendered in array order).
  */
 export default function FAQPage() {
  // Build the card list up front so the layout markup below stays flat.
  const cards = FAQ_ITEMS.map((entry, position) => <FAQItemCard key={position} item={entry} />);

  return (
    <div className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950">
      <div className="max-w-3xl mx-auto px-6 py-8">
        <h1 className="text-2xl font-bold text-warm-900 dark:text-warm-100 mb-2">
          Frequently Asked Questions
        </h1>
        <p className="text-warm-600 dark:text-warm-400 mb-6">
          Common questions about how Narrowit works, where the data comes from, and how to use the
          map.
        </p>
        <div className="space-y-3">{cards}</div>
      </div>
    </div>
  );
 }
--- a/frontend/src/components/Filters.tsx
+++ b/frontend/src/components/Filters.tsx
@ -1,119 +1,466 @@
 import { memo, useState, useRef, useCallback, useMemo, useEffect } from 'react';
 import { Slider } from './ui/slider';
 import { Label } from './ui/label';
-import { YEAR_MIN, YEAR_MAX, YEAR_STEP, PRICE_MIN, PRICE_MAX, PRICE_STEP } from '../lib/constants';
+import type { FeatureMeta, FeatureFilters } from '../types';
 import type { Filters as FiltersType, POICategoryGroup } from '../types';
 import { POI_CATEGORY_GROUPS } from '../types';
 interface FiltersProps {
-  filters: FiltersType;
+  features: FeatureMeta[];
-  onChange: (filters: FiltersType) => void;
+  filters: FeatureFilters;
  activeFeature: string | null;
  dragValue: [number, number] | null;
  enabledFeatures: Set<string>;
  onAddFilter: (name: string) => void;
  onRemoveFilter: (name: string) => void;
  onFilterChange: (name: string, value: [number, number] | string[]) => void;
  onDragStart: (name: string) => void;
  onDragChange: (value: [number, number]) => void;
  onDragEnd: () => void;
  zoom: number;
-  selectedPOICategories: Set<POICategoryGroup>;
+  pinnedFeature: string | null;
-  onPOICategoriesChange: (categories: Set<POICategoryGroup>) => void;
+  onTogglePin: (name: string) => void;
  onCancelPin: () => void;
  onNavigateToSource?: (slug: string, featureName: string) => void;
  openInfoFeature?: string | null;
  onClearOpenInfoFeature?: () => void;
 }
-const POI_LABELS: Record<POICategoryGroup, string> = {
+function EyeIcon({ filled, className }: { filled: boolean; className?: string }) {
-  schools: '🏫 Schools',
+  return (
-  healthcare: '🏥 Healthcare',
+    <svg
-  transport: '🚉 Transport',
+      className={className || 'w-3.5 h-3.5'}
-  parks: '🌳 Parks',
+      viewBox="0 0 24 24"
-  emergency: '🚨 Emergency',
+      fill={filled ? 'currentColor' : 'none'}
-  supermarkets: '🛒 Supermarkets',
+      stroke="currentColor"
-};
+      strokeWidth={2}
    >
      <path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z" />
      <circle cx="12" cy="12" r="3" />
    </svg>
  );
 }
-export default function Filters({
+function InfoPopup({
-  filters,
+  feature,
-  onChange,
+  onClose,
-  zoom,
+  onNavigateToSource,
-  selectedPOICategories,
+}: {
-  onPOICategoriesChange,
+  feature: FeatureMeta;
-}: FiltersProps) {
+  onClose: () => void;
-  const update = (key: keyof FiltersType, value: number) => onChange({ ...filters, [key]: value });
+  onNavigateToSource?: (slug: string, featureName: string) => void;
 }) {
  const popupRef = useRef<HTMLDivElement>(null);
-  const togglePOICategory = (category: POICategoryGroup) => {
+  useEffect(() => {
-    const newSet = new Set(selectedPOICategories);
+    function handleClickOutside(e: MouseEvent) {
-    if (newSet.has(category)) {
+      if (popupRef.current && !popupRef.current.contains(e.target as Node)) {
-      newSet.delete(category);
+        onClose();
-    } else {
+      }
      newSet.add(category);
    }
-    onPOICategoriesChange(newSet);
+    document.addEventListener('mousedown', handleClickOutside);
-  };
+    return () => document.removeEventListener('mousedown', handleClickOutside);
  }, [onClose]);
  return (
-    <div className="w-72 p-4 bg-white shadow-lg space-y-6 overflow-y-auto max-h-screen">
+    <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30">
-      <h1 className="text-xl font-bold">UK Property Prices</h1>
+      <div
-
+        ref={popupRef}
-      <div className="text-sm text-slate-500">Zoom: {zoom.toFixed(1)}</div>
+        className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full mx-4 p-5"
-
+      >
-      <div className="space-y-2">
+        <div className="flex items-start justify-between mb-3">
-        <Label>
+          <h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">
-          Year Range: {filters.minYear} - {filters.maxYear}
+            {feature.name}
-        </Label>
+          </h3>
-        <Slider
+          <button
-          min={YEAR_MIN}
+            onClick={onClose}
-          max={YEAR_MAX}
+            className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shrink-0"
-          step={YEAR_STEP}
+          >
-          value={[filters.minYear, filters.maxYear]}
+            <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
-          onValueChange={([min, max]) => onChange({ ...filters, minYear: min, maxYear: max })}
+              <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
-        />
+            </svg>
-      </div>
+          </button>
      <div className="space-y-2">
        <Label>Min Price: £{filters.minPrice.toLocaleString()}</Label>
        <Slider
          min={PRICE_MIN}
          max={PRICE_MAX}
          step={PRICE_STEP}
          value={[filters.minPrice]}
          onValueChange={([v]) => update('minPrice', v)}
        />
      </div>
      <div className="space-y-2">
        <Label>Max Price: £{filters.maxPrice.toLocaleString()}</Label>
        <Slider
          min={PRICE_MIN}
          max={PRICE_MAX}
          step={PRICE_STEP}
          value={[filters.maxPrice]}
          onValueChange={([v]) => update('maxPrice', v)}
        />
      </div>
      <div className="mt-6 p-3 bg-slate-100 rounded text-xs">
        <div className="mb-2 font-medium">Average Price</div>
        <div
          className="h-4 rounded"
          style={{
            background:
              'linear-gradient(to right, rgb(46, 204, 113), rgb(241, 196, 15), rgb(231, 76, 60), rgb(142, 68, 173))',
          }}
        ></div>
        <div className="flex justify-between mt-1">
          <span>£0</span>
          <span>£200k</span>
          <span>£400k</span>
          <span>£800k+</span>
        </div>
      </div>
      <div className="space-y-2">
        <Label>Points of Interest</Label>
        <div className="space-y-1">
          {POI_CATEGORY_GROUPS.map((category) => (
            <label key={category} className="flex items-center gap-2 cursor-pointer">
              <input
                type="checkbox"
                checked={selectedPOICategories.has(category)}
                onChange={() => togglePOICategory(category)}
                className="rounded"
              />
              <span className="text-sm">{POI_LABELS[category]}</span>
            </label>
          ))}
        </div>
        {feature.description && (
          <p className="text-xs text-warm-500 dark:text-warm-400 mb-2">{feature.description}</p>
        )}
        {feature.detail && (
          <p className="text-sm text-warm-700 dark:text-warm-300 mb-4 leading-relaxed">{feature.detail}</p>
        )}
        {feature.source && onNavigateToSource && (
          <button
            onClick={() => {
              onNavigateToSource(feature.source!, feature.name);
              onClose();
            }}
            className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline"
          >
            View data source
          </button>
        )}
      </div>
    </div>
  );
 }
 /**
  * Searchable list of the features that can still be added as filters.
  *
  * Features are filtered by a case-insensitive substring match on their name,
  * then bucketed by `group` (falling back to "Other") in first-seen order.
  * Each row offers up to three actions: open the info popup (only when the
  * feature has `detail`), toggle the colour pin, and add the feature as a filter.
  *
  * When `openInfoFeature` is set, the matching feature's info popup is opened
  * (the lookup uses `allFeatures`, not just the available ones) and
  * `onClearOpenInfoFeature` is invoked so the request is consumed once.
  */
 function FeatureBrowser({
  availableFeatures,
  allFeatures,
  pinnedFeature,
  onAddFilter,
  onTogglePin,
  onNavigateToSource,
  openInfoFeature,
  onClearOpenInfoFeature,
 }: {
  availableFeatures: FeatureMeta[];
  allFeatures: FeatureMeta[];
  pinnedFeature: string | null;
  onAddFilter: (name: string) => void;
  onTogglePin: (name: string) => void;
  onNavigateToSource?: (slug: string, featureName: string) => void;
  openInfoFeature?: string | null;
  onClearOpenInfoFeature?: () => void;
 }) {
  const [search, setSearch] = useState('');
  const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);

  // Auto-open info popup when navigating back
  useEffect(() => {
    if (!openInfoFeature) return;
    const requested = allFeatures.find((candidate) => candidate.name === openInfoFeature);
    if (requested) setInfoFeature(requested);
    onClearOpenInfoFeature?.();
  }, [openInfoFeature, allFeatures, onClearOpenInfoFeature]);

  // Features whose name contains the search text (all of them when it is empty).
  const filtered = useMemo(() => {
    if (!search) return availableFeatures;
    const needle = search.toLowerCase();
    return availableFeatures.filter((candidate) => candidate.name.toLowerCase().includes(needle));
  }, [availableFeatures, search]);

  // Bucket by group name; a Map iterates in insertion order, so groups appear
  // in the order their first feature was encountered.
  const grouped = useMemo(() => {
    const buckets = new Map<string, FeatureMeta[]>();
    for (const feature of filtered) {
      const groupName = feature.group || 'Other';
      const bucket = buckets.get(groupName);
      if (bucket) {
        bucket.push(feature);
      } else {
        buckets.set(groupName, [feature]);
      }
    }
    return Array.from(buckets, ([name, features]) => ({ name, features }));
  }, [filtered]);

  // One row of the browser: name/description on the left, action icons on the right.
  const renderFeatureRow = (f: FeatureMeta) => {
    const isPinned = pinnedFeature === f.name;
    return (
      <div
        key={f.name}
        className="flex items-start justify-between px-3 py-1.5 hover:bg-teal-50 dark:hover:bg-teal-900/30 dark:text-warm-300"
      >
        <div className="min-w-0 mr-2">
          <span className="text-sm truncate block">{f.name}</span>
          {f.description && (
            <span className="text-xs text-warm-400 dark:text-warm-500 truncate block">{f.description}</span>
          )}
        </div>
        <div className="flex items-center gap-1 shrink-0 mt-0.5">
          {f.detail && (
            <button
              onClick={() => setInfoFeature(f)}
              className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
              title="Feature info"
            >
              <svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                <circle cx="12" cy="12" r="10" />
                <path strokeLinecap="round" d="M12 16v-4m0-4h.01" />
              </svg>
            </button>
          )}
          <button
            onClick={() => onTogglePin(f.name)}
            className={`p-0.5 rounded ${isPinned ? 'text-teal-600 dark:text-teal-400' : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'}`}
            title={isPinned ? 'Unpin color view' : 'Color map by this feature'}
          >
            <EyeIcon filled={isPinned} />
          </button>
          <button
            onClick={() => onAddFilter(f.name)}
            className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
            title="Add filter"
          >
            <svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
              <path strokeLinecap="round" strokeLinejoin="round" d="M12 5v14m-7-7h14" />
            </svg>
          </button>
        </div>
      </div>
    );
  };

  const emptyMessage = search ? 'No matching features' : 'All features are active';

  return (
    <>
      <div className="p-2 border-b border-warm-200 dark:border-navy-700">
        <input
          type="text"
          placeholder="Search features..."
          value={search}
          onChange={(e) => setSearch(e.target.value)}
          className="w-full px-2 py-1 text-sm border rounded bg-white dark:bg-navy-800 dark:text-warm-200 border-warm-200 dark:border-navy-700 placeholder-warm-400 dark:placeholder-warm-500 focus:outline-none focus:ring-1 focus:ring-teal-400"
        />
      </div>
      <div className="flex-1 overflow-y-auto">
        {grouped.map((group) => (
          <div key={group.name}>
            <div className="px-3 py-1.5 text-xs font-bold text-warm-500 bg-warm-50 dark:bg-navy-950 dark:text-warm-400 sticky top-0">
              {group.name}
            </div>
            {group.features.map(renderFeatureRow)}
          </div>
        ))}
        {grouped.length === 0 && (
          <div className="px-3 py-4 text-sm text-warm-400 dark:text-warm-500 text-center">
            {emptyMessage}
          </div>
        )}
      </div>
      {infoFeature && (
        <InfoPopup
          feature={infoFeature}
          onClose={() => setInfoFeature(null)}
          onNavigateToSource={onNavigateToSource}
        />
      )}
    </>
  );
 }
 /**
  * Format a number compactly for display in a filter label.
  *
  * - |value| >= 1,000,000 -> one decimal place with an "M" suffix
  * - |value| >= 1,000     -> one decimal place with a "k" suffix
  * - other integers       -> printed as-is
  * - other non-integers   -> two decimal places
  *
  * @param value - The number to format.
  * @returns The compact string representation.
  */
 function formatValue(value: number): string {
  const magnitude = Math.abs(value);
  if (magnitude >= 1_000_000) return `${(value / 1_000_000).toFixed(1)}M`;
  if (magnitude >= 1_000) {
    const thousands = (value / 1_000).toFixed(1);
    // toFixed rounds, so values just below one million (e.g. 999,999) would
    // otherwise come out as "1000.0k"; promote those to the millions form.
    if (Math.abs(Number(thousands)) >= 1_000) return `${(value / 1_000_000).toFixed(1)}M`;
    return `${thousands}k`;
  }
  if (Number.isInteger(value)) return value.toString();
  return value.toFixed(2);
 }
 /**
  * Left-hand filter panel: active filters on top, feature browser below.
  *
  * The two halves are divided by a draggable separator. `splitFraction`
  * (clamped to 0.15–0.8 of the panel height) is the share given to the active
  * filters; the browser takes the rest.
  *
  * Enum features render as a checkbox list with All/None shortcuts and report
  * changes through `onFilterChange`. Every other feature renders as a
  * two-thumb range slider that reports through `onDragStart` /
  * `onDragChange` / `onDragEnd`; while a feature is the `activeFeature` and a
  * `dragValue` is present, the label and thumbs show the in-flight drag value
  * instead of the committed filter value.
  *
  * The derived feature lists are memoised on `features` and `enabledFeatures`,
  * so they keep their identity across re-renders caused by slider drags. This
  * relies on the parent replacing (not mutating) the `enabledFeatures` set.
  */
 export default memo(function Filters({
  features,
  filters,
  activeFeature,
  dragValue,
  enabledFeatures,
  onAddFilter,
  onRemoveFilter,
  onFilterChange,
  onDragStart,
  onDragChange,
  onDragEnd,
  zoom,
  pinnedFeature,
  onTogglePin,
  onCancelPin,
  onNavigateToSource,
  openInfoFeature,
  onClearOpenInfoFeature,
 }: FiltersProps) {
  // Memoised so FeatureBrowser's own useMemo hooks are not invalidated on every
  // drag tick (dragValue changes re-render this component many times a second).
  const availableFeatures = useMemo(
    () => features.filter((f) => !enabledFeatures.has(f.name)),
    [features, enabledFeatures]
  );
  const enabledFeatureList = useMemo(
    () => features.filter((f) => enabledFeatures.has(f.name)),
    [features, enabledFeatures]
  );

  const containerRef = useRef<HTMLDivElement>(null);
  const [splitFraction, setSplitFraction] = useState(0.65);
  const draggingRef = useRef(false);

  const handleSeparatorPointerDown = useCallback(
    (e: React.PointerEvent) => {
      e.preventDefault();
      // Capture on the separator itself (currentTarget), not on whichever
      // child happened to be under the pointer.
      e.currentTarget.setPointerCapture(e.pointerId);
      draggingRef.current = true;
    },
    []
  );

  const handleSeparatorPointerMove = useCallback(
    (e: React.PointerEvent) => {
      if (!draggingRef.current || !containerRef.current) return;
      const rect = containerRef.current.getBoundingClientRect();
      // A collapsed container would turn the fraction into NaN.
      if (rect.height <= 0) return;
      const y = e.clientY - rect.top;
      const fraction = Math.min(0.8, Math.max(0.15, y / rect.height));
      setSplitFraction(fraction);
    },
    []
  );

  // Also wired to pointercancel: without it a cancelled gesture would leave the
  // drag flag set and later hover movement would keep resizing the panel.
  const handleSeparatorPointerUp = useCallback(() => {
    draggingRef.current = false;
  }, []);

  return (
    <div ref={containerRef} className="w-80 flex flex-col bg-white dark:bg-navy-950 shadow-lg overflow-hidden">
      {/* Top: Active filters — user-resizable, scrollable */}
      <div className="min-h-0 flex flex-col" style={{ height: `${splitFraction * 100}%` }}>
        {/* Active Filters header */}
        <div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700">
          <div className="flex items-center gap-2">
            <span className="text-sm font-semibold text-navy-950 dark:text-warm-100">Active Filters</span>
            {enabledFeatureList.length > 0 && (
              <span className="text-xs font-medium px-1.5 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 text-teal-600 dark:text-teal-400">
                {enabledFeatureList.length}
              </span>
            )}
          </div>
          <span className="text-xs text-warm-500 dark:text-warm-400">Zoom {zoom.toFixed(1)}</span>
        </div>
        <div className="flex-1 overflow-y-auto p-3 space-y-3">
          {enabledFeatureList.length === 0 && (
            <div className="flex flex-col items-center justify-center py-8 text-center">
              <svg className="w-8 h-8 text-warm-300 dark:text-warm-600 mb-2" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={1.5}>
                <path strokeLinecap="round" strokeLinejoin="round" d="M12 3c2.755 0 5.455.232 8.083.678.533.09.917.556.917 1.096v1.044a2.25 2.25 0 01-.659 1.591l-5.432 5.432a2.25 2.25 0 00-.659 1.591v2.927a2.25 2.25 0 01-1.244 2.013L9.75 21v-6.568a2.25 2.25 0 00-.659-1.591L3.659 7.409A2.25 2.25 0 013 5.818V4.774c0-.54.384-1.006.917-1.096A48.32 48.32 0 0112 3z" />
              </svg>
              <span className="text-sm font-medium text-warm-400 dark:text-warm-500">No active filters</span>
              <span className="text-xs text-warm-400 dark:text-warm-500 mt-1">Browse features below and click + to add a filter</span>
            </div>
          )}
          {enabledFeatureList.map((feature) => {
            if (feature.type === 'enum') {
              const selectedValues = (filters[feature.name] as string[]) || [];
              const allValues = feature.values || [];
              return (
                <div key={feature.name} className={`space-y-1 p-3 rounded ${pinnedFeature === feature.name ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}>
                  <div className="flex items-center justify-between">
                    <Label>{feature.name}</Label>
                    <div className="flex items-center gap-0.5">
                      <button
                        onClick={() => onTogglePin(feature.name)}
                        className={`p-0.5 rounded ${pinnedFeature === feature.name ? 'text-teal-600 dark:text-teal-400' : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'}`}
                        title={pinnedFeature === feature.name ? 'Unpin color view' : 'Color map by this feature'}
                      >
                        <EyeIcon filled={pinnedFeature === feature.name} />
                      </button>
                      <button
                        onClick={() => onRemoveFilter(feature.name)}
                        className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 text-sm px-1"
                        title="Remove filter"
                      >
                        x
                      </button>
                    </div>
                  </div>
                  <div className="flex gap-2 text-sm mb-1">
                    <button
                      className="text-teal-600 dark:text-teal-400 hover:underline"
                      onClick={() => onFilterChange(feature.name, [...allValues])}
                    >
                      All
                    </button>
                    <button
                      className="text-teal-600 dark:text-teal-400 hover:underline"
                      onClick={() => onFilterChange(feature.name, [])}
                    >
                      None
                    </button>
                  </div>
                  <div className="space-y-0.5 max-h-40 overflow-y-auto">
                    {allValues.map((val) => (
                      <label key={val} className="flex items-center gap-1.5 text-sm cursor-pointer dark:text-warm-300">
                        <input
                          type="checkbox"
                          checked={selectedValues.includes(val)}
                          onChange={() => {
                            const next = selectedValues.includes(val)
                              ? selectedValues.filter((v) => v !== val)
                              : [...selectedValues, val];
                            onFilterChange(feature.name, next);
                          }}
                          className="rounded accent-teal-600"
                        />
                        {val}
                      </label>
                    ))}
                  </div>
                </div>
              );
            }
            // Numeric feature
            const isActive = activeFeature === feature.name;
            const isPinned = pinnedFeature === feature.name;
            // Show the in-flight drag value while this slider is being dragged,
            // otherwise the committed filter (or the feature's full range).
            const displayValue =
              isActive && dragValue
                ? dragValue
                : (filters[feature.name] as [number, number]) || [feature.min!, feature.max!];
            // Without an explicit step, divide the range into 100 increments.
            const step = feature.step ?? (feature.max! - feature.min!) / 100;
            return (
              <div
                key={feature.name}
                className={`space-y-1 p-3 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
              >
                <div className="flex items-center justify-between">
                  <Label>
                    {feature.name}: {formatValue(displayValue[0])} - {formatValue(displayValue[1])}
                  </Label>
                  <div className="flex items-center gap-0.5">
                    <button
                      onClick={() => onTogglePin(feature.name)}
                      className={`p-0.5 rounded ${isPinned ? 'text-teal-600 dark:text-teal-400' : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'}`}
                      title={isPinned ? 'Unpin color view' : 'Color map by this feature'}
                    >
                      <EyeIcon filled={isPinned} />
                    </button>
                    <button
                      onClick={() => onRemoveFilter(feature.name)}
                      className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 text-sm px-1"
                      title="Remove filter"
                    >
                      x
                    </button>
                  </div>
                </div>
                <Slider
                  min={feature.min!}
                  max={feature.max!}
                  step={step}
                  value={[displayValue[0], displayValue[1]]}
                  onValueChange={([min, max]) => onDragChange([min, max])}
                  onPointerDown={() => onDragStart(feature.name)}
                  onPointerUp={() => onDragEnd()}
                />
              </div>
            );
          })}
        </div>
      </div>
      {/* Draggable separator */}
      <div
        className="shrink-0 h-1.5 cursor-row-resize flex items-center justify-center bg-warm-100 dark:bg-navy-800 hover:bg-warm-200 dark:hover:bg-navy-700 border-y border-warm-200 dark:border-navy-700"
        onPointerDown={handleSeparatorPointerDown}
        onPointerMove={handleSeparatorPointerMove}
        onPointerUp={handleSeparatorPointerUp}
        onPointerCancel={handleSeparatorPointerUp}
      >
        <div className="w-8 h-0.5 rounded bg-warm-300 dark:bg-navy-600" />
      </div>
      {/* Bottom: Feature browser — fills remaining space */}
      <div className="min-h-0 flex-1 flex flex-col">
        <div className="shrink-0 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
          <span className="text-sm font-semibold text-navy-950 dark:text-warm-100">Add Filter</span>
        </div>
        <div className="min-h-0 flex-1 flex flex-col">
          <FeatureBrowser
            availableFeatures={availableFeatures}
            allFeatures={features}
            pinnedFeature={pinnedFeature}
            onAddFilter={onAddFilter}
            onTogglePin={onTogglePin}
            onNavigateToSource={onNavigateToSource}
            openInfoFeature={openInfoFeature}
            onClearOpenInfoFeature={onClearOpenInfoFeature}
          />
        </div>
      </div>
    </div>
  );
 });
--- a/frontend/src/components/HomePage.tsx
+++ b/frontend/src/components/HomePage.tsx
@ -0,0 +1,367 @@
 import { useRef, useState, useEffect, useCallback } from 'react';
 // --- Floating hex particle canvas that reacts to scroll ---
 // Number of hexagon particles created by initHexes each time the canvas is (re)sized.
 const HEX_COUNT = 60;
 // One full turn in radians (2π); used for random bob phases and hexagon corner angles.
 const TAU = Math.PI * 2;
 /** State of one drifting hexagon particle drawn by HexCanvas. */
 interface Hex {
  // Current horizontal position; advanced by `speed` every frame and wrapped.
  x: number;
  // Current vertical position; recomputed every frame from baseY, the bob and the scroll parallax.
  y: number;
  // Resting vertical position chosen when the particle is created.
  baseY: number;
  // Radius handed to drawHex (centre-to-corner distance).
  size: number;
  // Base alpha before the scroll fade and dark-theme dimming are applied.
  opacity: number;
  speed: number; // horizontal drift px/s
  phase: number; // for gentle bob
 }
 /**
  * Create HEX_COUNT particles scattered at random over a `w` × `h` area.
  * Every particle gets its own random size, opacity, drift speed and bob phase.
  */
 function initHexes(w: number, h: number): Hex[] {
  return Array.from({ length: HEX_COUNT }, (): Hex => {
    // The vertical position is drawn first so that y and baseY start out identical.
    const startY = Math.random() * h;
    return {
      x: Math.random() * w,
      y: startY,
      baseY: startY,
      size: 8 + Math.random() * 20,
      opacity: 0.06 + Math.random() * 0.12,
      speed: 6 + Math.random() * 14,
      phase: Math.random() * TAU,
    };
  });
 }
 /**
  * Trace a closed regular hexagon of radius `r` centred on (`cx`, `cy`) as the current
  * path of `ctx`. The caller decides whether to fill or stroke it.
  */
 function drawHex(ctx: CanvasRenderingContext2D, cx: number, cy: number, r: number) {
  const cornerStep = TAU / 6;
  ctx.beginPath();
  for (let corner = 0; corner < 6; corner += 1) {
    // Offset by -30° so the first corner sits to the lower right of a flat side.
    const theta = cornerStep * corner - Math.PI / 6;
    const cornerX = cx + r * Math.cos(theta);
    const cornerY = cy + r * Math.sin(theta);
    if (corner > 0) {
      ctx.lineTo(cornerX, cornerY);
    } else {
      ctx.moveTo(cornerX, cornerY);
    }
  }
  ctx.closePath();
 }
 /**
  * Decorative canvas of drifting teal hexagons, sized to its parent element.
  *
  * Particles drift to the right, bob gently, are pushed upwards by a scroll-driven parallax
  * and fade out as `scrollProgress` grows (fully transparent from 0.5 onwards). The canvas
  * ignores pointer events.
  *
  * @param scrollProgress Scroll position of the page, where 0 is the top.
  * @param isDark Use the dimmer dark-theme palette when true (default false).
  */
 function HexCanvas({ scrollProgress, isDark = false }: { scrollProgress: number; isDark?: boolean }) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const hexesRef = useRef<Hex[]>([]);
  const animRef = useRef(0);
  // Mirror the latest props into refs so the animation loop, which is created only once,
  // always reads current values without re-running the effect.
  const scrollRef = useRef(scrollProgress);
  scrollRef.current = scrollProgress;
  const isDarkRef = useRef(isDark);
  isDarkRef.current = isDark;
  useEffect(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;
    const ctx = canvas.getContext('2d');
    if (!ctx) return;
    let w = 0;
    let h = 0;
    // Match the backing store to the parent's size and the device pixel ratio, then
    // re-scatter the particles over the new area.
    function resize() {
      const dpr = window.devicePixelRatio || 1;
      const rect = canvas!.parentElement!.getBoundingClientRect();
      w = rect.width;
      h = rect.height;
      canvas!.width = w * dpr;
      canvas!.height = h * dpr;
      canvas!.style.width = `${w}px`;
      canvas!.style.height = `${h}px`;
      ctx!.setTransform(dpr, 0, 0, dpr, 0, 0);
      hexesRef.current = initHexes(w, h);
    }
    resize();
    const ro = new ResizeObserver(resize);
    ro.observe(canvas.parentElement!);
    let prev = performance.now();
    function frame(now: number) {
      const dt = (now - prev) / 1000;
      prev = now;
      const scroll = scrollRef.current;
      ctx!.clearRect(0, 0, w, h);
      // Teal accent color, fade to 0 as user scrolls down
      const globalAlpha = Math.max(0, 1 - scroll * 2);
      // Once fully faded nothing can be seen, so positions are still advanced but the
      // path building and painting are skipped.
      const visible = globalAlpha > 0;
      // Theme-dependent state is identical for every particle: set it once per frame.
      const dark = isDarkRef.current;
      const themeDim = dark ? 0.6 : 1;
      if (visible) {
        ctx!.fillStyle = dark ? '#058172' : '#00a28c';
        ctx!.strokeStyle = dark ? '#0a665b' : '#05c9aa';
        ctx!.lineWidth = 1;
      }
      for (const hex of hexesRef.current) {
        // drift right, wrap
        hex.x = (hex.x + hex.speed * dt) % (w + hex.size * 2);
        // gentle vertical bob + parallax push from scroll
        const bob = Math.sin(now / 1000 + hex.phase) * 8;
        const parallax = scroll * h * 0.3 * (hex.speed / 20);
        hex.y = hex.baseY + bob - parallax;
        // wrap vertically
        if (hex.y < -hex.size * 2) hex.y += h + hex.size * 4;
        if (hex.y > h + hex.size * 2) hex.y -= h + hex.size * 4;
        if (!visible) continue;
        // The path survives fill(), so one path serves both the fill and the outline.
        drawHex(ctx!, hex.x, hex.y, hex.size);
        ctx!.globalAlpha = hex.opacity * globalAlpha * themeDim;
        ctx!.fill();
        ctx!.globalAlpha = hex.opacity * 0.5 * globalAlpha * themeDim;
        ctx!.stroke();
      }
      animRef.current = requestAnimationFrame(frame);
    }
    animRef.current = requestAnimationFrame(frame);
    return () => {
      cancelAnimationFrame(animRef.current);
      ro.disconnect();
    };
  }, []);
  return (
    <canvas
      ref={canvasRef}
      className="absolute inset-0 pointer-events-none"
      style={{ zIndex: 0 }}
    />
  );
 }
 // --- Fade-in hook ---
 /**
  * Returns a ref for a div that receives the `fade-in-visible` class the first time at
  * least 15% of it is intersecting; after that the element is no longer observed.
  */
 function useFadeInRef() {
  const ref = useRef<HTMLDivElement>(null);
  useEffect(() => {
    const target = ref.current;
    if (target === null) return;
    const io = new IntersectionObserver(
      (entries) => {
        const first = entries[0];
        if (!first.isIntersecting) return;
        // Reveal once, then stop watching this element.
        target.classList.add('fade-in-visible');
        io.unobserve(target);
      },
      { threshold: 0.15 }
    );
    io.observe(target);
    return () => {
      io.disconnect();
    };
  }, []);
  return ref;
 }
 // --- Page ---
 /**
  * Landing page: a scrollable column of marketing sections rendered above an animated
  * HexCanvas background.
  *
  * @param onOpenDashboard Called when either call-to-action button is clicked.
  * @param theme Current colour theme; 'dark' switches the hex canvas to its dark palette
  *   (default 'light').
  */
 export default function HomePage({ onOpenDashboard, theme = 'light' }: { onOpenDashboard: () => void; theme?: 'light' | 'dark' }) {
  const scrollRef = useRef<HTMLDivElement>(null);
  const [scrollProgress, setScrollProgress] = useState(0);
  // Turn the container's scrollTop into a fraction of its scrollable height; nothing is
  // updated when the content does not overflow.
  const handleScroll = useCallback(() => {
    const el = scrollRef.current;
    if (!el) return;
    const max = el.scrollHeight - el.clientHeight;
    if (max <= 0) return;
    setScrollProgress(el.scrollTop / max);
  }, []);
  // Passive scroll listener on the scroll container, removed again on cleanup.
  useEffect(() => {
    const el = scrollRef.current;
    if (!el) return;
    el.addEventListener('scroll', handleScroll, { passive: true });
    return () => el.removeEventListener('scroll', handleScroll);
  }, [handleScroll]);
  // One fade-in ref per page section.
  const heroRef = useFadeInRef();
  const problemRef = useFadeInRef();
  const filtersRef = useFadeInRef();
  const howRef = useFadeInRef();
  const numbersRef = useFadeInRef();
  const ctaRef = useFadeInRef();
  return (
    <div ref={scrollRef} className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950 relative">
      <HexCanvas scrollProgress={scrollProgress} isDark={theme === 'dark'} />
      <div className="relative" style={{ zIndex: 1 }}>
        {/* Hero */}
        <div className="max-w-3xl mx-auto px-6 pt-20 pb-24">
          <div
            ref={heroRef}
            className="fade-in-section backdrop-blur-sm bg-warm-50/60 dark:bg-navy-950/60 rounded-2xl p-8 -mx-2"
          >
            <p className="text-teal-600 font-semibold tracking-wide uppercase text-sm mb-4">
              Find where to live, not just what&apos;s for sale
            </p>
            <h1 className="text-5xl font-extrabold text-navy-950 dark:text-warm-100 mb-6 leading-[1.1] tracking-tight">
              Every neighbourhood
              <br />
              in England &amp; Wales.
              <br />
              <span className="text-teal-600">One map. Your&nbsp;rules.</span>
            </h1>
            <p className="text-xl text-warm-600 dark:text-warm-400 mb-8 leading-relaxed max-w-xl">
              Set the commute, budget, school rating, noise level, and crime threshold you&apos;ll
              accept. Narrowit shows you every area that qualifies &mdash; instantly.
            </p>
            <div className="flex items-center gap-4">
              <button
                onClick={onOpenDashboard}
                className="px-7 py-3.5 bg-coral-500 text-white rounded-lg font-semibold hover:bg-coral-600 transition-colors text-base shadow-lg shadow-coral-500/25"
              >
                Explore the map
              </button>
              <span className="text-warm-400 text-sm">
                No signup &middot; Free &middot; Open data
              </span>
            </div>
          </div>
        </div>
        {/* The flip */}
        <div className="max-w-3xl mx-auto px-6 pb-20">
          <div ref={problemRef} className="fade-in-section">
            <div className="rounded-2xl backdrop-blur-sm bg-warm-50/40 dark:bg-navy-800/40 border border-warm-200/50 dark:border-navy-700/50 p-8">
              <div className="grid md:grid-cols-2 gap-8">
                <div>
                  <h3 className="text-sm font-semibold text-warm-400 uppercase tracking-wide mb-2">
                    The old way
                  </h3>
                  <p className="text-warm-700 dark:text-warm-300 leading-relaxed">
                    Pick a postcode. Google the schools. Check crime stats on another site. Look up
                    commute times. Realise it&apos;s too expensive. Start over. Repeat 40 times.
                  </p>
                </div>
                <div>
                  <h3 className="text-sm font-semibold text-teal-600 uppercase tracking-wide mb-2">
                    With Narrowit
                  </h3>
                  <p className="text-warm-700 dark:text-warm-300 leading-relaxed">
                    Tell the map what you need. Every hexagon that lights up is a place worth
                    looking at. Drill into any one to see individual properties, prices, and energy
                    ratings.
                  </p>
                </div>
              </div>
            </div>
          </div>
        </div>
        {/* Filter showcase */}
        <div className="max-w-4xl mx-auto px-6 pb-20">
          <div ref={filtersRef} className="fade-in-section">
            <h2 className="text-3xl font-bold text-navy-950 dark:text-warm-100 mb-2 text-center">
              12 datasets. One slider&nbsp;each.
            </h2>
            <p className="text-warm-500 dark:text-warm-400 text-center mb-10 max-w-lg mx-auto">
              Every filter narrows the map in real time. Combine as many as you like.
            </p>
            <div className="grid grid-cols-2 md:grid-cols-4 gap-3">
              {FILTERS.map((f) => (
                <div
                  key={f.label}
                  className="rounded-xl bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 p-4 shadow-sm hover:shadow-md hover:border-teal-300 dark:hover:border-teal-600 transition-all"
                >
                  <div className="text-2xl mb-2">{f.icon}</div>
                  <div className="font-semibold text-navy-950 dark:text-warm-100 text-sm">{f.label}</div>
                  <div className="text-xs text-warm-500 dark:text-warm-400 mt-0.5">{f.example}</div>
                </div>
              ))}
            </div>
          </div>
        </div>
        {/* How it works */}
        <div className="max-w-3xl mx-auto px-6 pb-20">
          <div ref={howRef} className="fade-in-section">
            <h2 className="text-3xl font-bold text-navy-950 dark:text-warm-100 mb-10 text-center">
              Three clicks to clarity
            </h2>
            <div className="space-y-6">
              {STEPS.map((step, i) => (
                <div key={i} className="flex gap-5 items-start">
                  <span className="shrink-0 w-10 h-10 rounded-full bg-teal-600 text-white flex items-center justify-center text-lg font-bold">
                    {i + 1}
                  </span>
                  <div>
                    <h3 className="font-semibold text-navy-950 dark:text-warm-100 text-lg">{step.title}</h3>
                    <p className="text-warm-600 dark:text-warm-400 mt-0.5">{step.body}</p>
                  </div>
                </div>
              ))}
            </div>
          </div>
        </div>
        {/* Numbers */}
        <div className="max-w-3xl mx-auto px-6 pb-20">
          <div ref={numbersRef} className="fade-in-section">
            <div className="grid grid-cols-3 gap-6 text-center">
              {STATS.map((s) => (
                <div key={s.label}>
                  <div className="text-3xl font-extrabold text-teal-600">{s.value}</div>
                  <div className="text-sm text-warm-500 dark:text-warm-400 mt-1">{s.label}</div>
                </div>
              ))}
            </div>
          </div>
        </div>
        {/* Final CTA */}
        <div className="max-w-3xl mx-auto px-6 pb-24">
          <div ref={ctaRef} className="fade-in-section text-center">
            <h2 className="text-3xl font-bold text-navy-950 dark:text-warm-100 mb-3">Ready to narrow it down?</h2>
            <p className="text-warm-500 dark:text-warm-400 mb-8 max-w-md mx-auto">
              100% open data. No account required. Just set your filters and go.
            </p>
            <button
              onClick={onOpenDashboard}
              className="px-8 py-4 bg-coral-500 text-white rounded-lg font-semibold hover:bg-coral-600 transition-colors text-lg shadow-lg shadow-coral-500/25"
            >
              Open the map
            </button>
          </div>
        </div>
      </div>
    </div>
  );
 }
 // --- Data ---
 // Cards for the "Filter showcase" grid: an icon glyph, a label (also used as the React
 // key) and a short example of the filter in use.
 const FILTERS = [
  { icon: '\u00A3', label: 'Sale price', example: 'e.g. under \u00A3400k' },
  { icon: '\uD83D\uDE86', label: 'Commute time', example: 'e.g. < 45 min to Bank' },
  { icon: '\uD83C\uDFEB', label: 'School quality', example: 'Ofsted Outstanding' },
  { icon: '\uD83D\uDEA8', label: 'Crime rate', example: 'Low burglary areas' },
  { icon: '\u26A1', label: 'Energy rating', example: 'EPC band A\u2013C' },
  { icon: '\uD83D\uDCCF', label: 'Floor area', example: 'e.g. 80+ sqm' },
  { icon: '\uD83D\uDD07', label: 'Road noise', example: 'Below 55 dB Lden' },
  { icon: '\uD83C\uDF10', label: 'Broadband speed', example: '100+ Mbps available' },
 ];
 // Steps of the "Three clicks to clarity" section; they are numbered by array position.
 const STEPS = [
  {
    title: 'Add your deal-breakers',
    body: 'Slide the filters for everything you care about \u2014 price cap, max commute, school quality, noise. The map updates as you drag.',
  },
  {
    title: 'Spot the clusters',
    body: 'Hexagons light up where properties match. Zoom in and they split into finer cells. At street level you see individual postcode boundaries.',
  },
  {
    title: 'Dive into a neighbourhood',
    body: 'Click any hexagon to see every property inside it \u2014 sale prices, floor plans, energy ratings, tenure. Layer on cafes, GP surgeries, and parks from OpenStreetMap.',
  },
 ];
 // Headline figures for the three-column numbers row; `label` doubles as the React key.
 const STATS = [
  { value: '26M+', label: 'property records' },
  { value: '12', label: 'open datasets' },
  { value: '1.7M', label: 'postcodes mapped' },
 ];
--- a/frontend/src/components/Map.tsx
+++ b/frontend/src/components/Map.tsx
@ -1,88 +1,41 @@
-import { useCallback, useRef, useEffect, useState, useMemo } from 'react';
+import { useCallback, useRef, useEffect, useState, useMemo, memo } from 'react';
-import { Map as MapGL } from 'react-map-gl/maplibre';
+import { Map as MapGL, useControl } from 'react-map-gl/maplibre';
-import DeckGL from '@deck.gl/react';
+import type { MapRef } from 'react-map-gl/maplibre';
 import { MapboxOverlay } from '@deck.gl/mapbox';
 import { H3HexagonLayer } from '@deck.gl/geo-layers';
-import { IconLayer } from '@deck.gl/layers';
+import { IconLayer, TextLayer } from '@deck.gl/layers';
 import type { PickingInfo } from '@deck.gl/core';
 import 'maplibre-gl/dist/maplibre-gl.css';
-import type { HexagonData, ViewState, ViewChangeParams, Bounds, POI } from '../types';
+import type { HexagonData, ViewState, ViewChangeParams, Bounds, POI, FeatureMeta } from '../types';
 interface MapProps {
  data: HexagonData[];
  pois: POI[];
  onViewChange: (params: ViewChangeParams) => void;
  viewFeature: string | null;
  colorRange: [number, number] | null;
  filterRange: [number, number] | null;
  viewSource: 'drag' | 'eye' | null;
  onCancelPin: () => void;
  features: FeatureMeta[];
  selectedHexagonId: string | null;
  hoveredHexagonId: string | null;
  onHexagonClick: (h3: string) => void;
  onHexagonHover: (h3: string | null) => void;
  initialViewState?: ViewState;
  theme?: 'light' | 'dark';
 }
 // Twemoji CDN base URL
 const TWEMOJI_BASE = 'https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72/';
-// Map category to Twemoji codepoint (emoji unicode -> hex)
+// Convert emoji to Twemoji URL
-const POI_EMOJI_CODES: Record<string, string> = {
+function emojiToTwemojiUrl(emoji: string): string {
-  // Schools
+  // Convert emoji to Unicode codepoint hex
-  elementary_school: '1f3eb', // 🏫
+  const codePoint = emoji.codePointAt(0);
-  school: '1f3eb',
+  if (!codePoint) return `${TWEMOJI_BASE}1f4cd.png`; // Default pin
-  high_school: '1f393', // 🎓
+  const hex = codePoint.toString(16);
-  preschool: '1f476', // 👶
+  return `${TWEMOJI_BASE}${hex}.png`;
  college_university: '1f393',
  private_school: '1f3eb',
  // Healthcare
  doctor: '1f3e5', // 🏥
  dentist: '1f9b7', // 🦷
  pharmacy: '1f48a', // 💊
  hospital: '1f3e5',
  public_health_clinic: '1f3e5',
  // Transport
  train_station: '1f689', // 🚉
  bus_station: '1f68c', // 🚌
  metro_station: '1f687', // 🚇
  light_rail_and_subway_stations: '1f687',
  // Parks
  park: '1f333', // 🌳
  national_park: '1f3de', // 🏞
  dog_park: '1f415', // 🐕
  // Emergency
  police_department: '1f694', // 🚔
  fire_department: '1f692', // 🚒
  // Supermarkets
  supermarket: '1f6d2', // 🛒
  grocery_store: '1f6d2',
  convenience_store: '1f3ea', // 🏪
 };
 function getPOIIconUrl(category: string): string {
  const code = POI_EMOJI_CODES[category] || '1f4cd'; // 📍 default
  return `${TWEMOJI_BASE}${code}.png`;
 }
 // Tooltip emojis (these render fine in HTML)
 const TOOLTIP_EMOJIS: Record<string, string> = {
  elementary_school: '🏫',
  school: '🏫',
  high_school: '🎓',
  preschool: '👶',
  college_university: '🎓',
  private_school: '🏫',
  doctor: '👨‍⚕️',
  dentist: '🦷',
  pharmacy: '💊',
  hospital: '🏥',
  public_health_clinic: '🏥',
  train_station: '🚉',
  bus_station: '🚌',
  metro_station: '🚇',
  light_rail_and_subway_stations: '🚇',
  park: '🌳',
  national_park: '🏞️',
  dog_park: '🐕',
  police_department: '🚔',
  fire_department: '🚒',
  supermarket: '🛒',
  grocery_store: '🛒',
  convenience_store: '🏪',
 };
 function getTooltipEmoji(category: string): string {
  return TOOLTIP_EMOJIS[category] || '📍';
 }
 const INITIAL_VIEW: ViewState = {
@ -92,61 +45,44 @@ const INITIAL_VIEW: ViewState = {
  pitch: 0,
 };
-const MAP_STYLE = 'https://basemaps.cartocdn.com/gl/positron-gl-style/style.json';
+const MAP_STYLE_LIGHT = 'https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json';
 const MAP_STYLE_DARK = 'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json';
-interface ColorStop {
+// Gradient stops for normalized [0,1] values
-  price: number;
+const GRADIENT: { t: number; color: [number, number, number] }[] = [
-  color: [number, number, number];
+  { t: 0, color: [46, 204, 113] }, // Green
-}
+  { t: 0.33, color: [241, 196, 15] }, // Yellow
-
+  { t: 0.66, color: [231, 76, 60] }, // Red
-// Continuous color scale from green (low) -> yellow -> red -> purple (high)
+  { t: 1, color: [142, 68, 173] }, // Purple
 const COLOR_SCALE: ColorStop[] = [
  { price: 0, color: [46, 204, 113] }, // Green
  { price: 200000, color: [241, 196, 15] }, // Yellow
  { price: 400000, color: [231, 76, 60] }, // Red
  { price: 800000, color: [142, 68, 173] }, // Purple
 ];
-function interpolateColor(
+function normalizedToColor(t: number): [number, number, number] {
-  c1: [number, number, number],
+  if (t <= 0) return GRADIENT[0].color;
-  c2: [number, number, number],
+  if (t >= 1) return GRADIENT[GRADIENT.length - 1].color;
  t: number
 ): [number, number, number] {
  return [
    Math.round(c1[0] + (c2[0] - c1[0]) * t),
    Math.round(c1[1] + (c2[1] - c1[1]) * t),
    Math.round(c1[2] + (c2[2] - c1[2]) * t),
  ];
 }
-function priceToColor(price: number | null | undefined): [number, number, number] {
+  for (let i = 0; i < GRADIENT.length - 1; i++) {
-  if (price == null || isNaN(price)) return [128, 128, 128]; // Gray for missing data
+    const lo = GRADIENT[i];
-
+    const hi = GRADIENT[i + 1];
-  // Clamp to scale range
+    if (t >= lo.t && t <= hi.t) {
-  if (price <= COLOR_SCALE[0].price) return COLOR_SCALE[0].color;
+      const frac = (t - lo.t) / (hi.t - lo.t);
-  if (price >= COLOR_SCALE[COLOR_SCALE.length - 1].price) {
+      return [
-    return COLOR_SCALE[COLOR_SCALE.length - 1].color;
+        Math.round(lo.color[0] + (hi.color[0] - lo.color[0]) * frac),
-  }
+        Math.round(lo.color[1] + (hi.color[1] - lo.color[1]) * frac),
-
+        Math.round(lo.color[2] + (hi.color[2] - lo.color[2]) * frac),
-  // Find the two colors to interpolate between
+      ];
  for (let i = 0; i < COLOR_SCALE.length - 1; i++) {
    const lower = COLOR_SCALE[i];
    const upper = COLOR_SCALE[i + 1];
    if (price >= lower.price && price <= upper.price) {
      const t = (price - lower.price) / (upper.price - lower.price);
      return interpolateColor(lower.color, upper.color, t);
    }
  }
-
+  return GRADIENT[GRADIENT.length - 1].color;
  return COLOR_SCALE[COLOR_SCALE.length - 1].color;
 }
 function zoomToResolution(zoom: number): number {
-  if (zoom < 8.5) return 7;
+  if (zoom < 6) return 5;
  if (zoom < 7) return 6;
  if (zoom < 9.5) return 8;
  if (zoom < 11) return 9;
  if (zoom < 13) return 10;
-  return 11;
+  if (zoom < 15) return 11;
  return 12;
 }
 function getBoundsFromViewState(viewState: ViewState, width: number, height: number): Bounds {
@ -165,7 +101,6 @@ function getBoundsFromViewState(viewState: ViewState, width: number, height: num
  const halfWidthDeg = (width / 2) * degreesPerPixelLng;
  // Latitude uses Mercator projection (non-linear)
  // Convert center lat to pixel y, offset by half height, convert back to lat
  const latRad = (clampedLat * Math.PI) / 180;
  const mercatorY = (1 - Math.log(Math.tan(latRad) + 1 / Math.cos(latRad)) / Math.PI) / 2;
  const centerPixelY = mercatorY * worldSize;
@ -175,7 +110,7 @@ function getBoundsFromViewState(viewState: ViewState, width: number, height: num
  // Convert pixel Y back to latitude
  const pixelYToLat = (pixelY: number): number => {
-    const mercY = Math.max(0.001, Math.min(0.999, pixelY / worldSize)); // Clamp to avoid edge cases
+    const mercY = Math.max(0.001, Math.min(0.999, pixelY / worldSize));
    const latRadians = Math.atan(Math.sinh(Math.PI * (1 - 2 * mercY)));
    return (latRadians * 180) / Math.PI;
  };
@ -193,9 +128,215 @@ interface Dimensions {
  height: number;
 }
-export default function Map({ data, pois, onViewChange }: MapProps) {
+function DeckOverlay({
  layers,
  getTooltip,
 }: {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  layers: any[];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  getTooltip: any;
 }) {
  // Registers a deck.gl MapboxOverlay (interleaved mode) as a map control and keeps its
  // `layers` / `getTooltip` props in sync with this component's props. Renders nothing.
  //
  // The factory passed to useControl seeds the overlay with the first render's props.
  // The refs below start out equal to those same props, so without seeding here the
  // initial layers and tooltip would not reach the overlay until one of them changed
  // identity.
  const overlay = useControl(() => new MapboxOverlay({ interleaved: true, layers, getTooltip }));
  const prevLayersRef = useRef(layers);
  const prevTooltipRef = useRef(getTooltip);
  // Only push props to deck.gl when a reference actually changed.
  if (layers !== prevLayersRef.current || getTooltip !== prevTooltipRef.current) {
    prevLayersRef.current = layers;
    prevTooltipRef.current = getTooltip;
    overlay.setProps({ layers, getTooltip });
  }
  return null;
 }
 // Vibrant density scale: light cyan → teal → deep indigo
 const DENSITY_GRADIENT: { t: number; color: [number, number, number] }[] = [
  { t: 0, color: [130, 234, 220] }, // Light cyan (few)
  { t: 0.5, color: [20, 140, 180] }, // Ocean blue (moderate)
  { t: 1, color: [88, 28, 140] }, // Deep indigo (many)
 ];
 /**
  * Map a normalised density `t` to an RGB triple by linear interpolation between the
  * DENSITY_GRADIENT stops. Values at or below 0 and at or above 1 clamp to the first and
  * last stop; anything that matches no segment falls back to the last stop.
  */
 function countToColor(t: number): [number, number, number] {
  const lastStop = DENSITY_GRADIENT[DENSITY_GRADIENT.length - 1];
  if (t <= 0) return DENSITY_GRADIENT[0].color;
  if (t >= 1) return lastStop.color;
  for (let idx = 1; idx < DENSITY_GRADIENT.length; idx += 1) {
    const from = DENSITY_GRADIENT[idx - 1];
    const to = DENSITY_GRADIENT[idx];
    // Written as a negated range test so that NaN matches no segment.
    if (!(t >= from.t && t <= to.t)) continue;
    const ratio = (t - from.t) / (to.t - from.t);
    const mix = (channel: 0 | 1 | 2): number =>
      Math.round(from.color[channel] + (to.color[channel] - from.color[channel]) * ratio);
    return [mix(0), mix(1), mix(2)];
  }
  return lastStop.color;
 }
 /**
  * Small search form overlaid on the map's top-left corner. Looks the entered postcode up
  * via api.postcodes.io and, on success, asks the map to fly to it at zoom 14.
  *
  * @param onFlyTo Called with (latitude, longitude, zoom) of the resolved postcode.
  */
 function PostcodeSearch({
  onFlyTo,
 }: {
  onFlyTo: (lat: number, lng: number, zoom: number) => void;
 }) {
  const [query, setQuery] = useState('');
  const [error, setError] = useState<string | null>(null);
  const [loading, setLoading] = useState(false);
  const handleSubmit = useCallback(
    async (e: React.FormEvent) => {
      e.preventDefault();
      const trimmed = query.trim();
      if (!trimmed) return;
      setError(null);
      setLoading(true);
      try {
        const res = await fetch(
          `https://api.postcodes.io/postcodes/${encodeURIComponent(trimmed)}`
        );
        if (!res.ok) {
          // A 5xx response means the lookup service itself failed, which says nothing
          // about whether the postcode exists; report it like any other failed lookup.
          setError(res.status >= 500 ? 'Lookup failed' : 'Postcode not found');
          return;
        }
        const json = await res.json();
        if (json.status === 200 && json.result) {
          onFlyTo(json.result.latitude, json.result.longitude, 14);
          // Clear the box after a successful jump.
          setQuery('');
        } else {
          setError('Postcode not found');
        }
      } catch {
        // Network errors and unparsable responses end up here.
        setError('Lookup failed');
      } finally {
        setLoading(false);
      }
    },
    [query, onFlyTo]
  );
  return (
    <form onSubmit={handleSubmit} className="absolute top-3 left-3 z-10 flex flex-col gap-1">
      <div className="flex shadow-lg rounded overflow-hidden">
        <input
          type="text"
          value={query}
          onChange={(e) => {
            setQuery(e.target.value);
            setError(null);
          }}
          placeholder="Search postcode..."
          className="px-3 py-2 text-sm w-40 border-none outline-none bg-white dark:bg-navy-800 dark:text-warm-100 dark:placeholder-warm-500"
        />
        <button
          type="submit"
          disabled={loading}
          className="px-3 py-2 bg-teal-600 text-white text-sm hover:bg-teal-700 disabled:opacity-50"
        >
          {loading ? '...' : 'Go'}
        </button>
      </div>
      {error && (
        <span className="text-xs text-red-600 dark:text-red-400 bg-white/90 dark:bg-navy-800/90 rounded px-2 py-0.5 shadow">{error}</span>
      )}
    </form>
  );
 }
 /**
  * Legend box shown in the map's top-right corner: a title, a colour-gradient bar and the
  * labels for both ends of the scale.
  *
  * @param featureLabel Title shown above the gradient.
  * @param range Numeric [low, high] shown under the bar when neither density mode nor
  *   enum labels apply.
  * @param showCancel Whether to render the "clear colour view" button.
  * @param onCancel Invoked when that button is clicked.
  * @param mode 'density' uses the cyan-to-indigo gradient with "Few"/"Many" labels;
  *   'feature' uses the green-to-purple gradient.
  * @param enumValues Optional ordered labels; the first and last are shown instead of
  *   the numeric range.
  */
 function MapLegend({
  featureLabel,
  range,
  showCancel,
  onCancel,
  mode,
  enumValues,
 }: {
  featureLabel: string;
  range: [number, number];
  showCancel: boolean;
  onCancel: () => void;
  mode: 'feature' | 'density';
  enumValues?: string[];
 }) {
  // Compact number formatting: 1.2M, 3.4k, integers as-is, otherwise one decimal place.
  const formatVal = (v: number) => {
    const magnitude = Math.abs(v);
    const asMillions = () => `${(v / 1_000_000).toFixed(1)}M`;
    if (magnitude >= 1_000_000) return asMillions();
    if (magnitude >= 1_000) {
      const thousands = (v / 1_000).toFixed(1);
      // Values just below one million round up to "1000.0"; show them as 1.0M rather
      // than "1000.0k".
      return Math.abs(Number(thousands)) >= 1_000 ? asMillions() : `${thousands}k`;
    }
    if (Number.isInteger(v)) return v.toString();
    return v.toFixed(1);
  };
  const gradientStyle =
    mode === 'density'
      ? 'linear-gradient(to right, rgb(130, 234, 220), rgb(20, 140, 180), rgb(88, 28, 140))'
      : 'linear-gradient(to right, rgb(46, 204, 113), rgb(241, 196, 15), rgb(231, 76, 60), rgb(142, 68, 173))';
  return (
    <div className="absolute top-3 right-3 z-10 bg-white dark:bg-navy-800 dark:text-warm-200 rounded shadow-lg p-3 text-xs min-w-[160px]">
      <div className="flex items-center justify-between mb-2">
        <span className="font-semibold text-sm">{featureLabel}</span>
        {showCancel && (
          <button
            onClick={onCancel}
            className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 ml-2"
            title="Clear color view"
          >
            <svg
              className="w-4 h-4"
              fill="none"
              stroke="currentColor"
              viewBox="0 0 24 24"
              strokeWidth={2}
            >
              <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
            </svg>
          </button>
        )}
      </div>
      <div
        className="h-3 rounded"
        style={{ background: gradientStyle }}
      />
      <div className="flex justify-between mt-1 text-warm-600 dark:text-warm-400">
        {mode === 'density' ? (
          <>
            <span>Few</span>
            <span>Many</span>
          </>
        ) : enumValues && enumValues.length > 0 ? (
          <>
            <span>{enumValues[0]}</span>
            <span>{enumValues[enumValues.length - 1]}</span>
          </>
        ) : (
          <>
            <span>{formatVal(range[0])}</span>
            <span>{formatVal(range[1])}</span>
          </>
        )}
      </div>
    </div>
  );
 }
 export default memo(function Map({
  data,
  pois,
  onViewChange,
  viewFeature,
  colorRange,
  filterRange,
  viewSource,
  onCancelPin,
  features,
  selectedHexagonId,
  hoveredHexagonId,
  onHexagonClick,
  onHexagonHover,
  initialViewState,
  theme = 'light',
 }: MapProps) {
  const containerRef = useRef<HTMLDivElement>(null);
-  const [viewState, setViewState] = useState<ViewState>(INITIAL_VIEW);
+  const [viewState, setViewState] = useState<ViewState>(initialViewState || INITIAL_VIEW);
  const [dimensions, setDimensions] = useState<Dimensions>({ width: 0, height: 0 });
  // Track container dimensions with ResizeObserver
@ -218,18 +359,69 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
  useEffect(() => {
    if (dimensions.width === 0 || dimensions.height === 0) return;
-    const bounds = getBoundsFromViewState(viewState, dimensions.width, dimensions.height);
+    const raw = getBoundsFromViewState(viewState, dimensions.width, dimensions.height);
    const resolution = zoomToResolution(viewState.zoom);
-    onViewChange({ resolution, bounds, zoom: viewState.zoom });
+    // Quantize bounds to 0.01° to reduce state churn and improve backend cache hits
    const QUANT = 0.01;
    const bounds: Bounds = {
      south: Math.floor(raw.south / QUANT) * QUANT,
      west: Math.floor(raw.west / QUANT) * QUANT,
      north: Math.ceil(raw.north / QUANT) * QUANT,
      east: Math.ceil(raw.east / QUANT) * QUANT,
    };
    onViewChange({
      resolution,
      bounds,
      zoom: viewState.zoom,
      latitude: viewState.latitude,
      longitude: viewState.longitude,
    });
  }, [viewState, dimensions, onViewChange]);
-  const handleViewStateChange = useCallback((params: { viewState: unknown }) => {
+  const handleMove = useCallback((evt: { viewState: ViewState }) => {
-    const newViewState = params.viewState as ViewState;
+    setViewState(evt.viewState);
    setViewState(newViewState);
  }, []);
-  // Popup state for POI hover (using screen coordinates)
+  const handleFlyTo = useCallback((lat: number, lng: number, zoom: number) => {
    setViewState((prev) => ({ ...prev, latitude: lat, longitude: lng, zoom }));
  }, []);
  const themeRef = useRef(theme);
  themeRef.current = theme;
  // Make place labels more legible over the colored hexagons
  const handleMapLoad = useCallback(
    (evt: { target: MapRef['getMap'] extends () => infer M ? M : never }) => {
      const map = evt.target;
      if (themeRef.current === 'light') {
        for (const layer of map.getStyle().layers || []) {
          if (layer.type !== 'symbol') continue;
          map.setPaintProperty(layer.id, 'text-halo-color', 'rgba(255,255,255,1)');
          map.setPaintProperty(layer.id, 'text-halo-width', 2);
          map.setPaintProperty(layer.id, 'text-color', '#222');
        }
        // Make water more prominent
        for (const layer of map.getStyle().layers || []) {
          if (layer.id === 'water' || layer.id.startsWith('water')) {
            map.setPaintProperty(layer.id, 'fill-color', '#6baed6');
          }
        }
      }
      try {
        map.setLayoutProperty('building', 'visibility', 'none');
        map.setLayoutProperty('building-top', 'visibility', 'none');
      } catch {
        // layers may not exist in dark style
      }
    },
    []
  );
  const mapStyle = theme === 'dark' ? MAP_STYLE_DARK : MAP_STYLE_LIGHT;
  // Popup state for POI hover
  const [popupInfo, setPopupInfo] = useState<{
    x: number;
    y: number;
@ -250,24 +442,149 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
    }
  }, []);
-  const layers = useMemo(
+  // Compute count range for count-based coloring
-    () => [
+  const countRange = useMemo(() => {
    if (data.length === 0) return { min: 0, max: 1 };
    let min = Infinity;
    let max = -Infinity;
    for (const d of data) {
      const c = d.count as number;
      if (c < min) min = c;
      if (c > max) max = c;
    }
    if (min === max) return { min, max: min + 1 };
    return { min, max };
  }, [data]);
  // Memoize feature lookup to avoid new reference each render
  const colorFeatureMeta = useMemo(
    () => (viewFeature ? features.find((f) => f.name === viewFeature) || null : null),
    [viewFeature, features]
  );
  // Use refs for values that change during drag so layers aren't recreated
  const viewFeatureRef = useRef(viewFeature);
  viewFeatureRef.current = viewFeature;
  const colorRangeRef = useRef(colorRange);
  colorRangeRef.current = colorRange;
  const filterRangeRef = useRef(filterRange);
  filterRangeRef.current = filterRange;
  const colorFeatureMetaRef = useRef(colorFeatureMeta);
  colorFeatureMetaRef.current = colorFeatureMeta;
  const countRangeRef = useRef(countRange);
  countRangeRef.current = countRange;
  const selectedHexagonIdRef = useRef(selectedHexagonId);
  selectedHexagonIdRef.current = selectedHexagonId;
  const hoveredHexagonIdRef = useRef(hoveredHexagonId);
  hoveredHexagonIdRef.current = hoveredHexagonId;
  // Stable click handler using ref
  const onHexagonClickRef = useRef(onHexagonClick);
  onHexagonClickRef.current = onHexagonClick;
  const handleHexagonClick = useCallback((info: PickingInfo<HexagonData>) => {
    if (info.object && 'h3' in info.object) {
      onHexagonClickRef.current(info.object.h3);
    }
  }, []);
  // Stable hover handler using ref
  const onHexagonHoverRef = useRef(onHexagonHover);
  onHexagonHoverRef.current = onHexagonHover;
  const handleHexagonHover = useCallback((info: PickingInfo<HexagonData>) => {
    if (info.object && 'h3' in info.object) {
      onHexagonHoverRef.current(info.object.h3);
    } else {
      onHexagonHoverRef.current(null);
    }
  }, []);
  // Stable hover handler using ref
  const handlePoiHoverRef = useRef(handlePoiHover);
  handlePoiHoverRef.current = handlePoiHover;
  const stablePoiHover = useCallback((info: PickingInfo<POI>) => {
    handlePoiHoverRef.current(info);
  }, []);
  // Derive a trigger value from color-affecting state — avoids useEffect+setState double-render
  const colorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${countRange.min}|${countRange.max}|${selectedHexagonId}|${hoveredHexagonId}`;
  // Hexagon layer — only recreated when data or color trigger changes
  const hexLayer = useMemo(
    () =>
      new H3HexagonLayer<HexagonData>({
        id: 'h3-hexagons',
        data,
        getHexagon: (d) => d.h3,
-        getFillColor: (d) => priceToColor(d.avg_price),
+        getFillColor: (d) => {
          const vf = viewFeatureRef.current;
          const clr = colorRangeRef.current;
          const fr = filterRangeRef.current;
          const cfm = colorFeatureMetaRef.current;
          if (vf && clr && cfm) {
            const val = d[`min_${vf}`];
            if (val == null) return [128, 128, 128, 80] as [number, number, number, number];
            // Gray out hexagons outside filter range
            if (fr) {
              const minVal = d[`min_${vf}`] as number;
              const maxVal = d[`max_${vf}`] as number;
              if (maxVal < fr[0] || minVal > fr[1]) {
                return [180, 180, 180, 60] as [number, number, number, number];
              }
            }
            // Color using full slider range
            const range = clr[1] - clr[0];
            if (range === 0) return [...GRADIENT[0].color, 200] as [number, number, number, number];
            const t = ((val as number) - clr[0]) / range;
            const rgb = normalizedToColor(Math.max(0, Math.min(1, t)));
            return [...rgb, 200] as [number, number, number, number];
          }
          const cr = countRangeRef.current;
          const c = d.count as number;
          const t = (c - cr.min) / (cr.max - cr.min);
          return [...countToColor(Math.max(0, Math.min(1, t))), 200] as [
            number,
            number,
            number,
            number,
          ];
        },
        getLineColor: (d) => {
          if (d.h3 === selectedHexagonIdRef.current) return [255, 255, 255, 255] as [number, number, number, number];
          if (d.h3 === hoveredHexagonIdRef.current) return [29, 228, 195, 200] as [number, number, number, number];
          return [0, 0, 0, 0] as [number, number, number, number];
        },
        getLineWidth: (d) => {
          if (d.h3 === selectedHexagonIdRef.current) return 3;
          if (d.h3 === hoveredHexagonIdRef.current) return 2;
          return 0;
        },
        lineWidthUnits: 'pixels',
        updateTriggers: {
          getFillColor: [colorTrigger],
          getLineColor: [colorTrigger],
          getLineWidth: [colorTrigger],
        },
        extruded: false,
        pickable: true,
-        opacity: 0.5,
+        opacity: 1,
        highPrecision: true,
        onClick: handleHexagonClick,
        onHover: handleHexagonHover,
        // @ts-expect-error beforeId is a MapboxOverlay interleave prop, not typed in LayerProps
        beforeId: 'waterway_label',
      }),
    [data, colorTrigger, handleHexagonClick, handleHexagonHover]
  );
  // POI layer — independent, only recreated when POI data changes
  const poiLayer = useMemo(
    () =>
      new IconLayer<POI>({
        id: 'poi-icons',
        data: pois,
        getPosition: (d) => [d.lng, d.lat],
        getIcon: (d) => ({
-          url: getPOIIconUrl(d.category),
+          url: emojiToTwemojiUrl(d.emoji),
          width: 72,
          height: 72,
        }),
@ -275,48 +592,89 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
        sizeMinPixels: 20,
        sizeMaxPixels: 40,
        pickable: true,
-        onHover: handlePoiHover,
+        onHover: stablePoiHover,
      }),
-    ],
+    [pois, stablePoiHover]
    [data, pois, handlePoiHover]
  );
  // Postcode labels on high-res hexagons (resolution 11+, zoom >= 13)
  const postcodeData = useMemo(
    () => data.filter((d) => d.postcode && d.lat != null && d.lon != null),
    [data]
  );
-  // Tooltip for hexagons only (POIs use MapLibre popup)
+  const showPostcodes = viewState.zoom >= 13;
-  const getTooltip = useCallback(({ object }: { object?: HexagonData }) => {
+  const postcodeLayer = useMemo(
-    if (!object || !('h3' in object)) return null;
+    () =>
      showPostcodes
        ? new TextLayer<HexagonData>({
            id: 'postcode-labels',
            data: postcodeData,
            getPosition: (d) => [d.lon as number, d.lat as number],
            getText: (d) => d.postcode as string,
            getSize: 11,
            getColor: theme === 'dark' ? [220, 220, 220, 220] : [30, 30, 30, 220],
            getTextAnchor: 'middle',
            getAlignmentBaseline: 'center',
            fontFamily: 'Inter, system-ui, sans-serif',
            fontWeight: 600,
            outlineWidth: 2,
            outlineColor: theme === 'dark' ? [30, 30, 30, 200] : [255, 255, 255, 200],
            billboard: false,
            sizeUnits: 'pixels',
            sizeMinPixels: 10,
            sizeMaxPixels: 14,
          })
        : null,
    [postcodeData, showPostcodes, theme]
  );
-    const hex = object as HexagonData;
+  const layers = useMemo(
-    return {
+    () => [hexLayer, poiLayer, ...(postcodeLayer ? [postcodeLayer] : [])],
-      html: `<div style="padding: 8px; font-size: 14px;">
+    [hexLayer, poiLayer, postcodeLayer]
-        <strong>Avg: £${hex.avg_price?.toLocaleString() || 'N/A'}</strong>
+  );
        <div style="color: #666; font-size: 12px;">
          ${hex.count} sales<br/>
          Range: £${hex.min_price?.toLocaleString()} - £${hex.max_price?.toLocaleString()}
        </div>
      </div>`,
      style: {
        backgroundColor: 'white',
        borderRadius: '4px',
        boxShadow: '0 2px 4px rgba(0,0,0,0.2)',
      },
    };
  }, []);
  return (
    <div className="flex-1 h-full relative" ref={containerRef}>
-      <DeckGL
+      <MapGL
-        viewState={viewState}
+        {...viewState}
-        controller
+        onMove={handleMove}
-        layers={layers}
+        onLoad={handleMapLoad as never}
-        onViewStateChange={handleViewStateChange as never}
+        mapStyle={mapStyle}
-        getTooltip={getTooltip as never}
+        style={{ width: '100%', height: '100%' }}
        attributionControl={false}
        dragRotate={false}
        touchZoomRotate={true}
        touchPitch={false}
        keyboard={true}
        pitchWithRotate={false}
        minZoom={5}
        maxBounds={[-12, 49, 4, 62]}
      >
-        <MapGL mapStyle={MAP_STYLE} />
+        <DeckOverlay layers={layers} getTooltip={null} />
-      </DeckGL>
+      </MapGL>
      <PostcodeSearch onFlyTo={handleFlyTo} />
      {viewFeature && colorRange && colorFeatureMeta ? (
        <MapLegend
          featureLabel={colorFeatureMeta.name}
          range={colorRange}
          showCancel={viewSource === 'eye'}
          onCancel={onCancelPin}
          mode="feature"
          enumValues={colorFeatureMeta.type === 'enum' ? colorFeatureMeta.values : undefined}
        />
      ) : (
        <MapLegend
          featureLabel="Property density"
          range={[0, 0]}
          showCancel={false}
          onCancel={onCancelPin}
          mode="density"
        />
      )}
      {popupInfo && (
        <div
-          className="absolute pointer-events-none bg-white rounded shadow-lg p-2 text-sm"
+          className="absolute pointer-events-none bg-white dark:bg-navy-800 rounded shadow-lg p-2 text-sm dark:text-warm-200"
          style={{
            left: popupInfo.x,
            top: popupInfo.y - 40,
@ -324,14 +682,10 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
            zIndex: 9999,
          }}
        >
-          <strong>
+          <strong>{popupInfo.name}</strong>
-            {getTooltipEmoji(popupInfo.category)} {popupInfo.name}
+          <div className="text-gray-500 dark:text-warm-400 text-xs">{popupInfo.category}</div>
          </strong>
          <div className="text-gray-500 text-xs">
            {popupInfo.category.replace(/_/g, ' ')}
          </div>
        </div>
      )}
    </div>
  );
-}
+});
--- a/frontend/src/components/POIPane.tsx
+++ b/frontend/src/components/POIPane.tsx
@ -0,0 +1,297 @@
 import { useState, useRef, useEffect, useCallback } from 'react';
 import type { POICategoryGroup } from '../types';
 // Props accepted by the POIPane component.
 interface POIPaneProps {
  // Category groups listed in the dropdown; the pane reads each group's `name` and `categories`.
  groups: POICategoryGroup[];
  // Categories that are currently ticked.
  selectedCategories: Set<string>;
  // Called with a freshly built Set whenever the user changes the selection.
  onCategoriesChange: (categories: Set<string>) => void;
  // Number rendered in the "N POIs visible" summary.
  poiCount: number;
  // Optional; when provided, the info popup shows a "View data source" button that calls it with 'osm-pois'.
  onNavigateToSource?: (slug: string) => void;
 }
 // Sidebar pane for choosing which point-of-interest categories are selected.
 //
 // Renders a searchable, grouped checkbox dropdown, an info popup describing the
 // data source, and a summary of how many POIs / categories are active. The pane
 // never mutates `selectedCategories`; every change is reported through
 // `onCategoriesChange` with a new Set.
 export default function POIPane({
  groups,
  selectedCategories,
  onCategoriesChange,
  poiCount,
  onNavigateToSource,
 }: POIPaneProps) {
  const [dropdownOpen, setDropdownOpen] = useState(false);
  const [searchTerm, setSearchTerm] = useState('');
  const [collapsedGroups, setCollapsedGroups] = useState<Set<string>>(new Set());
  const [showInfo, setShowInfo] = useState(false);
  const dropdownRef = useRef<HTMLDivElement>(null);
  const infoPopupRef = useRef<HTMLDivElement>(null);
  // Close dropdown when clicking outside
  useEffect(() => {
    function handleClickOutside(event: MouseEvent) {
      if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) {
        setDropdownOpen(false);
      }
    }
    document.addEventListener('mousedown', handleClickOutside);
    return () => document.removeEventListener('mousedown', handleClickOutside);
  }, []);
  // Close info popup when clicking outside (listener only attached while the popup is open)
  useEffect(() => {
    if (!showInfo) return;
    function handleClickOutside(e: MouseEvent) {
      if (infoPopupRef.current && !infoPopupRef.current.contains(e.target as Node)) {
        setShowInfo(false);
      }
    }
    document.addEventListener('mousedown', handleClickOutside);
    return () => document.removeEventListener('mousedown', handleClickOutside);
  }, [showInfo]);
  const allCategories = groups.flatMap((g) => g.categories);
  // Flip a single category in a copy of the selection and report the copy.
  const toggleCategory = (category: string) => {
    const newSet = new Set(selectedCategories);
    if (newSet.has(category)) {
      newSet.delete(category);
    } else {
      newSet.add(category);
    }
    onCategoriesChange(newSet);
  };
  const selectAll = () => {
    onCategoriesChange(new Set(allCategories));
  };
  const selectNone = () => {
    onCategoriesChange(new Set());
  };
  // Toggle exactly the categories that the group header currently displays.
  // The header checkbox state is derived from the (possibly search-filtered)
  // category list, so the action must operate on that same list; otherwise a
  // "checked" header could end up selecting hidden categories instead of
  // clearing the visible ones.
  const toggleGroup = useCallback(
    (categories: readonly string[]) => {
      if (categories.length === 0) return;
      const newSet = new Set(selectedCategories);
      const allSelected = categories.every((c) => newSet.has(c));
      for (const c of categories) {
        if (allSelected) {
          newSet.delete(c);
        } else {
          newSet.add(c);
        }
      }
      onCategoriesChange(newSet);
    },
    [selectedCategories, onCategoriesChange]
  );
  const toggleCollapse = (groupName: string) => {
    setCollapsedGroups((prev) => {
      const next = new Set(prev);
      if (next.has(groupName)) {
        next.delete(groupName);
      } else {
        next.add(groupName);
      }
      return next;
    });
  };
  const lowerSearch = searchTerm.toLowerCase();
  // Filter groups and categories by search term:
  // a group whose name matches keeps all its categories; otherwise only the
  // matching categories are kept, and groups with no match are dropped.
  const filteredGroups = groups
    .map((group) => {
      if (!searchTerm) return group;
      const matchingCats = group.categories.filter((c) => c.toLowerCase().includes(lowerSearch));
      const groupMatches = group.name.toLowerCase().includes(lowerSearch);
      if (groupMatches) return group;
      if (matchingCats.length === 0) return null;
      return { ...group, categories: matchingCats };
    })
    .filter(Boolean) as POICategoryGroup[];
  const selectedCount = selectedCategories.size;
  return (
    <div className="w-72 p-4 bg-white dark:bg-navy-950 shadow-lg space-y-4 overflow-y-auto max-h-screen">
      <div className="flex items-center gap-2">
        <h2 className="text-xl font-bold dark:text-warm-100">Points of Interest</h2>
        <button
          onClick={() => setShowInfo(true)}
          className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
          title="Data source info"
        >
          <svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
            <circle cx="12" cy="12" r="10" />
            <path strokeLinecap="round" d="M12 16v-4m0-4h.01" />
          </svg>
        </button>
      </div>
      {showInfo && (
        <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30">
          <div
            ref={infoPopupRef}
            className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full mx-4 p-5"
          >
            <div className="flex items-start justify-between mb-3">
              <h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">
                Points of Interest
              </h3>
              <button
                onClick={() => setShowInfo(false)}
                aria-label="Close"
                className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shrink-0"
              >
                <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                  <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
                </svg>
              </button>
            </div>
            <p className="text-sm text-warm-700 dark:text-warm-300 mb-4 leading-relaxed">
              Points of interest are sourced from OpenStreetMap via Geofabrik extracts.
              Categories include public transport stops, shops, restaurants, healthcare
              facilities, leisure venues, and more. Data is filtered and mapped to
              friendly names with exhaustive category coverage.
            </p>
            {onNavigateToSource && (
              <button
                onClick={() => {
                  onNavigateToSource('osm-pois');
                  setShowInfo(false);
                }}
                className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline"
              >
                View data source
              </button>
            )}
          </div>
        </div>
      )}
      <div className="space-y-2" ref={dropdownRef}>
        <button
          onClick={() => setDropdownOpen(!dropdownOpen)}
          className="w-full flex items-center justify-between px-3 py-2 text-sm border border-warm-300 dark:border-navy-700 rounded hover:border-warm-400 bg-white dark:bg-navy-800 dark:text-warm-200"
        >
          <span className="truncate text-left">
            {selectedCount === 0
              ? 'Select categories...'
              : selectedCount === allCategories.length
                ? 'All categories'
                : `${selectedCount} selected`}
          </span>
          <svg
            className={`w-4 h-4 ml-2 flex-shrink-0 transition-transform ${dropdownOpen ? 'rotate-180' : ''}`}
            fill="none"
            stroke="currentColor"
            viewBox="0 0 24 24"
          >
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
          </svg>
        </button>
        {dropdownOpen && (
          <div className="border border-warm-300 dark:border-navy-700 rounded shadow-lg bg-white dark:bg-navy-800">
            <div className="flex gap-2 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
              <button onClick={selectAll} className="text-xs text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300">
                All
              </button>
              <span className="text-xs text-warm-300 dark:text-warm-600">|</span>
              <button onClick={selectNone} className="text-xs text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300">
                None
              </button>
            </div>
            <div className="px-3 py-2 border-b border-warm-200 dark:border-navy-700">
              <input
                type="text"
                placeholder="Search categories..."
                value={searchTerm}
                onChange={(e) => setSearchTerm(e.target.value)}
                className="w-full px-2 py-1 text-sm border border-warm-300 dark:border-navy-700 rounded bg-white dark:bg-navy-950 dark:text-warm-200 dark:placeholder-warm-500"
              />
            </div>
            <div className="max-h-96 overflow-y-auto py-1">
              {filteredGroups.map((group) => {
                const groupSelected = group.categories.filter((c) =>
                  selectedCategories.has(c)
                ).length;
                // An empty group must not render as "all selected" (0 === 0).
                const allInGroupSelected =
                  group.categories.length > 0 && groupSelected === group.categories.length;
                const someInGroupSelected = groupSelected > 0 && !allInGroupSelected;
                // While searching, every group is force-expanded so matches stay visible.
                const isCollapsed = collapsedGroups.has(group.name) && !searchTerm;
                return (
                  <div key={group.name}>
                    <div className="flex items-center gap-1 px-3 py-1.5 bg-warm-50 dark:bg-navy-950 border-y border-warm-100 dark:border-navy-700">
                      <button
                        onClick={() => toggleCollapse(group.name)}
                        aria-label={`${isCollapsed ? 'Expand' : 'Collapse'} ${group.name}`}
                        className="p-0.5 text-warm-400 hover:text-warm-600"
                      >
                        <svg
                          className={`w-3 h-3 transition-transform ${isCollapsed ? '' : 'rotate-90'}`}
                          fill="none"
                          stroke="currentColor"
                          viewBox="0 0 24 24"
                        >
                          <path
                            strokeLinecap="round"
                            strokeLinejoin="round"
                            strokeWidth={2}
                            d="M9 5l7 7-7 7"
                          />
                        </svg>
                      </button>
                      <label className="flex items-center gap-2 flex-1 cursor-pointer">
                        <input
                          type="checkbox"
                          checked={allInGroupSelected}
                          ref={(el) => {
                            // `indeterminate` is a DOM property, not an attribute, so it is set via ref.
                            if (el) el.indeterminate = someInGroupSelected;
                          }}
                          onChange={() => toggleGroup(group.categories)}
                          className="rounded accent-teal-600"
                        />
                        <span className="text-xs font-semibold text-warm-700 dark:text-warm-300">{group.name}</span>
                      </label>
                      <span className="text-xs text-warm-400">
                        {groupSelected}/{group.categories.length}
                      </span>
                    </div>
                    {!isCollapsed &&
                      group.categories.map((category) => (
                        <label
                          key={category}
                          className="flex items-center gap-2 px-3 pl-8 py-1.5 hover:bg-warm-50 dark:hover:bg-navy-700 cursor-pointer dark:text-warm-300"
                        >
                          <input
                            type="checkbox"
                            checked={selectedCategories.has(category)}
                            onChange={() => toggleCategory(category)}
                            className="rounded accent-teal-600"
                          />
                          <span className="text-sm flex-1">{category}</span>
                        </label>
                      ))}
                  </div>
                );
              })}
            </div>
          </div>
        )}
      </div>
      {selectedCount > 0 && (
        <div className="p-3 bg-teal-50 dark:bg-teal-900/30 rounded text-sm">
          <div className="font-medium text-teal-900 dark:text-teal-300">
            {poiCount.toLocaleString()} POI{poiCount !== 1 ? 's' : ''} visible
          </div>
          <div className="text-xs text-teal-700 dark:text-teal-400 mt-1">
            {selectedCount} categor{selectedCount !== 1 ? 'ies' : 'y'} selected
          </div>
        </div>
      )}
      <div className="p-3 bg-warm-100 dark:bg-navy-800 rounded text-xs text-warm-600 dark:text-warm-400">
        <p>Select categories to display POIs on the map.</p>
        <p className="mt-2">Zoom in for better visibility of individual locations.</p>
      </div>
    </div>
  );
 }
--- a/frontend/src/components/PropertiesPane.tsx
+++ b/frontend/src/components/PropertiesPane.tsx
@ -0,0 +1,316 @@
 import React, { useMemo, useState, useRef, useEffect } from 'react';
 import { Property } from '../types';
 // Props accepted by the PropertiesPane component.
 interface PropertiesPaneProps {
  // Properties loaded so far; the pane filters and sorts these.
  properties: Property[];
  // Total used in the "Showing X of Y" line and to decide whether "Load More" is shown.
  total: number;
  // While true, "Load More" is disabled; with no properties loaded a "Loading..." placeholder is shown.
  loading: boolean;
  // When null, the pane renders only the "Click a hexagon to view properties" hint.
  hexagonId: string | null;
  // Invoked when the "Load More" button is clicked.
  onLoadMore: () => void;
  // Invoked when the header close button is clicked.
  onClose: () => void;
  // Optional; when provided, the info popup shows a "View data source" button that calls it with 'epc'.
  onNavigateToSource?: (slug: string) => void;
  // When truthy, a "Preview" badge is shown next to the title.
  isHoveredPreview?: boolean;
  // Current state of the live-preview toggle (controls its styling and tooltip).
  hoverMode?: boolean;
  // Optional; when provided, the live-preview toggle is rendered and calls this with the negated hoverMode.
  onHoverModeChange?: (enabled: boolean) => void;
 }
 // Sort orders offered by the sort <select> in PropertiesPane.
 type SortBy = 'price' | 'size' | 'energy';
 // Side pane listing the properties of the selected (or hovered) hexagon.
 //
 // Shows a header with an info popup and optional live-preview toggle, a
 // client-side search box and sort selector, and the list of PropertyCard rows
 // with a "Load More" button while fewer than `total` properties are loaded.
 // Filtering and sorting only apply to the properties already loaded.
 export function PropertiesPane({
  properties,
  total,
  loading,
  hexagonId,
  onLoadMore,
  onClose,
  onNavigateToSource,
  isHoveredPreview,
  hoverMode,
  onHoverModeChange,
 }: PropertiesPaneProps) {
  const [sortBy, setSortBy] = useState<SortBy>('price');
  const [search, setSearch] = useState('');
  const [showInfo, setShowInfo] = useState(false);
  const infoPopupRef = useRef<HTMLDivElement>(null);
  // Close the info popup on an outside click (listener only attached while it is open)
  useEffect(() => {
    if (!showInfo) return;
    function handleClickOutside(e: MouseEvent) {
      if (infoPopupRef.current && !infoPopupRef.current.contains(e.target as Node)) {
        setShowInfo(false);
      }
    }
    document.addEventListener('mousedown', handleClickOutside);
    return () => document.removeEventListener('mousedown', handleClickOutside);
  }, [showInfo]);
  // Filter and sort properties
  const filteredAndSorted = useMemo(() => {
    // Read sort keys through getNum with the same field names PropertyCard
    // displays, so sorting agrees with what the cards show regardless of which
    // of the two field names a record carries. Missing/NaN values sort as 0.
    const priceOf = (p: Property) => getNum(p, 'Last known price', 'latest_price') || 0;
    const areaOf = (p: Property) =>
      getNum(p, 'Total floor area (sqm)', 'total_floor_area') || 0;
    const query = search.trim().toLowerCase();
    const filtered = query
      ? properties.filter((p) => {
          const addr = (p.address || '').toLowerCase();
          const pc = (p.postcode || '').toLowerCase();
          return addr.includes(query) || pc.includes(query);
        })
      : properties;
    // Copy before sorting so the `properties` prop is never mutated.
    return [...filtered].sort((a, b) => {
      switch (sortBy) {
        case 'price':
          return priceOf(b) - priceOf(a);
        case 'size':
          return areaOf(b) - areaOf(a);
        case 'energy':
          // Missing ratings are treated as 'Z' so they sort after real ratings.
          return (a.current_energy_rating || 'Z').localeCompare(b.current_energy_rating || 'Z');
      }
    });
  }, [properties, sortBy, search]);
  if (!hexagonId) {
    return (
      <div className="flex items-center justify-center h-full text-warm-500 dark:text-warm-400">
        Click a hexagon to view properties
      </div>
    );
  }
  return (
    <div className="flex flex-col h-full">
      {/* Header */}
      <div className="p-4 border-b border-warm-200 dark:border-navy-700">
        <div className="flex justify-between items-center">
          <div className="flex items-center gap-2">
            <h2 className="text-lg font-semibold dark:text-warm-100">Properties</h2>
            {isHoveredPreview && (
              <span className="text-xs px-1.5 py-0.5 rounded bg-teal-50 dark:bg-teal-900/30 text-teal-600 dark:text-teal-400">
                Preview
              </span>
            )}
            <button
              onClick={() => setShowInfo(true)}
              className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
              title="Data source info"
            >
              <svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                <circle cx="12" cy="12" r="10" />
                <path strokeLinecap="round" d="M12 16v-4m0-4h.01" />
              </svg>
            </button>
          </div>
          <div className="flex items-center gap-1">
            {onHoverModeChange && (
              <button
                onClick={() => onHoverModeChange(!hoverMode)}
                className={`p-1 rounded ${
                  hoverMode
                    ? 'text-teal-600 dark:text-teal-400 bg-teal-50 dark:bg-teal-900/30'
                    : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'
                }`}
                title={hoverMode ? 'Live preview on (click to lock)' : 'Live preview off (click to enable)'}
              >
                <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                  <path strokeLinecap="round" strokeLinejoin="round" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z" />
                  <path strokeLinecap="round" strokeLinejoin="round" d="M2.458 12C3.732 7.943 7.523 5 12 5c4.478 0 8.268 2.943 9.542 7-1.274 4.057-5.064 7-9.542 7-4.477 0-8.268-2.943-9.542-7z" />
                </svg>
              </button>
            )}
            <button
              onClick={onClose}
              aria-label="Close"
              className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-1"
            >
              <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
              </svg>
            </button>
          </div>
        </div>
        <p className="text-sm text-warm-600 dark:text-warm-400">
          {search.trim()
            ? `${filteredAndSorted.length} match${filteredAndSorted.length !== 1 ? 'es' : ''} in ${properties.length} loaded`
            : `Showing ${properties.length} of ${total} properties`}
        </p>
        {showInfo && (
          <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30">
            <div
              ref={infoPopupRef}
              className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full mx-4 p-5"
            >
              <div className="flex items-start justify-between mb-3">
                <h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">
                  Property Data
                </h3>
                <button
                  onClick={() => setShowInfo(false)}
                  aria-label="Close"
                  className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shrink-0"
                >
                  <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                    <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
                  </svg>
                </button>
              </div>
              <p className="text-sm text-warm-700 dark:text-warm-300 mb-4 leading-relaxed">
                Property data combines Energy Performance Certificates (EPC) with HM Land
                Registry Price Paid records, fuzzy-matched by address within each postcode.
                Includes floor area, energy ratings, construction age, and tenure from EPC
                surveys, plus the most recent sale price from the Land Registry.
              </p>
              {onNavigateToSource && (
                <button
                  onClick={() => {
                    onNavigateToSource('epc');
                    setShowInfo(false);
                  }}
                  className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline"
                >
                  View data source
                </button>
              )}
            </div>
          </div>
        )}
      </div>
      {/* Search and sort controls */}
      <div className="p-2 border-b border-warm-200 dark:border-navy-700 space-y-2">
        <input
          type="text"
          value={search}
          onChange={(e) => setSearch(e.target.value)}
          placeholder="Search by address or postcode..."
          className="w-full p-2 border border-warm-300 dark:border-navy-700 rounded text-sm bg-white dark:bg-navy-800 dark:text-warm-200 placeholder-warm-400 dark:placeholder-warm-500"
        />
        <select
          value={sortBy}
          onChange={(e) => setSortBy(e.target.value as SortBy)}
          className="w-full p-2 border border-warm-300 dark:border-navy-700 rounded text-sm bg-white dark:bg-navy-800 dark:text-warm-200"
        >
          <option value="price">Price (High to Low)</option>
          <option value="size">Size (Large to Small)</option>
          <option value="energy">Energy Rating (Best to Worst)</option>
        </select>
      </div>
      {/* Properties list */}
      <div className="flex-1 overflow-y-auto">
        {loading && properties.length === 0 ? (
          <div className="p-4 dark:text-warm-400">Loading...</div>
        ) : (
          <>
            {filteredAndSorted.map((property, idx) => (
              <PropertyCard key={idx} property={property} />
            ))}
            {/* "Load More" compares loaded vs. total, independent of the search filter */}
            {properties.length < total && (
              <button
                onClick={onLoadMore}
                disabled={loading}
                className="w-full p-4 text-teal-600 dark:text-teal-400 hover:bg-teal-50 dark:hover:bg-teal-900/30 disabled:opacity-50"
              >
                {loading ? 'Loading...' : `Load More (${total - properties.length} remaining)`}
              </button>
            )}
          </>
        )}
      </div>
    </div>
  );
 }
 // Expand single-letter tenure codes; any other value is returned unchanged.
 function formatDuration(d: string): string {
  switch (d) {
    case 'F':
      return 'Freehold';
    case 'L':
      return 'Leasehold';
    default:
      return d;
  }
 }
 // Round `value` to a whole number; values of 1000 or more get a "~" prefix
 // when `approximate` is true.
 function formatAge(value: number, approximate = true): string {
  const rounded = Math.round(value).toString();
  const marker = value >= 1000 && approximate ? '~' : '';
  return marker + rounded;
 }
 // Return the first value stored on `property` under one of `keys` (tried in
 // order) whose runtime type is number; undefined when no key holds a number.
 function getNum(property: Property, ...keys: string[]): number | undefined {
  const match = keys.find((key) => typeof property[key] === 'number');
  return match === undefined ? undefined : (property[match] as number);
 }
 /**
  * Property card component showing all fields.
  *
  * Renders the address and postcode, then the price (with price per m² when
  * present) and a two-column grid of details. Every detail row is optional and
  * is only rendered when its value is present on `property`.
  */
 function PropertyCard({ property }: { property: Property }) {
  // Format a number for display: fixed decimals when `decimals` > 0, otherwise
  // rounded and locale-formatted. `undefined` becomes an empty string.
  const fmt = (value: number | undefined, decimals = 0): string => {
    if (value === undefined) return '';
    return decimals > 0 ? value.toFixed(decimals) : Math.round(value).toLocaleString();
  };
  // Each numeric field is looked up under two candidate keys; getNum returns
  // the first one that holds a number.
  const price = getNum(property, 'Last known price', 'latest_price');
  const pricePerSqm = getNum(property, 'Price per sqm', 'price_per_sqm');
  const floorArea = getNum(property, 'Total floor area (sqm)', 'total_floor_area');
  const rooms = getNum(
    property,
    'Rooms (including bedrooms & bathrooms)',
    'number_habitable_rooms'
  );
  const age = getNum(property, 'Approximate construction age', 'construction_age_band');
  return (
    <div className="p-4 border-b border-warm-100 dark:border-navy-800 hover:bg-warm-50 dark:hover:bg-navy-800">
      {/* Address & postcode */}
      <div className="font-semibold dark:text-warm-100">{property.address || 'Unknown Address'}</div>
      <div className="text-sm text-warm-600 dark:text-warm-400">{property.postcode}</div>
      {/* Price */}
      {price !== undefined && (
        <div className="mt-2 text-lg font-bold text-teal-700 dark:text-teal-400">
          £{fmt(price)}
          {pricePerSqm !== undefined && (
            <span className="text-sm font-normal text-warm-600 dark:text-warm-400"> (£{fmt(pricePerSqm)}/m²)</span>
          )}
        </div>
      )}
      {/* Property details grid */}
      <div className="mt-2 grid grid-cols-2 gap-x-4 gap-y-1 text-sm dark:text-warm-300">
        {property.property_type && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">Type:</span> {property.property_type}
          </div>
        )}
        {property.built_form && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">Built form:</span> {property.built_form}
          </div>
        )}
        {property.duration && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">Tenure:</span> {formatDuration(property.duration)}
          </div>
        )}
        {floorArea !== undefined && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">Floor area:</span> {fmt(floorArea)}m²
          </div>
        )}
        {rooms !== undefined && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">Rooms:</span> {fmt(rooms)}
          </div>
        )}
        {age !== undefined && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">Built:</span> {formatAge(age, property.is_construction_date_approximate ?? true)}
          </div>
        )}
        {property.current_energy_rating && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">EPC rating:</span> {property.current_energy_rating}
          </div>
        )}
        {property.potential_energy_rating && (
          <div>
            <span className="text-warm-500 dark:text-warm-400">EPC potential:</span> {property.potential_energy_rating}
          </div>
        )}
      </div>
    </div>
  );
 }
--- a/frontend/src/components/ui/label.tsx
+++ b/frontend/src/components/ui/label.tsx
@ -7,6 +7,6 @@ interface LabelProps {
 export function Label({ children, className }: LabelProps) {
  return (
-    <label className={`text-sm font-medium text-slate-700 ${className || ''}`}>{children}</label>
+    <label className={`text-sm font-medium text-warm-700 dark:text-warm-300 ${className || ''}`}>{children}</label>
  );
 }
--- a/frontend/src/components/ui/slider.tsx
+++ b/frontend/src/components/ui/slider.tsx
@ -11,13 +11,13 @@ export function Slider({ className, ...props }: SliderProps) {
      className={cn('relative flex w-full touch-none select-none items-center', className)}
      {...props}
    >
-      <SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full bg-slate-200">
+      <SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full bg-warm-200 dark:bg-navy-700">
-        <SliderPrimitive.Range className="absolute h-full bg-slate-900" />
+        <SliderPrimitive.Range className="absolute h-full bg-teal-600" />
      </SliderPrimitive.Track>
      {props.value?.map((_, i) => (
        <SliderPrimitive.Thumb
          key={i}
-          className="block h-5 w-5 rounded-full border-2 border-slate-900 bg-white ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-slate-950 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50"
+          className="block h-5 w-5 rounded-full border-2 border-teal-600 dark:border-teal-500 bg-white dark:bg-navy-800 ring-offset-white dark:ring-offset-navy-950 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-teal-600 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50"
        />
      ))}
    </SliderPrimitive.Root>
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@ -9,3 +9,41 @@ body,
  margin: 0;
  padding: 0;
 }
 html.dark {
  background-color: #0a0e1a;
  color-scheme: dark;
 }
 /* Smooth theme transitions (scoped to avoid map performance issues) */
 body,
 div,
 aside,
 section,
 header,
 nav,
 button,
 input,
 select,
 label,
 span,
 p,
 h1,
 h2,
 h3 {
  transition: background-color 0.2s ease, border-color 0.2s ease, color 0.2s ease;
 }
 /* Fade-in animation for homepage sections */
 .fade-in-section {
  opacity: 0;
  transform: translateY(24px);
  transition:
    opacity 0.6s ease-out,
    transform 0.6s ease-out;
 }
 .fade-in-visible {
  opacity: 1;
  transform: translateY(0);
 }
--- a/frontend/src/index.html
+++ b/frontend/src/index.html
@ -3,7 +3,14 @@
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>UK Property Prices Map</title>
+    <title>Narrowit</title>
    <script>
      (function() {
        if (localStorage.getItem('theme') === 'dark') {
          document.documentElement.classList.add('dark');
        }
      })();
    </script>
  </head>
  <body>
    <div id="root"></div>
--- a/frontend/src/lib/constants.ts
+++ b/frontend/src/lib/constants.ts
@ -1,19 +0,0 @@
 import type { Filters } from '../types';
 // Filter configuration constants
 // Should match backend pipeline/config.py
 export const YEAR_MIN = 1995;
 export const YEAR_MAX = 2024;
 export const YEAR_STEP = 1;
 export const PRICE_MIN = 0;
 export const PRICE_MAX = 5000000; // £5M max for slider, but no server-side cap
 export const PRICE_STEP = 50000;
 export const DEFAULT_FILTERS: Filters = {
  minYear: 2020,
  maxYear: YEAR_MAX,
  minPrice: PRICE_MIN,
  maxPrice: PRICE_MAX,
 };
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@ -1,8 +1,31 @@
-export interface Filters {
+export interface FeatureMeta {
-  minYear: number;
+  name: string;
-  maxYear: number;
+  type: 'numeric' | 'enum';
-  minPrice: number;
+  group?: string;
-  maxPrice: number;
+  // Numeric-only fields
  min?: number;
  max?: number;
  step?: number;
  // Enum-only fields
  values?: string[];
  // Description fields
  description?: string;
  detail?: string;
  source?: string;
 }
 export interface FeatureGroup {
  name: string;
  features: FeatureMeta[];
 }
 // Filters: feature name -> [selectedMin, selectedMax] for numeric, string[] for enum
 export type FeatureFilters = Record<string, [number, number] | string[]>;
 export interface HexagonData {
  h3: string;
  count: number;
  [key: string]: string | number | null;
 }
 export interface Bounds {
@ -12,15 +35,6 @@ export interface Bounds {
  east: number;
 }
 export interface HexagonData {
  h3: string;
  count: number;
  avg_price: number;
  median_price: number;
  min_price: number;
  max_price: number;
 }
 export interface ViewState {
  longitude: number;
  latitude: number;
@ -33,6 +47,8 @@ export interface ViewChangeParams {
  resolution: number;
  bounds: Bounds;
  zoom: number;
  latitude: number;
  longitude: number;
 }
 export interface ApiResponse {
@ -43,21 +59,69 @@ export interface POI {
  id: string;
  name: string;
  category: string;
  group: string;
  lat: number;
  lng: number;
  emoji: string;
 }
 export interface POIResponse {
-  features: POI[];
+  pois: POI[];
 }
-export const POI_CATEGORY_GROUPS = [
+export interface POICategoryGroup {
-  'schools',
+  name: string;
-  'healthcare',
+  categories: string[];
-  'transport',
+}
  'parks',
  'emergency',
  'supermarkets',
 ] as const;
-export type POICategoryGroup = (typeof POI_CATEGORY_GROUPS)[number];
+export interface POICategoriesResponse {
  groups: POICategoryGroup[];
 }
 export interface Property {
  // String fields
  address?: string;
  postcode?: string;
  property_type?: string;
  built_form?: string;
  duration?: string;
  current_energy_rating?: string;
  potential_energy_rating?: string;
  // Numeric fields
  lat: number;
  lon: number;
  is_construction_date_approximate?: boolean;
  // All other numeric features (dynamic, including construction_age_band)
  [key: string]: string | number | boolean | undefined;
 }
 export interface HexagonPropertiesResponse {
  properties: Property[];
  total: number;
  limit: number;
  offset: number;
  truncated: boolean;
 }
 export interface NumericFeatureStats {
  name: string;
  count: number;
  min: number;
  max: number;
  mean: number;
  histogram: { min: number; max: number; bin_width: number; counts: number[] };
 }
 export interface EnumFeatureStats {
  name: string;
  counts: Record<string, number>;
 }
 export interface HexagonStatsResponse {
  count: number;
  numeric_features: NumericFeatureStats[];
  enum_features: EnumFeatureStats[];
 }
--- a/frontend/tailwind.config.js
+++ b/frontend/tailwind.config.js
@ -1,7 +1,54 @@
 module.exports = {
  darkMode: 'class',
  content: ['./src/**/*.{js,jsx,ts,tsx,html}'],
  theme: {
-    extend: {},
+    extend: {
      colors: {
        navy: {
          50: '#eef1f8',
          100: '#d9dff0',
          200: '#b3bfe1',
          300: '#8d9fd2',
          400: '#677fc3',
          500: '#4a63a8',
          600: '#2a3f6b',
          700: '#1e2d50',
          800: '#141e38',
          900: '#0f1528',
          950: '#0a0e1a',
        },
        teal: {
          50: '#effefb',
          100: '#c7fff4',
          200: '#90ffe9',
          300: '#51f7d9',
          400: '#1de4c3',
          500: '#05c9aa',
          600: '#00a28c',
          700: '#058172',
          800: '#0a665b',
          900: '#0d544c',
          950: '#003330',
        },
        coral: {
          400: '#fb923c',
          500: '#f97316',
          600: '#ea580c',
        },
        warm: {
          50: '#fafaf9',
          100: '#f5f5f4',
          200: '#e7e5e4',
          300: '#d6d3d1',
          400: '#a8a29e',
          500: '#78716c',
          600: '#57534e',
          700: '#44403c',
          800: '#292524',
          900: '#1c1917',
        },
      },
    },
  },
  plugins: [require('tailwindcss-animate')],
 };
--- a/generate_tfl_client.py
+++ b/generate_tfl_client.py
@ -1,49 +0,0 @@
 #!/usr/bin/env python3
 # /// script
 # requires-python = ">=3.12"
 # dependencies = ["openapi-python-client"]
 # ///
 """Regenerate the TfL Journey API client from the OpenAPI specification."""
 # Run it with:
 #   uv run generate_tfl_client.py
 import subprocess
 from pathlib import Path
 OPENAPI_SPEC = Path("Journey.yaml")
 OUTPUT_PATH = Path("tfl_journey_client")
 def main() -> None:
    """Generate the TfL Journey API client unless it already exists.

    Raises:
        FileNotFoundError: If the OpenAPI spec file is missing.
        subprocess.CalledProcessError: If ``openapi-python-client`` exits
            with a non-zero status.
    """
    if not OPENAPI_SPEC.exists():
        raise FileNotFoundError(f"OpenAPI spec not found: {OPENAPI_SPEC}")
    # Skip if client already exists
    if OUTPUT_PATH.exists():
        print(f"TfL client already exists at {OUTPUT_PATH}, skipping")
        return
    # Generate the client
    print(f"Generating client from {OPENAPI_SPEC}")
    # check=True raises CalledProcessError on a non-zero exit status, so
    # reaching the next statement already means the generator succeeded; a
    # separate returncode check would be unreachable.
    subprocess.run(
        [
            "openapi-python-client",
            "generate",
            "--path",
            str(OPENAPI_SPEC),
            "--output-path",
            str(OUTPUT_PATH),
        ],
        check=True,
    )
    print(f"Client generated successfully at {OUTPUT_PATH}")
 if __name__ == "__main__":
    main()
--- a/main.py
+++ b/main.py
@ -1,6 +0,0 @@
 def main():
    """Print the project's greeting line to stdout."""
    greeting = "Hello from property-map!"
    print(greeting)
 if __name__ == "__main__":
    main()
--- a/pipeline/base.py
+++ b/pipeline/base.py
@ -1,22 +0,0 @@
 from abc import ABC, abstractmethod
 import polars as pl
 class DataSource(ABC):
    """Abstract interface that every pipeline data source implements."""
    @property
    @abstractmethod
    def name(self) -> str:
        """Return the unique identifier of this data source."""
    @abstractmethod
    def load(self) -> pl.LazyFrame:
        """Return the raw data as a LazyFrame."""
    @abstractmethod
    def process(self, postcodes: pl.LazyFrame) -> pl.LazyFrame:
        """Process the data and join it with the postcode coordinates."""
--- a/pipeline/config.py
+++ b/pipeline/config.py
@ -1,22 +0,0 @@
 """Shared configuration for the pipeline and server."""
 from pathlib import Path
 DATA_DIR = Path(__file__).parent.parent / "data_sources"
 PROCESSED_DIR = DATA_DIR / "processed"
 AGGREGATES_DIR = PROCESSED_DIR / "aggregates"
 # H3 resolutions to generate and serve
 # https://h3geo.org/docs/core-library/restable/#average-area-in-m2
 H3_RESOLUTIONS = [7, 8, 9, 10, 11]
 DEFAULT_H3_RESOLUTION = 8
 # Year filters
 MIN_YEAR = 1995
 MAX_YEAR = 2024
 DEFAULT_MIN_YEAR = 2020
 DEFAULT_MAX_YEAR = 2024
 # Price filters
 DEFAULT_MIN_PRICE = 0
 DEFAULT_MAX_PRICE = 100_000_000
--- a/pipeline/download/arcgis.py
+++ b/pipeline/download/arcgis.py
@ -0,0 +1,38 @@
 import argparse
 import tempfile
 import polars as pl
 from pathlib import Path
 from pipeline.utils import download, extract_zip
 URL = "https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data"
 def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
    """Write ``Data/NSPL_MAY_2025_UK.csv`` under ``data_path`` as zstd parquet.

    The CSV is scanned lazily and sunk straight to ``parquet_path``; the
    parent directory of ``parquet_path`` is created if it does not exist.
    """
    csv_path = data_path / "Data/NSPL_MAY_2025_UK.csv"
    lazy_frame = pl.scan_csv(csv_path, try_parse_dates=True)
    print(f"Columns: {lazy_frame.collect_schema().names()}")
    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    lazy_frame.sink_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
 def main() -> None:
    """Parse ``--output``, then download, unzip and convert the postcode data."""
    cli = argparse.ArgumentParser(
        description="Download and convert ArcGIS postcode data"
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    output_path = cli.parse_args().output
    # The archive and its extracted contents are scratch data, so they live in
    # a temporary directory that is removed when the block exits.
    with tempfile.TemporaryDirectory() as cache_dir:
        scratch = Path(cache_dir)
        archive = scratch / "arcgis_data.zip"
        extracted = scratch / "arcgis_extracted"
        download(URL, archive)
        extract_zip(archive, extracted)
        convert_to_parquet(extracted, output_path)
 if __name__ == "__main__":
    main()
--- a/pipeline/download/broadband.py
+++ b/pipeline/download/broadband.py
@ -0,0 +1,62 @@
 import argparse
 import tempfile
 import polars as pl
 from pathlib import Path
 from pipeline.utils import download, extract_zip
 # Ofcom Connected Nations 2025 - Fixed broadband performance (output area & local authority level)
 # Source: https://www.ofcom.org.uk/phones-and-broadband/coverage-and-speeds/connected-nations-20252/data-downloads-2025
 PERFORMANCE_URL = "https://www.ofcom.org.uk/siteassets/resources/documents/research-and-data/multi-sector/infrastructure-research/connected-nations-2025/202507_fixed_broadband_coverage_r01.zip?v=407830"
 def convert_to_parquet(extract_dir: Path, parquet_path: Path) -> None:
    """Combine every CSV found under ``extract_dir`` into one parquet file.

    Files are read in sorted path order and concatenated with
    ``how="diagonal_relaxed"``. The parent directory of ``parquet_path`` is
    created if needed.

    Raises:
        FileNotFoundError: If no ``*.csv`` file exists under ``extract_dir``.
    """
    # Find CSV files in the extracted directory
    found = list(extract_dir.rglob("*.csv"))
    if not found:
        raise FileNotFoundError(f"No CSV files found in {extract_dir}")
    print(f"Found {len(found)} CSV files: {[f.name for f in found]}")

    def read_one(csv_file: Path) -> pl.DataFrame:
        # Read one CSV, reporting progress before and after.
        print(f"Reading {csv_file.name}...")
        frame = pl.read_csv(csv_file, infer_schema_length=10000, encoding="utf8-lossy")
        print(f"  Shape: {frame.shape}")
        return frame

    combined = pl.concat(
        [read_one(csv_file) for csv_file in sorted(found)],
        how="diagonal_relaxed",
    )
    print(f"Combined shape: {combined.shape}")
    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    combined.write_parquet(parquet_path, compression="zstd")
    print(f"Saved {parquet_path} ({combined.shape[0]} rows)")
 def main() -> None:
    """Download the Ofcom coverage archive and convert its postcode CSVs.

    Parses ``--output``, downloads the outer zip, extracts it, extracts the
    nested postcode-coverage zip, and writes the combined parquet file.
    """
    parser = argparse.ArgumentParser(
        description="Download Ofcom broadband performance data"
    )
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output parquet file path",
    )
    args = parser.parse_args()
    # Use the default self-cleaning temporary directory: the result is written
    # to args.output, so keeping the downloaded and extracted archives around
    # (delete=False) only leaked disk space on every run.
    with tempfile.TemporaryDirectory() as cache_dir:
        cache = Path(cache_dir)
        zip_path = cache / "broadband_performance.zip"
        extract_dir = cache / "extracted"
        extracted_again_dir = cache / "extracted-again"
        download(PERFORMANCE_URL, zip_path)
        extract_zip(zip_path, extract_dir)
        # The outer archive holds a folder of further zips; list it, then
        # unpack the postcode-level coverage archive.
        inner_dir = extract_dir / "202507_fixed_coverage_r01"
        print(list(inner_dir.glob("*")))
        extract_zip(inner_dir / "202507_fixed_pc_coverage_r01.zip", extracted_again_dir)
        convert_to_parquet(extracted_again_dir, args.output)
 if __name__ == "__main__":
    main()
--- a/pipeline/download/deprivation_data.py
+++ b/pipeline/download/deprivation_data.py
@ -0,0 +1,43 @@
 import argparse
 import tempfile
 import polars as pl
 from pathlib import Path
 from pipeline.utils import download
 URL = "https://assets.publishing.service.gov.uk/media/691ded34513046b952c500bd/File_5_IoD2025_Scores_for_the_Indices_of_Deprivation.xlsx"
 def convert_to_parquet(xlsx_path: Path, parquet_path: Path) -> None:
    """Convert the second sheet of the downloaded workbook to zstd parquet.

    Args:
        xlsx_path: Path of the downloaded ``.xlsx`` workbook.
        parquet_path: Destination parquet file; its parent directory is
            created if it does not exist.
    """
    print("Reading Excel file (sheet 2)...")
    # sheet_id is 1-indexed, so 2 selects the second sheet (IoD2025 Scores).
    df = pl.read_excel(
        xlsx_path,
        sheet_id=2,
    )
    print(f"Shape: {df.shape}")
    print(f"Columns: {df.columns}")
    # Create the output directory first so writing does not fail when the
    # target folder has not been created yet.
    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    df.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
 def main() -> None:
    """Parse ``--output``, download the workbook and convert it to parquet."""
    cli = argparse.ArgumentParser(
        description="Download and convert Index of Deprivation data"
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    output_path = cli.parse_args().output
    # The workbook is only needed during conversion, so it is kept in a
    # temporary directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as cache_dir:
        workbook = Path(cache_dir) / "IoD2025_Scores.xlsx"
        download(URL, workbook, timeout=60)
        convert_to_parquet(workbook, output_path)
 if __name__ == "__main__":
    main()
--- a/pipeline/download/ethnicity.py
+++ b/pipeline/download/ethnicity.py
@ -0,0 +1,58 @@
 import argparse
 from pathlib import Path
 import httpx
 import polars as pl
 pl.Config.set_tbl_cols(-1)
 URL = "https://www.ethnicity-facts-figures.service.gov.uk/uk-population-by-ethnicity/national-and-regional-populations/regional-ethnic-diversity/latest/downloads/population-by-ethnicity-and-local-authority-2021.csv"
 def download_and_convert(output_path: Path) -> None:
    """Download the ethnicity CSV and save it as one row per geography code.

    Rows are restricted to the "ONS 2021 5+1" ethnicity type without the "All"
    totals, pivoted so each ethnicity becomes a ``"% <ethnicity>"`` column, and
    written to ``output_path`` as zstd parquet. The parent directory of
    ``output_path`` is created if needed.

    Raises:
        httpx.HTTPStatusError: If the download returns an error status.
    """
    print("Downloading ethnicity data...")
    response = httpx.get(URL, follow_redirects=True, timeout=60)
    response.raise_for_status()
    df = pl.read_csv(response.content)
    # Report the dimensions, as the label says, rather than dumping the first
    # 100 rows of the table.
    print(f"Raw shape: {df.shape}")
    # Keep only broad ethnicity categories (5+1), exclude "All" totals
    df = df.filter(
        (pl.col("Ethnicity_type") == "ONS 2021 5+1") & (pl.col("Ethnicity") != "All")
    )
    # Pivot: one row per local authority, columns = ethnicity percentages
    wide = df.pivot(
        on="Ethnicity",
        index="Geography_code",
        values="Value1",
    )
    # Rename columns to be descriptive
    rename_map = {
        col: f"% {col}" for col in wide.columns if col != "Geography_code"
    }
    wide = wide.rename(rename_map)
    print(f"Output shape: {wide.shape}")
    print(f"Columns: {wide.columns}")
    # Create the output directory first so writing does not fail when the
    # target folder has not been created yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    wide.write_parquet(output_path, compression="zstd")
    print(f"Saved to {output_path}")
 def main() -> None:
    """Parse ``--output`` and run the ethnicity download and conversion."""
    cli = argparse.ArgumentParser(
        description="Download and convert ethnicity by local authority data"
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    download_and_convert(cli.parse_args().output)
 if __name__ == "__main__":
    main()
--- a/pipeline/download/naptan.py
+++ b/pipeline/download/naptan.py
@ -0,0 +1,69 @@
 """Download NaPTAN data and extract public-transport stop POIs (airports, ferries, rail, bus, taxi, metro/tram)."""
 import argparse
 import io
 import urllib.request
 from pathlib import Path
 import polars as pl
 NAPTAN_CSV_URL = "https://naptan.api.dft.gov.uk/v1/access-nodes?dataFormat=csv"
 # NaPTAN StopType codes that are kept, mapped to the category label written
 # to the output file.
 STOP_TYPES = {
    "AIR": "Airport",
    "FTD": "Ferry",
    "RSE": "Rail station",
    "BCT": "Bus stop",
    "BCE": "Bus station",
    "TXR": "Taxi rank",
    "TMU": "Metro or Tram stop",
 }
 def download_naptan(output: Path) -> None:
    """Download the NaPTAN CSV and write the selected stop types to parquet.

    Rows without a parseable latitude/longitude are dropped, only stop types
    listed in ``STOP_TYPES`` are kept, and the result is written to ``output``
    with columns ``id``, ``name``, ``category``, ``lat`` and ``lng``. A
    per-category row count is printed at the end.
    """
    output.parent.mkdir(parents=True, exist_ok=True)
    print(f"Downloading NaPTAN data from {NAPTAN_CSV_URL}")
    with urllib.request.urlopen(NAPTAN_CSV_URL) as resp:
        raw = resp.read()
    print(f"Downloaded {len(raw) / (1024 * 1024):.1f} MB")
    # infer_schema_length=0 reads every column as text; only the coordinates
    # are then cast to floats, with unparseable values becoming null.
    table = pl.read_csv(io.BytesIO(raw), infer_schema_length=0)
    table = table.with_columns(
        pl.col("Latitude").cast(pl.Float64, strict=False),
        pl.col("Longitude").cast(pl.Float64, strict=False),
    )
    table = table.drop_nulls(subset=["Latitude", "Longitude"])
    table = table.filter(pl.col("StopType").is_in(list(STOP_TYPES)))
    df = table.select(
        pl.col("ATCOCode").alias("id"),
        pl.col("CommonName").alias("name"),
        pl.col("StopType").replace(STOP_TYPES).alias("category"),
        pl.col("Latitude").alias("lat"),
        pl.col("Longitude").alias("lng"),
    )
    df.write_parquet(output)
    size_mb = output.stat().st_size / (1024 * 1024)
    print(f"Wrote {output} ({size_mb:.1f} MB, {len(df):,} stations)")
    # Summarise how many rows each category contributed, largest first.
    counts = df.group_by("category").len().sort("len", descending=True)
    for row in counts.iter_rows(named=True):
        print(f"  {row['category']}: {row['len']:,}")
 def main() -> None:
    """Parse ``--output`` and run the NaPTAN download."""
    cli = argparse.ArgumentParser(description="Download NaPTAN station data")
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    download_naptan(cli.parse_args().output)
 if __name__ == "__main__":
    main()
--- a/pipeline/download/noise.py
+++ b/pipeline/download/noise.py
@ -0,0 +1,283 @@
 """Download Defra Round 4 (2022) strategic noise data for England.
 Downloads modelled noise levels (road, rail, airport) as GeoTIFF rasters via
 WCS, then samples noise values at postcode centroids. Outputs a parquet file
 with postcode-level noise in dB for each source.
 Uses 100km tiles (~42 per source) to balance request size vs count. The server
 times out on tiles larger than ~150km at 100m resolution.
 Data source: Defra Strategic Noise Mapping Round 4 (2022)
  - Lden = day-evening-night 24h weighted average (the EU standard metric)
  - 10m grid, modelled at 4m above ground
 License: Open Government Licence v3.0
 Note: Road/rail use WCS 1.0.0; airport requires WCS 2.0.1 (Defra's 1.0.0
 endpoint is broken for that coverage).
 """
 import argparse
 import tempfile
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 import httpx
 import numpy as np
 import polars as pl
 import rasterio
 from pyproj import Transformer
 from rasterio.merge import merge
 from rasterio.transform import rowcol
 # Noise sources: (label, column_name, WCS base URL, coverage ID, WCS version)
 # Road/rail work with WCS 1.0.0; airport requires WCS 2.0.1.
 NOISE_SOURCES = [
    (
        "Road",
        "road_noise_lden_db",
        "https://environment.data.gov.uk/spatialdata/road-noise-all-metrics-england-round-4/wcs",
        "Road_Noise_Lden_England_Round_4_All",
        "1.0.0",
    ),
    (
        "Rail",
        "rail_noise_lden_db",
        "https://environment.data.gov.uk/spatialdata/noise-data/wcs",
        "Rail_Noise_Lden_England_Round_4_All",
        "1.0.0",
    ),
    (
        "Airport",
        "airport_noise_lden_db",
        "https://environment.data.gov.uk/spatialdata/airport-noise-all-metrics-england-round-4/wcs",
        "dac9cba4-abe7-43bd-b8e9-8a83da52edd8__Airport_Noise_ALL_Lden",
        "2.0.1",
    ),
 ]
 # England extent in EPSG:27700 (British National Grid), rounded outward
 BNG_MIN_E = 80_000
 BNG_MAX_E = 660_000
 BNG_MIN_N = 0
 BNG_MAX_N = 660_000
 # Tile size in metres (100km balances request size vs count; 300km causes 504s)
 TILE_SIZE = 100_000
 # Max concurrent tile downloads
 MAX_WORKERS = 4
 # Native raster resolution (10m grid)
 NATIVE_RESOLUTION = 10
 # Request pixel resolution in metres (100m is sufficient for postcode-level data
 # and keeps download size ~100x smaller than native 10m)
 RESOLUTION = 100
 def _wcs_get_coverage_url(
    wcs_base: str,
    coverage_id: str,
    min_e: int,
    min_n: int,
    max_e: int,
    max_n: int,
    wcs_version: str = "1.0.0",
 ) -> str:
    """Build a WCS GetCoverage URL for a BNG bounding box.

    ``wcs_version == "2.0.1"`` produces a 2.0.1 request using ``subset`` and
    ``scaleFactor``; any other value produces a 1.0.0 request using ``BBOX``
    with an explicit pixel width and height.
    """
    if wcs_version == "2.0.1":
        query = [
            "service=WCS",
            "version=2.0.1",
            "request=GetCoverage",
            f"coverageId={coverage_id}",
            "format=image/tiff",
            "subsettingCRS=EPSG:27700",
            f"subset=E({min_e},{max_e})",
            f"subset=N({min_n},{max_n})",
            f"scaleFactor={NATIVE_RESOLUTION / RESOLUTION}",
        ]
    else:
        # WCS 1.0.0 takes the output size in pixels rather than a scale factor.
        width = (max_e - min_e) // RESOLUTION
        height = (max_n - min_n) // RESOLUTION
        query = [
            "service=WCS",
            "version=1.0.0",
            "request=GetCoverage",
            f"coverage={coverage_id}",
            "CRS=EPSG:27700",
            f"BBOX={min_e},{min_n},{max_e},{max_n}",
            f"width={width}",
            f"height={height}",
            "format=GeoTIFF",
        ]
    return f"{wcs_base}?" + "&".join(query)
 _TO_BNG = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
 def _bng_from_latlon(lat: np.ndarray, lon: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Convert WGS84 lat/lon arrays to British National Grid (easting, northing)."""
    # The transformer was built with always_xy=True, so it takes (x=lon, y=lat).
    easting, northing = _TO_BNG.transform(lon, lat)
    return easting, northing
 def _download_tile(
    wcs_base: str,
    coverage_id: str,
    min_e: int,
    min_n: int,
    max_e: int,
    max_n: int,
    tile_path: Path,
    wcs_version: str = "1.0.0",
 ) -> Path | None:
    """Download a single WCS tile and save it to ``tile_path``.

    Returns:
        ``tile_path`` after the response body has been written to it, or
        ``None`` when the request fails or the response is not a TIFF.
    """
    url = _wcs_get_coverage_url(wcs_base, coverage_id, min_e, min_n, max_e, max_n, wcs_version)
    try:
        with httpx.Client(timeout=300, follow_redirects=True) as client:
            resp = client.get(url)
            resp.raise_for_status()
    except httpx.HTTPError as e:
        # httpx.HTTPError is the common base of status errors and all
        # transport errors (timeouts, connect/read failures, ...), so one bad
        # tile is reported and skipped instead of aborting the whole run.
        print(f"  Failed to download tile ({min_e},{min_n})-({max_e},{max_n}): {e}")
        return None
    content_type = resp.headers.get("content-type", "")
    # Accept the body if either the content type mentions TIFF or it starts
    # with a TIFF magic number (little- or big-endian).
    if "tiff" not in content_type and resp.content[:4] not in (b"II*\x00", b"MM\x00*"):
        return None
    tile_path.write_bytes(resp.content)
    return tile_path
 def download_raster(
    tile_dir: Path, wcs_base: str, coverage_id: str, label: str, wcs_version: str = "1.0.0"
 ) -> list[Path]:
    """Download noise GeoTIFF tiles covering England into ``tile_dir``.

    The BNG extent is split into ``TILE_SIZE`` squares, each fetched with
    ``_download_tile`` on a thread pool. Returns the paths of the tiles that
    downloaded successfully, in completion order.
    """
    # Tile the extent, clamping the last row/column to the extent's edge.
    tiles = [
        (
            min_e,
            min_n,
            min(min_e + TILE_SIZE, BNG_MAX_E),
            min(min_n + TILE_SIZE, BNG_MAX_N),
        )
        for min_e in range(BNG_MIN_E, BNG_MAX_E, TILE_SIZE)
        for min_n in range(BNG_MIN_N, BNG_MAX_N, TILE_SIZE)
    ]
    print(f"[{label}] Downloading {len(tiles)} tiles at {RESOLUTION}m resolution ({MAX_WORKERS} workers)...")
    paths = []
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        pending = [
            executor.submit(
                _download_tile, wcs_base, coverage_id,
                min_e, min_n, max_e, max_n,
                tile_dir / f"tile_{min_e}_{min_n}.tif", wcs_version,
            )
            for min_e, min_n, max_e, max_n in tiles
        ]
        for completed, fut in enumerate(as_completed(pending), start=1):
            result = fut.result()
            if result is not None:
                paths.append(result)
            # "\r" rewrites the same console line to show running progress.
            print(
                f"\r  [{completed}/{len(tiles)}] Downloaded {len(paths)} valid tiles",
                end="",
                flush=True,
            )
    print(f"\n[{label}] Downloaded {len(paths)}/{len(tiles)} tiles")
    return paths
 def sample_noise_at_postcodes(
    tile_paths: list[Path],
    easting: np.ndarray,
    northing: np.ndarray,
    label: str,
    col_name: str,
 ) -> pl.Series:
    """Sample noise values from merged tiles at given BNG coordinates.

    Args:
        tile_paths: GeoTIFF tiles to merge; must be non-empty.
        easting: BNG eastings of the sample points.
        northing: BNG northings of the sample points.
        label: Source name used in progress messages.
        col_name: Name of the returned Series.

    Returns:
        A Series with one value per input point; points outside the mosaic,
        on nodata cells, or on zero-valued cells are null.
    """
    print(f"[{label}] Merging {len(tile_paths)} tiles...")
    datasets = []
    try:
        for p in tile_paths:
            datasets.append(rasterio.open(p))
        raster_nodata = datasets[0].nodata
        mosaic, mosaic_transform = merge(datasets)
    finally:
        # Close every dataset that was opened, even if opening a later tile or
        # the merge itself fails.
        for ds in datasets:
            ds.close()
    noise_grid = mosaic[0]
    print(f"[{label}] Sampling noise values at postcode centroids...")
    rows, cols = rowcol(mosaic_transform, easting, northing)
    rows = np.asarray(rows)
    cols = np.asarray(cols)
    h, w = noise_grid.shape
    # Points whose pixel index falls outside the mosaic keep the NaN default.
    in_bounds = (rows >= 0) & (rows < h) & (cols >= 0) & (cols < w)
    noise_db = np.full(len(easting), np.nan, dtype=np.float32)
    valid_rows = rows[in_bounds]
    valid_cols = cols[in_bounds]
    sampled = noise_grid[valid_rows, valid_cols].astype(np.float32)
    # Mark nodata and zero (unmapped areas) as NaN.
    # Road/rail use nodata=-96, airport uses nodata=3.4e38.
    if raster_nodata is not None:
        sampled[np.isclose(sampled, np.float32(raster_nodata), rtol=1e-5)] = np.nan
    sampled[sampled == 0] = np.nan
    noise_db[in_bounds] = sampled
    valid_count = int(np.sum(~np.isnan(noise_db)))
    print(f"[{label}] Sampled {valid_count:,} / {len(easting):,} postcodes with noise data")
    # Return as masked Series: use null (not NaN) so that Polars max_horizontal
    # correctly ignores missing values instead of propagating NaN.
    return pl.Series(col_name, noise_db).fill_nan(None)
 def main() -> None:
    """Download each noise source and sample it at every postcode centroid.

    Reads postcode coordinates from ``--arcgis``, converts them to BNG, adds
    one column per entry in ``NOISE_SOURCES`` (all null when no tile could be
    downloaded for that source), and writes the result to ``--output``.
    """
    cli = argparse.ArgumentParser(
        description="Download Defra noise data (road, rail, airport) and sample at postcode centroids"
    )
    cli.add_argument(
        "--arcgis",
        type=Path,
        required=True,
        help="ArcGIS postcode data parquet (for lat/lon coordinates)",
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    args = cli.parse_args()
    args.output.parent.mkdir(parents=True, exist_ok=True)
    print("Loading postcode coordinates...")
    raw_postcodes = pl.read_parquet(args.arcgis, columns=["pcds", "lat", "long"])
    postcodes = raw_postcodes.rename({"pcds": "postcode", "long": "lon"})
    lat = postcodes["lat"].to_numpy()
    lon = postcodes["lon"].to_numpy()
    print("Converting lat/lon to BNG...")
    easting, northing = _bng_from_latlon(lat, lon)
    result = postcodes.select("postcode")
    # Tiles are scratch data: each source gets its own sub-folder of a
    # temporary directory that is removed once sampling has finished.
    with tempfile.TemporaryDirectory() as tmp:
        tmp_root = Path(tmp)
        for label, col_name, wcs_base, coverage_id, wcs_version in NOISE_SOURCES:
            tile_dir = tmp_root / label.lower()
            tile_dir.mkdir()
            tile_paths = download_raster(tile_dir, wcs_base, coverage_id, label, wcs_version)
            if tile_paths:
                series = sample_noise_at_postcodes(tile_paths, easting, northing, label, col_name)
            else:
                print(f"[{label}] WARNING: No tiles downloaded — column will be all null")
                series = pl.Series(col_name, [None] * len(lat), dtype=pl.Float32)
            result = result.with_columns(series)
    result.write_parquet(args.output, compression="zstd")
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
 if __name__ == "__main__":
    main()
--- a/pipeline/download/ofsted.py
+++ b/pipeline/download/ofsted.py
@ -0,0 +1,46 @@
 import argparse
 import tempfile
 import polars as pl
 from pathlib import Path
 from pipeline.utils import download
 # Management information - state-funded schools - latest inspections (as at 30 Apr 2025)
 # Source: https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes
 URL = "https://assets.publishing.service.gov.uk/media/681cd390275cb67b18d870fc/Management_information_-_state-funded_schools_-_latest_inspections_as_at_30_Apr_2025.csv"
 def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
     """Read the downloaded Ofsted CSV and write it as zstd-compressed parquet.
     Args:
         csv_path: CSV file to read.
         parquet_path: Destination parquet file; missing parent directories
             are created.
     """
     print("Reading CSV...")
     df = pl.read_csv(
         csv_path,
         infer_schema_length=10000,
         # Lossy decoding: undecodable bytes are replaced instead of aborting the load.
         encoding="utf8-lossy",
         # Placeholder strings that should be read as nulls.
         null_values=["NULL", "Not applicable"],
     )
     print(f"Shape: {df.shape}")
     print(f"Columns: {df.columns}")
     # Create the output directory first, as the other download scripts do,
     # so a fresh output path does not fail at write time.
     parquet_path.parent.mkdir(parents=True, exist_ok=True)
     df.write_parquet(parquet_path, compression="zstd")
     print(f"Saved to {parquet_path}")
 def main() -> None:
     """CLI entry point: download the Ofsted CSV to a scratch dir and convert it."""
     cli = argparse.ArgumentParser(
         description="Download Ofsted school inspection outcomes data"
     )
     cli.add_argument(
         "--output", type=Path, required=True, help="Output parquet file path"
     )
     output_path = cli.parse_args().output
     # The CSV is only an intermediate artefact, so it lives in a throwaway directory.
     with tempfile.TemporaryDirectory() as scratch:
         csv_file = Path(scratch, "ofsted_latest_inspections.csv")
         download(URL, csv_file, timeout=60)
         convert_to_parquet(csv_file, output_path)
 if __name__ == "__main__":
    main()
--- a/pipeline/download/pois.py
+++ b/pipeline/download/pois.py
@ -0,0 +1,175 @@
 import argparse
 import tempfile
 import urllib.request
 from pathlib import Path
 from tempfile import mkdtemp
 import osmium
 import polars as pl
 from tqdm import tqdm
 # Number of POI rows POIHandler buffers in memory before writing a batch parquet file.
 BATCH_SIZE = 50_000
 # Categories with fewer rows than this are dropped from the final output.
 MIN_OCCURENCE_COUNT = 20
 # Source PBF extract streamed by download_pbf.
 GEOFABRIK_GB_URL = "https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
 # Bounding box (degrees) used by POIHandler._in_uk to discard nodes outside it.
 UK_BBOX_WEST = -7.57
 UK_BBOX_SOUTH = 49.96
 UK_BBOX_EAST = 1.68
 UK_BBOX_NORTH = 58.64
 # OSM tag keys that make a node a POI; each key present on a node yields
 # one "key/value" category row.
 POI_TAG_KEYS: list[str] = [
    "amenity",
    "building",
    "craft",
    "emergency",
    "healthcare",
    "leisure",
    "office",
    "shop",
    "tourism",
    "public_transport",
 ]
 def download_pbf(pbf_file: Path) -> None:
     """Stream the Geofabrik extract to ``pbf_file`` while showing a progress bar.
     The payload is written to a sibling ``.pbf.tmp`` file and renamed to
     ``pbf_file`` only after the whole body has been read.
     """
     pbf_file.parent.mkdir(parents=True, exist_ok=True)
     partial = pbf_file.with_suffix(".pbf.tmp")
     print(f"Downloading {GEOFABRIK_GB_URL}")
     chunk_size = 1 << 20
     with tqdm(unit="B", unit_scale=True, desc="Downloading") as bar:
         with urllib.request.urlopen(GEOFABRIK_GB_URL) as resp, open(partial, "wb") as sink:
             # The total is only known when the server sends Content-Length.
             content_length = resp.headers.get("Content-Length")
             if content_length:
                 bar.total = int(content_length)
             while True:
                 chunk = resp.read(chunk_size)
                 if not chunk:
                     break
                 sink.write(chunk)
                 bar.update(len(chunk))
     partial.rename(pbf_file)
     print(f"Saved to {pbf_file}")
 class POIHandler(osmium.SimpleHandler):
     """Collect POI rows from OSM nodes and spill them to parquet batches.
     A node inside the UK bounding box produces one row per tag key from
     POI_TAG_KEYS that it carries. Rows are buffered in memory and written to
     ``batch_NNNNN.parquet`` files in ``tmp_dir`` every BATCH_SIZE rows; the
     caller must invoke ``_flush_batch`` once more after processing to write
     the remainder.
     """
     def __init__(self, progress: tqdm, tmp_dir: Path) -> None:
         super().__init__()
         self._batch: list[dict] = []
         self._tmp_dir = tmp_dir
         self._batch_num = 0
         # Total number of rows emitted so far (across all batches).
         self.poi_count = 0
         self._progress = progress
     def _in_uk(self, lat: float, lon: float) -> bool:
         """Return True when the coordinate lies inside the UK bounding box."""
         return (
             UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH
             and UK_BBOX_WEST <= lon <= UK_BBOX_EAST
         )
     def _match_tags(self, tags: osmium.osm.TagList) -> list[str]:
         """Return a ``key/value`` category for every POI tag key present in ``tags``."""
         return [f"{key}/{tags[key]}" for key in POI_TAG_KEYS if key in tags]
     def _get_name(self, tags: osmium.osm.TagList) -> str:
         """Return ``name:en`` if present, else ``name``, else an empty string."""
         return tags.get("name:en", tags.get("name", ""))
     def _flush_batch(self) -> None:
         """Write the buffered rows to the next batch file and clear the buffer."""
         if not self._batch:
             return
         df = pl.DataFrame(self._batch)
         out = self._tmp_dir / f"batch_{self._batch_num:05d}.parquet"
         df.write_parquet(out)
         self._batch_num += 1
         self._batch.clear()
     def _add_poi(
         self,
         osm_id: str,
         tags: osmium.osm.TagList,
         category: str,
         lat: float,
         lng: float,
     ) -> None:
         """Buffer one POI row, update the progress postfix, and flush when full."""
         self._batch.append(
             {
                 "id": osm_id,
                 "name": self._get_name(tags),
                 "category": category,
                 "lat": lat,
                 "lng": lng,
             }
         )
         self.poi_count += 1
         self._progress.set_postfix(pois=f"{self.poi_count:,}", refresh=False)
         if len(self._batch) >= BATCH_SIZE:
             self._flush_batch()
     def _tick(self) -> None:
         """Advance the progress bar by one processed element."""
         self._progress.update(1)
     def node(self, n: osmium.osm.Node) -> None:
         """Handle one OSM node: emit a row per matching POI tag key."""
         self._tick()
         # Location.valid is a method; without the call the bound method is
         # always truthy and this guard never fires.
         if not n.location.valid():
             return
         lat, lon = n.location.lat, n.location.lon
         if not self._in_uk(lat, lon):
             return
         categories = self._match_tags(n.tags)
         for category in categories:
             self._add_poi(f"n{n.id}", n.tags, category, lat, lon)
 def main() -> None:
     """CLI entry point: download the GB extract, extract POIs, write ``--output``.
     Both the downloaded PBF and the intermediate batch parquet files live in
     temporary directories that are removed when processing finishes, whether
     it succeeds or fails. Categories with fewer than MIN_OCCURENCE_COUNT rows
     are dropped before the result is written.
     """
     parser = argparse.ArgumentParser(
         description="Download and extract POIs from OpenStreetMap"
     )
     parser.add_argument(
         "--output", type=Path, required=True, help="Output parquet file path"
     )
     args = parser.parse_args()
     # Make sure the destination directory exists before hours of processing.
     args.output.parent.mkdir(parents=True, exist_ok=True)
     with (
         tempfile.TemporaryDirectory() as cache_dir,
         # Batch files previously went into an mkdtemp() directory that was
         # never deleted; a managed directory cleans them up.
         tempfile.TemporaryDirectory(prefix="pois_") as batch_dir,
     ):
         pbf_file = Path(cache_dir) / "great-britain-latest.osm.pbf"
         # cache_dir is always freshly created, so the file can never already
         # exist here; download unconditionally.
         download_pbf(pbf_file)
         print(f"Tag keys: {POI_TAG_KEYS}")
         tmp_dir = Path(batch_dir)
         with tqdm(
             unit=" elements",
             unit_scale=True,
             desc="Streaming",
             smoothing=0.05,
             mininterval=1.0,
         ) as progress:
             handler = POIHandler(progress, tmp_dir)
             handler.apply_file(str(pbf_file), locations=True)
             handler._flush_batch()  # write any remaining POIs
         print(f"Extracted {handler.poi_count:,} POIs")
         batch_files = sorted(tmp_dir.glob("batch_*.parquet"))
         df = pl.concat([pl.scan_parquet(f) for f in batch_files])
         # Only keep categories with enough occurrences
         valid_categories = (
             df.group_by("category")
             .agg(pl.len().alias("count"))
             .filter(pl.col("count") >= MIN_OCCURENCE_COUNT)
         )
         df = df.join(valid_categories.select("category"), on="category", how="semi")
         print(f"Total POIs: {handler.poi_count:,}")
         # The lazy plan reads the batch files, so it must be executed while
         # the batch directory still exists.
         df.sink_parquet(args.output)
         print(f"Saved to {args.output}")
 if __name__ == "__main__":
    main()
--- a/pipeline/download/price_paid.py
+++ b/pipeline/download/price_paid.py
@ -0,0 +1,66 @@
 import argparse
 import tempfile
 import polars as pl
 from pathlib import Path
 from pipeline.utils import download
 URL = "http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv"
 def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
     """Load the header-less price-paid CSV and store it as zstd parquet.
     Column names are assigned positionally from the Land Registry layout
     (https://www.gov.uk/guidance/about-the-price-paid-data).
     """
     print("Converting to Parquet...")
     # Land Registry CSV columns, in file order
     field_names = [
         "transaction_id",
         "price",
         "date_of_transfer",
         "postcode",
         "property_type",
         "old_new",
         "duration",
         "paon",
         "saon",
         "street",
         "locality",
         "town_city",
         "district",
         "county",
         "ppd_category",
         "record_status",
     ]
     frame = pl.read_csv(
         csv_path,
         has_header=False,
         new_columns=field_names,
         try_parse_dates=True,
     )
     out_dir = parquet_path.parent
     out_dir.mkdir(parents=True, exist_ok=True)
     schema = frame.collect_schema()
     print(f"Columns: {schema.names()}")
     frame.write_parquet(parquet_path, compression="zstd")
     print(f"Saved to {parquet_path}")
 def main() -> None:
     """CLI entry point: fetch the complete price-paid CSV and convert it."""
     cli = argparse.ArgumentParser(
         description="Download and convert Land Registry price-paid data"
     )
     cli.add_argument(
         "--output", type=Path, required=True, help="Output parquet file path"
     )
     output_path = cli.parse_args().output
     # The raw CSV is discarded once the parquet file has been written.
     with tempfile.TemporaryDirectory() as scratch:
         csv_file = Path(scratch, "price-paid-complete.csv")
         download(URL, csv_file)
         convert_to_parquet(csv_file, output_path)
 if __name__ == "__main__":
    main()
--- a/pipeline/journey_times/init.py
+++ b/pipeline/journey_times/init.py
@ -0,0 +1,29 @@
 """Journey times calculation module for TfL transit data."""
 from .config import (
    DATA_DIR,
    DESTINATIONS,
    MAX_CONCURRENT,
    MAX_DELAY,
    MAX_POSTCODES,
    OUTPUT_DIR,
    REQUESTS_PER_MIN,
 )
 from .models import Destination, JourneyResult
 from .results import results_to_dataframe, save_results
 from .tfl_client import fetch_journey_times
 __all__ = [
    "DATA_DIR",
    "OUTPUT_DIR",
    "MAX_DELAY",
    "REQUESTS_PER_MIN",
    "MAX_POSTCODES",
    "MAX_CONCURRENT",
    "DESTINATIONS",
    "Destination",
    "JourneyResult",
    "fetch_journey_times",
    "results_to_dataframe",
    "save_results",
 ]
--- a/pipeline/journey_times/main.py
+++ b/pipeline/journey_times/main.py
@ -0,0 +1,142 @@
 import asyncio
 import random
 from datetime import date, timedelta
 import polars as pl
 from tqdm import tqdm
 from .config import (
    DESTINATIONS,
    MAX_CONCURRENT,
    MAX_POSTCODES,
    OUTPUT_DIR,
    MAX_DISTANCE_KM,
 )
 from .models import JourneyResult
 from .results import CheckpointSaver, results_to_dataframe, save_results
 from .tfl_client import fetch_journey_times
 from pipeline.utils import haversine_km_expr
 def main():
     """Fetch journey times from nearby postcodes to Bank and save them as parquet.
     Postcodes within MAX_DISTANCE_KM of the destination are queried for the
     next Monday at 08:45. Progress is checkpointed through CheckpointSaver;
     when a checkpoint file exists its postcodes are skipped and its rows are
     merged into the final output. The checkpoint is deleted after the final
     parquet file has been written.
     """
     destination = DESTINATIONS["bank"]
     # Use the next Monday strictly after today (a Monday run targets the
     # following week) with a journey time of 08:45.
     today = date.today()
     days_until_monday = (7 - today.weekday()) % 7 or 7
     journey_date = today + timedelta(days=days_until_monday)
     journey_time = "0845"
     print(f"Destination: {destination.name}")
     print(
         f"Journey: {journey_date.strftime('%A %Y-%m-%d')} "
         f"at {journey_time[:2]}:{journey_time[2:]}"
     )
     postcodes_df = pl.read_parquet(OUTPUT_DIR / "postcodes_h3.parquet")
     print(f"Loaded {postcodes_df.height:,} postcodes")
     # Filter to postcodes within range of destination
     postcodes_df = postcodes_df.with_columns(
         haversine_km_expr("lat", "long", destination.lat, destination.lon).alias(
             "distance_km"
         )
     ).filter(pl.col("distance_km") <= MAX_DISTANCE_KM)
     print(f"Filtered to {postcodes_df.height:,} postcodes within {MAX_DISTANCE_KM}km")
     postcode_data = list(
         zip(
             postcodes_df["postcode"].to_list(),
             postcodes_df["lat"].to_list(),
             postcodes_df["long"].to_list(),
         )
     )
     if MAX_POSTCODES is not None and len(postcode_data) > MAX_POSTCODES:
         postcode_data = random.sample(postcode_data, MAX_POSTCODES)
         print(f"Randomly sampled {MAX_POSTCODES} postcodes")
     checkpoint_saver = CheckpointSaver(
         destination_name=destination.name,
         on_save=lambda path, count: print(
             f"Checkpoint saved: {count:,} results to {path}"
         ),
     )
     # Resume from checkpoint if one exists
     checkpoint_path = checkpoint_saver._checkpoint_path()
     prior_results: list[JourneyResult] = []
     if checkpoint_path.exists():
         checkpoint_df = pl.read_parquet(checkpoint_path)
         # Deduplicate checkpoint rows per postcode, preferring rows with data
         checkpoint_df = (
             checkpoint_df.sort("public_transport_quick_minutes", nulls_last=True)
             .unique(subset=["postcode"], keep="first")
         )
         completed_postcodes = set(checkpoint_df["postcode"].to_list())
         prior_results = [
             JourneyResult(
                 postcode=row["postcode"],
                 public_transport_easy_minutes=row["public_transport_easy_minutes"],
                 public_transport_quick_minutes=row["public_transport_quick_minutes"],
                 cycling_minutes=row["cycling_minutes"],
                 error=row["error"],
             )
             for row in checkpoint_df.iter_rows(named=True)
         ]
         # Hand the saver its own copy. Sharing the list would let add_result()
         # append new results into prior_results, so that
         # `prior_results + new_results` below would contain every new result
         # twice and duplicate rows in the final output.
         checkpoint_saver.results = list(prior_results)
         checkpoint_saver._last_save_count = len(prior_results)
         postcode_data = [
             (pc, lat, lon)
             for pc, lat, lon in postcode_data
             if pc not in completed_postcodes
         ]
         print(
             f"Resumed from checkpoint: {len(prior_results):,} already done, "
             f"{len(postcode_data):,} remaining"
         )
     def on_result(result):
         # pbar is bound by the `with` block below before any callback fires.
         pbar.update(1)
         checkpoint_saver.add_result(result)
     with tqdm(total=len(postcode_data), desc="Fetching journeys") as pbar:
         new_results = asyncio.run(
             fetch_journey_times(
                 postcode_data,
                 destination,
                 journey_date.strftime("%Y%m%d"),
                 journey_time,
                 MAX_CONCURRENT,
                 progress_callback=on_result,
             )
         )
     all_results = prior_results + new_results
     results_df = results_to_dataframe(all_results)
     all_postcodes = {r.postcode for r in all_results}
     # Re-attach coordinates so the output carries lat/long next to the timings.
     coords_df = postcodes_df.filter(
         pl.col("postcode").is_in(all_postcodes)
     ).select(["postcode", "lat", "long"])
     results_df = coords_df.join(results_df, on="postcode", how="left")
     results_df = results_df.with_columns(
         pl.lit(destination.name).alias("destination"),
         pl.lit(journey_date.strftime("%Y-%m-%d")).alias("journey_date"),
         pl.lit(f"{journey_time[:2]}:{journey_time[2:]}").alias("journey_time"),
     )
     successful = results_df.filter(pl.col("cycling_minutes").is_not_null()).height
     print(f"Completed: {successful}/{len(all_results)} successful")
     parquet_path = save_results(results_df, destination.name)
     checkpoint_saver.cleanup_checkpoint()
     print(f"Saved to {parquet_path}")
 if __name__ == "__main__":
    main()
--- a/pipeline/journey_times/config.py
+++ b/pipeline/journey_times/config.py
@ -0,0 +1,23 @@
 """Configuration constants for journey times processing."""
 from .models import Destination
 # Upper bound on the retry backoff sleep used by the TfL client.
 MAX_DELAY = 10
 # Request budget enforced by RateLimiter.
 REQUESTS_PER_MIN = 500
 # Optional cap on how many postcodes are queried; None disables sampling.
 MAX_POSTCODES = None
 # Size of the semaphore limiting how many postcodes are processed at once.
 MAX_CONCURRENT = 80
 # Postcodes further than this from the destination are not queried.
 MAX_DISTANCE_KM = 110
 # Default number of new results between checkpoint writes.
 CHECKPOINT_INTERVAL = 10000
 # Known destinations keyed by slug; arguments are (lat, lon, name, naptan_id).
 DESTINATIONS = {
    "bank": Destination(51.5133, -0.0886, "Bank", "940GZZLUBNK"),
    "waterloo": Destination(51.5031, -0.1132, "Waterloo", "940GZZLUWLO"),
    "kings-cross": Destination(51.5308, -0.1238, "King's Cross", "940GZZLUKSX"),
    "liverpool-street": Destination(
        51.5178, -0.0823, "Liverpool Street", "940GZZLULVS"
    ),
    "paddington": Destination(51.5154, -0.1755, "Paddington", "940GZZLUPAC"),
    "victoria": Destination(51.4965, -0.1447, "Victoria", "940GZZLUVIC"),
 }
--- a/pipeline/journey_times/data.py
+++ b/pipeline/journey_times/data.py
--- a/pipeline/journey_times/models.py
+++ b/pipeline/journey_times/models.py
@ -0,0 +1,30 @@
 """Data models for journey times processing."""
 from dataclasses import dataclass
@dataclass
 class Destination:
    """A destination point for journey planning."""
    lat: float
    lon: float
    name: str
    naptan_id: str | None = None
    def to_tfl_location(self) -> str:
        """Convert to TfL API location string."""
        if self.naptan_id:
            return self.naptan_id
        return f"{self.lat},{self.lon}"
@dataclass
 class JourneyResult:
     """Result of a journey time calculation for a postcode.
     Every field except ``postcode`` defaults to None. Note the declaration
     order (easy, cycling, quick, error) when constructing positionally.
     """
     postcode: str
     # Shortest duration returned for the "easy" (LeastInterchange) query, if any.
     public_transport_easy_minutes: int | None = None
     # Shortest duration returned for the cycle-only query, if any.
     cycling_minutes: int | None = None
     # Shortest duration returned for the "quick" (LeastTime) query, if any.
     public_transport_quick_minutes: int | None = None
     # String form of the exception when fetching this postcode failed.
     error: str | None = None
--- a/pipeline/journey_times/rate_limiter.py
+++ b/pipeline/journey_times/rate_limiter.py
@ -0,0 +1,35 @@
 """Rate limiting for TfL API requests."""
 import asyncio
 import warnings
 from .config import REQUESTS_PER_MIN
 class RateLimiter:
     """Rate limiter enforcing max requests per minute.
     The budget is applied over a sliding 10-second window holding at most
     ``REQUESTS_PER_MIN // 6`` requests. ``acquire`` serialises callers with a
     lock and sleeps while holding it, so waiting callers queue behind it.
     """
     def __init__(self):
         # Event-loop timestamps of the requests admitted in the current window.
         self.request_times: list[float] = []
         self._lock = asyncio.Lock()
     async def acquire(self):
         """Wait until we can make a request within rate limits."""
         async with self._lock:
             # get_running_loop() is the documented way to reach the loop from
             # inside a coroutine; it never creates a loop implicitly.
             loop = asyncio.get_running_loop()
             now = loop.time()
             cutoff = now - 10.0  # 10 seconds
             self.request_times = [t for t in self.request_times if t > cutoff]
             if (
                 len(self.request_times) >= REQUESTS_PER_MIN // 6
             ):  # we look at it every 10 seconds instead of minutes
                 # Time until the oldest recorded request leaves the window.
                 wait_time = self.request_times[0] - cutoff
                 if wait_time > 0:
                     warnings.warn(
                         f"Rate limit reached ({REQUESTS_PER_MIN}/min), "
                         f"waiting {wait_time:.1f}s",
                         stacklevel=1,
                     )
                     await asyncio.sleep(wait_time)
             # Record the admission time after any wait, not the arrival time.
             self.request_times.append(loop.time())
--- a/pipeline/journey_times/results.py
+++ b/pipeline/journey_times/results.py
@ -0,0 +1,85 @@
 from pathlib import Path
 from typing import Callable
 import polars as pl
 from .config import CHECKPOINT_INTERVAL, OUTPUT_DIR
 from .models import JourneyResult
 def results_to_dataframe(results: list[JourneyResult]) -> pl.DataFrame:
     """Convert journey results into a DataFrame with one row per result.
     Columns, in order: postcode, public_transport_easy_minutes,
     public_transport_quick_minutes, cycling_minutes, error.
     """
     rows = []
     for item in results:
         row = {
             "postcode": item.postcode,
             "public_transport_easy_minutes": item.public_transport_easy_minutes,
             "public_transport_quick_minutes": item.public_transport_quick_minutes,
             "cycling_minutes": item.cycling_minutes,
             "error": item.error,
         }
         rows.append(row)
     return pl.DataFrame(rows)
 class CheckpointSaver:
     """Collects results and saves checkpoints at regular intervals.
     Args:
         destination_name: Used to derive the checkpoint file name.
         output_dir: Directory for the checkpoint file; defaults to OUTPUT_DIR.
         interval: Number of new results between automatic checkpoint writes.
         on_save: Optional callback invoked with (path, result_count) after
             each checkpoint write.
     """
     def __init__(
         self,
         destination_name: str,
         output_dir: Path | None = None,
         interval: int = CHECKPOINT_INTERVAL,
         on_save: Callable[[Path, int], None] | None = None,
     ):
         self.destination_name = destination_name
         self.output_dir = output_dir or OUTPUT_DIR
         self.interval = interval
         self.on_save = on_save
         self.results: list[JourneyResult] = []
         # Length of ``results`` at the time of the most recent checkpoint.
         self._last_save_count = 0
     def add_result(self, result: JourneyResult) -> None:
         """Add a result and save checkpoint if interval is reached."""
         self.results.append(result)
         if len(self.results) - self._last_save_count >= self.interval:
             self.save_checkpoint()
     def save_checkpoint(self) -> Path:
         """Save current results to checkpoint file.
         The data is written to a sibling ``.tmp`` file and then moved over
         the checkpoint, so an interruption mid-write cannot leave a truncated
         checkpoint that a later resume would fail to read.
         Returns:
             Path of the checkpoint file.
         """
         df = results_to_dataframe(self.results)
         path = self._checkpoint_path()
         tmp_path = path.with_name(path.name + ".tmp")
         df.write_parquet(tmp_path)
         tmp_path.replace(path)
         self._last_save_count = len(self.results)
         if self.on_save:
             self.on_save(path, len(self.results))
         return path
     def _checkpoint_path(self) -> Path:
         """Return the checkpoint path derived from the destination name."""
         safe_name = self.destination_name.lower().replace(" ", "-")
         return self.output_dir / f"journey_times_{safe_name}_checkpoint.parquet"
     def get_results(self) -> list[JourneyResult]:
         """Return all collected results."""
         return self.results
     def cleanup_checkpoint(self) -> None:
         """Remove the checkpoint file after successful completion."""
         path = self._checkpoint_path()
         if path.exists():
             path.unlink()
 def save_results(
     results: pl.DataFrame,
     destination_name: str,
     output_dir: Path | None = None,
 ) -> Path:
     """Write ``results`` to ``journey_times_<destination>.parquet`` and return its path.
     The destination name is lower-cased and spaces become hyphens; the file
     goes to ``output_dir``, or OUTPUT_DIR when that is None.
     """
     target_dir = OUTPUT_DIR if output_dir is None else output_dir
     slug = destination_name.lower().replace(" ", "-")
     destination_file = target_dir / f"journey_times_{slug}.parquet"
     results.write_parquet(destination_file)
     return destination_file
--- a/pipeline/journey_times/tfl_client.py
+++ b/pipeline/journey_times/tfl_client.py
@ -0,0 +1,254 @@
 import asyncio
 import os
 from typing import Literal
 import warnings
 from collections.abc import Callable
 from http import HTTPStatus
 import httpx
 from .config import MAX_DELAY
 from .models import Destination, JourneyResult
 from .rate_limiter import RateLimiter
 BASE_URL = "https://api.tfl.gov.uk"
 async def fetch_journey_for_mode(
     client: httpx.AsyncClient,
     rate_limiter: RateLimiter,
     from_location: str,
     to_location: str,
     journey_date: str,
     journey_time: str,
     journey_type: Literal["quick"] | Literal["easy"] | Literal["cycle"],
     retry_count: int = 5,
 ) -> int | None:
     """Fetch journey time for a specific mode with rate limiting.
     Args:
         client: HTTP client used for the request (relative URLs are used).
         rate_limiter: Awaited before every attempt.
         from_location: Origin in the form accepted by the TfL journey endpoint.
         to_location: Destination in the same form.
         journey_date: Date string passed through as the ``date`` parameter.
         journey_time: Time string passed through as the ``time`` parameter.
         journey_type: Which query profile to use.
         retry_count: Maximum number of attempts.
     Returns:
         The smallest ``duration`` among the returned journeys, or None when
         there are no journeys, the response status is not retryable, or all
         attempts failed.
     Raises:
         KeyError: If ``journey_type`` is not one of the supported values.
     """
     # The request does not change between attempts, so build it once
     # instead of on every retry.
     journey_preference = {
         "quick": "LeastTime",
         "easy": "LeastInterchange",
         "cycle": None,
     }[journey_type]
     cycle_preference = {
         "quick": None,
         "easy": None,
         "cycle": "AllTheWay",
     }[journey_type]
     # curl -s "https://api.tfl.gov.uk/Journey/Meta/Modes" | jq '.[].modeName'
     mode = {
         "quick": [
             "bus",
             "overground",
             "national-rail",
             "international-rail",
             "elizabeth-line",
             "tube",
             "coach",
             "dlr",
             "cable-car",
             "replacement-bus",
             "tram",
             "river-bus",
             "walking",
             "cycle",
         ],
         "easy": [
             "bus",
             "overground",
             "national-rail",
             "international-rail",
             "elizabeth-line",
             "replacement-bus",
             "tube",
             "coach",
             "dlr",
             "cable-car",
             "tram",
             "river-bus",
         ],
         "cycle": ["cycle"],
     }[journey_type]
     params: dict = {
         "date": journey_date,
         "time": journey_time,
         "nationalSearch": "true",
         "timeIs": "Arriving",
         "bikeProficiency": "Fast",
         "walkingOptimization": str(journey_type == "quick").lower(),
         "mode": ",".join(mode),
     }
     # Only send the preference parameters that apply; a None value would
     # otherwise go out as an empty query parameter.
     if cycle_preference:
         params["cyclePreference"] = cycle_preference
     if journey_preference:
         params["journeyPreference"] = journey_preference
     url = f"/Journey/JourneyResults/{from_location}/to/{to_location}"
     retryable_statuses = (
         HTTPStatus.TOO_MANY_REQUESTS,
         HTTPStatus.INTERNAL_SERVER_ERROR,
         HTTPStatus.BAD_GATEWAY,
         HTTPStatus.SERVICE_UNAVAILABLE,
         HTTPStatus.GATEWAY_TIMEOUT,
     )
     backoff = 1.0
     for attempt in range(retry_count):
         try:
             await rate_limiter.acquire()
             response = await client.get(url, params=params)
             if response.status_code == HTTPStatus.OK:
                 data = response.json()
                 journeys = data.get("journeys", [])
                 if journeys:
                     durations = [
                         j["duration"] for j in journeys if j.get("duration") is not None
                     ]
                     if durations:
                         return min(durations)
                 return None
             if response.status_code not in retryable_statuses:
                 return None
             failure = f"HTTP {response.status_code} for {journey_type} from {from_location}"
         except Exception as e:
             # Deliberately broad: any failure of a single attempt is reported
             # and retried rather than aborting the whole postcode.
             failure = f"Network error for {journey_type} from {from_location}: {e}"
         if attempt + 1 < retry_count:
             warnings.warn(
                 f"{failure}, "
                 f"retrying in {backoff:.1f}s (attempt {attempt + 1}/{retry_count})",
                 stacklevel=2,
             )
             await asyncio.sleep(backoff)
             backoff = min(backoff * 2, MAX_DELAY)
         else:
             # Last attempt: report the failure, but do not sleep for a retry
             # that will never happen.
             warnings.warn(
                 f"{failure} (attempt {attempt + 1}/{retry_count})",
                 stacklevel=2,
             )
     warnings.warn(
         f"Failed to fetch {journey_type} from {from_location} after {retry_count} attempts",
         stacklevel=2,
     )
     return None
 async def fetch_all_modes(
     client: httpx.AsyncClient,
     rate_limiter: RateLimiter,
     postcode: str,
     lat: float,
     lon: float,
     to_location: str,
     journey_date: str,
     journey_time: str,
     semaphore: asyncio.Semaphore,
 ) -> JourneyResult:
     """Query the easy, quick and cycle journeys for one postcode, in that order.
     The origin is sent as ``"lat,lon"``. The semaphore is held for the whole
     sequence; any exception is printed and returned as an error result.
     """
     async with semaphore:
         try:
             origin = f"{lat},{lon}"
             minutes = {}
             # Requests for one postcode run one after another, not concurrently.
             for journey_type in ("easy", "quick", "cycle"):
                 minutes[journey_type] = await fetch_journey_for_mode(
                     client,
                     rate_limiter,
                     origin,
                     to_location,
                     journey_date,
                     journey_time,
                     journey_type=journey_type,
                 )
             return JourneyResult(
                 postcode=postcode,
                 public_transport_easy_minutes=minutes["easy"],
                 public_transport_quick_minutes=minutes["quick"],
                 cycling_minutes=minutes["cycle"],
             )
         except Exception as e:
             print(f"Error: {e}")
             return JourneyResult(postcode=postcode, error=str(e))
 async def fetch_journey_times(
     postcode_data: list[tuple[str, float, float]],
     dest: Destination,
     journey_date: str,
     journey_time: str,
     max_concurrent: int = 2,
     progress_callback: Callable[[JourneyResult], None] | None = None,
 ) -> list[JourneyResult]:
     """Fetch journey times for every postcode, reporting each result as it completes.
     Args:
         postcode_data: List of (postcode, lat, lon) tuples
         dest: Destination for journey planning
         journey_date: Date in YYYYMMDD format
         journey_time: Time in HHMM format
         max_concurrent: Number of postcodes processed at the same time
         progress_callback: Optional callback called with each result
     Returns:
         List of JourneyResult objects in the same order as postcode_data
     Raises:
         RuntimeError: If the TFL_TOKEN environment variable is not set
     """
     semaphore = asyncio.Semaphore(max_concurrent)
     to_location = dest.to_tfl_location()
     rate_limiter = RateLimiter()
     # TFL API authentication via app_key query parameter
     tfl_token = os.environ.get("TFL_TOKEN")
     if not tfl_token:
         raise RuntimeError("TFL_TOKEN environment variable not set")
     async with httpx.AsyncClient(
         base_url=BASE_URL,
         params={"app_key": tfl_token},
         timeout=httpx.Timeout(30),
     ) as client:
         pending = []
         for postcode, lat, lon in postcode_data:
             pending.append(
                 fetch_all_modes(
                     client,
                     rate_limiter,
                     postcode,
                     lat,
                     lon,
                     to_location,
                     journey_date,
                     journey_time,
                     semaphore,
                 )
             )
         # Index results by postcode as they arrive so the input order can be
         # restored afterwards.
         by_postcode = {}
         for finished in asyncio.as_completed(pending):
             outcome = await finished
             by_postcode[outcome.postcode] = outcome
             if progress_callback:
                 progress_callback(outcome)
         return [by_postcode[postcode] for postcode, _, _ in postcode_data]
--- a/pipeline/processors/h3_aggregator.py
+++ b/pipeline/processors/h3_aggregator.py
@ -1,42 +0,0 @@
 from pathlib import Path
 import polars as pl
 from pipeline.config import AGGREGATES_DIR, H3_RESOLUTIONS
 def aggregate(df: pl.LazyFrame, resolution: int) -> pl.LazyFrame:
     """Summarise prices per H3 cell and year at the given resolution.
     Groups by the ``h3_res<resolution>`` column and ``year``, computes the
     row count plus mean/median/min/max price, and renames the cell column
     to ``h3``.
     """
     cell_column = f"h3_res{resolution}"
     price = pl.col("price")
     summaries = [
         pl.len().alias("count"),
         price.mean().alias("avg_price"),
         price.median().alias("median_price"),
         price.min().alias("min_price"),
         price.max().alias("max_price"),
     ]
     grouped = df.group_by(cell_column, "year").agg(summaries)
     return grouped.rename({cell_column: "h3"})
 def aggregate_all(df: pl.LazyFrame) -> dict[int, pl.LazyFrame]:
     """Return one aggregated frame per entry of H3_RESOLUTIONS, keyed by resolution."""
     per_resolution: dict[int, pl.LazyFrame] = {}
     for resolution in H3_RESOLUTIONS:
         per_resolution[resolution] = aggregate(df, resolution)
     return per_resolution
 def save_aggregates(df: pl.LazyFrame, output_dir: Path | None = None) -> list[Path]:
     """Write ``res<N>.parquet`` for every H3 resolution and return the written paths.
     Files go to ``output_dir``, falling back to AGGREGATES_DIR; the directory
     is created when missing.
     """
     target_dir = output_dir or AGGREGATES_DIR
     target_dir.mkdir(parents=True, exist_ok=True)
     written = []
     for resolution, lazy_agg in aggregate_all(df).items():
         destination = target_dir / f"res{resolution}.parquet"
         # Each lazy aggregate is materialised only when it is written.
         lazy_agg.collect().write_parquet(destination)
         written.append(destination)
     return written
--- a/pipeline/run.py
+++ b/pipeline/run.py
@ -1,35 +0,0 @@
 """Pipeline CLI to process property data with H3 spatial indexing."""
 import polars as pl
 from pipeline.sources.postcodes import save_postcodes
 from pipeline.sources.property_prices import PropertyPricesSource
 from pipeline.processors.h3_aggregator import save_aggregates
 def run_pipeline():
     """Run the three pipeline stages: postcodes, property prices, H3 aggregates."""
     banner = "=" * 60
     print(banner)
     print("Property Map Data Pipeline")
     print(banner)
     # Stage 1: postcodes enriched with H3 indices
     print("\n[1/3] Processing postcodes with H3 indices...")
     postcodes_file = save_postcodes()
     print(f"      Saved: {postcodes_file}")
     # Stage 2: join property prices onto the postcode table
     print("\n[2/3] Processing property prices...")
     postcode_frame = pl.scan_parquet(postcodes_file)
     properties = PropertyPricesSource().process(postcode_frame)
     print("      Joined property prices with postcodes")
     # Stage 3: one aggregate file per H3 resolution
     print("\n[3/3] Aggregating at H3 resolutions...")
     for written in save_aggregates(properties):
         megabytes = written.stat().st_size / (1024 * 1024)
         print(f"      Saved: {written.name} ({megabytes:.1f} MB)")
 if __name__ == "__main__":
    run_pipeline()
--- a/pipeline/sources/init.py
+++ b/pipeline/sources/init.py
--- a/pipeline/sources/postcodes.py
+++ b/pipeline/sources/postcodes.py
@ -1,49 +0,0 @@
 from pathlib import Path
 import polars as pl
 import h3
 from pipeline.config import DATA_DIR, H3_RESOLUTIONS, PROCESSED_DIR
 def lat_long_to_h3(lat: float, long: float, resolution: int) -> str:
     """Return the H3 cell index containing (lat, long) at ``resolution``."""
     cell = h3.latlng_to_cell(lat, long, resolution)
     return cell
 def load_postcodes() -> pl.LazyFrame:
     """Lazily read postcode, lat and long from the arcgis parquet file.
     The source column ``pcds`` is exposed as ``postcode``.
     """
     source = DATA_DIR / "arcgis_data.parquet"
     wanted = [
         pl.col("pcds").alias("postcode"),
         pl.col("lat"),
         pl.col("long"),
     ]
     return pl.scan_parquet(source).select(wanted)
 def process_postcodes() -> pl.LazyFrame:
     """Add one ``h3_res<N>`` column per configured resolution to the postcode table.
     The table is collected eagerly, the H3 index is computed row by row,
     and the result is handed back as a lazy frame.
     """
     table = load_postcodes().collect()
     coords = pl.struct(["lat", "long"])
     for res in H3_RESOLUTIONS:
         # Bind the resolution as a default argument so each function keeps
         # its own value rather than the loop variable's final one.
         def to_cell(point, res=res):
             return lat_long_to_h3(point["lat"], point["long"], res)
         h3_expr = coords.map_elements(to_cell, return_dtype=pl.Utf8)
         table = table.with_columns(h3_expr.alias(f"h3_res{res}"))
     return table.lazy()
 def save_postcodes(output_path: Path | None = None) -> Path:
     """Write the H3-enriched postcode table to parquet and return the path used.
     Falls back to ``PROCESSED_DIR / "postcodes_h3.parquet"`` when no path is given.
     """
     if not output_path:
         output_path = PROCESSED_DIR / "postcodes_h3.parquet"
     output_path.parent.mkdir(parents=True, exist_ok=True)
     table = process_postcodes().collect()
     table.write_parquet(output_path)
     return output_path
--- a/pipeline/sources/property_prices.py
+++ b/pipeline/sources/property_prices.py
@ -1,41 +0,0 @@
 import polars as pl
 from pipeline.base import DataSource
 from pipeline.config import DATA_DIR, H3_RESOLUTIONS
 class PropertyPricesSource(DataSource):
    """Data source for Land Registry price-paid records."""
    @property
    def name(self) -> str:
        """Identifier of this data source."""
        return "property_prices"
    def load(self) -> pl.LazyFrame:
        """Lazily scan the raw ``pp-complete.parquet`` file in ``DATA_DIR``."""
        raw_file = DATA_DIR / "pp-complete.parquet"
        return pl.scan_parquet(raw_file)
    def process(self, postcodes: pl.LazyFrame) -> pl.LazyFrame:
        """Attach postcode coordinates and H3 indices to each sale.

        Args:
            postcodes: Frame keyed by ``postcode`` that provides ``lat``,
                ``long`` and one ``h3_res<N>`` column per entry in
                ``H3_RESOLUTIONS``.

        Returns:
            A LazyFrame with ``price``, ``year``, ``property_type``, ``lat``,
            ``long`` and the H3 columns. Sales whose postcode has no match in
            ``postcodes`` are dropped by the inner join.
        """
        sale_year = pl.col("date_of_transfer").dt.year().alias("year")
        sales = self.load().select(
            pl.col("price"),
            sale_year,
            pl.col("property_type"),
            pl.col("postcode"),
        )
        located = sales.join(postcodes, on="postcode", how="inner")
        output_columns = [
            pl.col(column)
            for column in ("price", "year", "property_type", "lat", "long")
        ]
        output_columns.extend(pl.col(f"h3_res{res}") for res in H3_RESOLUTIONS)
        return located.select(output_columns)
--- a/pipeline/transform/crime.py
+++ b/pipeline/transform/crime.py
@ -0,0 +1,63 @@
 import argparse
 from pathlib import Path
 import polars as pl
 def transform_crime(crime_dir: Path, output_path: Path) -> None:
    """Aggregate crime CSVs into a wide per-LSOA table of yearly averages.

    Every ``*.csv`` file found recursively under ``crime_dir`` is scanned.
    Crimes are counted per LSOA, year and crime type; the yearly counts are
    then averaged per LSOA and crime type and pivoted so that each crime type
    becomes its own ``"<crime type> (avg/yr)"`` column. The result is written
    to ``output_path`` as zstd-compressed parquet.

    Args:
        crime_dir: Directory searched recursively for crime CSV files.
        output_path: Destination parquet file. Missing parent directories
            are created.

    Raises:
        FileNotFoundError: If no CSV file exists under ``crime_dir``.
    """
    csvs = sorted(crime_dir.rglob("*.csv"))
    if not csvs:
        # Fail early with an actionable message instead of an opaque error
        # from scanning an empty list of sources.
        raise FileNotFoundError(f"No CSV files found under {crime_dir}")
    # Count only folders that actually hold CSVs, so stray files or empty
    # directories next to the data do not inflate the reported figure.
    # NOTE(review): assumes one folder per month — confirm the input layout.
    month_dirs = {csv.parent for csv in csvs}
    print(f"Found {len(csvs)} CSV files across {len(month_dirs)} months")
    df = pl.scan_csv(
        csvs,
        schema_overrides={"LSOA code": pl.Utf8, "Crime type": pl.Utf8, "Month": pl.Utf8},
    ).select("LSOA code", "Crime type", "Month")
    # Extract year (first four characters of "Month"), count crimes per
    # LSOA / year / crime type, then average the yearly counts.
    # NOTE(review): a year only contributes to an average when at least one
    # crime of that type was recorded in the LSOA that year, so zero-crime
    # years are not averaged in — confirm this is intended.
    yearly_counts = (
        df.filter(pl.col("LSOA code").is_not_null() & (pl.col("LSOA code") != ""))
        .with_columns(pl.col("Month").str.slice(0, 4).alias("year"))
        .group_by("LSOA code", "year", "Crime type")
        .agg(pl.len().alias("count"))
        .group_by("LSOA code", "Crime type")
        .agg(pl.col("count").mean().round(1).alias("yearly_avg"))
        .collect(engine="streaming")
    )
    print(f"Crime types: {sorted(yearly_counts['Crime type'].unique().to_list())}")
    # Pivot crime types into columns
    wide = yearly_counts.pivot(
        on="Crime type",
        index="LSOA code",
        values="yearly_avg",
    )
    # Fill nulls with 0 (crime type never recorded for that LSOA) and rename
    # columns to be descriptive
    value_cols = [col for col in wide.columns if col != "LSOA code"]
    wide = wide.with_columns(pl.col(col).fill_null(0) for col in value_cols)
    wide = wide.rename({col: f"{col} (avg/yr)" for col in value_cols})
    print(f"Output shape: {wide.shape}")
    print(f"Columns: {wide.columns}")
    # Create the destination folder so a fresh output location does not fail
    # after the whole aggregation has already been computed.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    wide.write_parquet(output_path, compression="zstd")
    print(f"Saved to {output_path}")
 def main() -> None:
    """Read ``--input`` and ``--output`` from the command line and run the transform."""
    cli = argparse.ArgumentParser(
        description="Transform crime CSVs into yearly average by LSOA and crime type"
    )
    path_options = (
        ("--input", "Directory containing crime data"),
        ("--output", "Output parquet file path"),
    )
    # Both options are mandatory filesystem paths.
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, required=True, help=help_text)
    options = cli.parse_args()
    transform_crime(crime_dir=options.input, output_path=options.output)
 # Only run the CLI when executed as a script.
 if __name__ == "__main__":
    main()
--- a/pipeline/transform/join_epc_pp.py
+++ b/pipeline/transform/join_epc_pp.py
@ -0,0 +1,161 @@
 import argparse
 import polars as pl
 from pathlib import Path
 from ..utils import fuzzy_join_on_postcode
 # Threshold on the EPC total floor area (m²) applied to matched properties.
 MIN_FLOOR_AREA_M2 = 10
 # Per the Polars Config docs, a negative value prints all columns, so the
 # dataset previews printed below are not truncated column-wise.
 pl.Config.set_tbl_cols(-1)
 def main():
    """Join EPC certificates to Price Paid sales and write the matched rows.

    Command-line arguments:
        --epc: EPC certificates CSV file.
        --price-paid: Price paid parquet file.
        --output: Destination parquet file for the matched properties.

    Steps: keep one EPC row per (address, postcode); collapse Price Paid
    sales to one row per (address, postcode) with their price history; join
    both with ``fuzzy_join_on_postcode``; keep rows matched on both sides
    whose floor area reaches ``MIN_FLOOR_AREA_M2``; derive a construction
    year; lower-case the column names and write the result.
    """
    parser = argparse.ArgumentParser(description="Fuzzy join EPC and Price Paid data")
    parser.add_argument(
        "--epc", type=Path, required=True, help="EPC certificates CSV file"
    )
    parser.add_argument(
        "--price-paid", type=Path, required=True, help="Price paid parquet file"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    args = parser.parse_args()
    # One certificate per (address, postcode): rows are sorted by
    # INSPECTION_DATE descending before the first row of each group is taken.
    epc = (
        pl.scan_csv(args.epc)
        .select(
            pl.col("ADDRESS").alias("epc_address"),
            "POSTCODE",
            "CURRENT_ENERGY_RATING",
            "POTENTIAL_ENERGY_RATING",
            pl.col("PROPERTY_TYPE").alias("epc_property_type"),
            "BUILT_FORM",
            "INSPECTION_DATE",
            "TOTAL_FLOOR_AREA",
            "NUMBER_HABITABLE_ROOMS",
            "FLOOR_HEIGHT",
            "CONSTRUCTION_AGE_BAND",
        )
        .filter(pl.col("epc_address").is_not_null())
        .sort("INSPECTION_DATE", descending=True)
        .group_by("epc_address", "POSTCODE")
        .first()
    )
    print("EPC dataset")
    print(epc.head().collect())
    # https://www.gov.uk/guidance/about-the-price-paid-data
    property_type_map = {
        "D": "Detached",
        "S": "Semi-Detached",
        "T": "Terraced",
        "F": "Flats/Maisonettes",
        "O": "Other",
    }
    duration_map = {"F": "Freehold", "L": "Leasehold"}
    # One row per (address, postcode); sales are sorted by transfer date so
    # that first()/last() pick the earliest / most recent transaction.
    # NOTE(review): with ignore_nulls=True the concatenated address is
    # presumably an empty string (not null) when every part is missing, so
    # the trailing is_not_null() filter may never drop rows — confirm.
    price_paid = (
        pl.scan_parquet(args.price_paid)
        .select(
            "price",
            "date_of_transfer",
            pl.col("property_type")
            .alias("pp_property_type")
            .replace(property_type_map),
            "postcode",
            "paon",
            "saon",
            "street",
            "locality",
            "town_city",
            pl.col("duration").replace(duration_map),
            "old_new",
        )
        .filter(pl.col("pp_property_type") != "Other")
        .with_columns(
            pl.concat_str(
                [pl.col("saon"), pl.col("paon"), pl.col("street")],
                separator=" ",
                ignore_nulls=True,
            ).alias("pp_address"),
        )
        .sort("date_of_transfer")
        .group_by("pp_address", "postcode", maintain_order=True)
        .agg(
            pl.struct(
                pl.col("date_of_transfer").dt.year().alias("year"),
                "price",
            ).alias("historical_prices"),
            pl.col("pp_property_type").last(),
            pl.col("duration").last(),
            pl.col("price").last().alias("latest_price"),
            pl.col("date_of_transfer").last(),
            pl.col("date_of_transfer").first().alias("first_transfer_date"),
            pl.col("old_new").first(),
        )
    ).filter(pl.col("pp_address").is_not_null())
    print("Price paid dataset")
    print(price_paid.head().collect())
    joined = (
        fuzzy_join_on_postcode(
            left=price_paid,
            right=epc,
            left_address_col="pp_address",
            right_address_col="epc_address",
            left_postcode_col="postcode",
            right_postcode_col="POSTCODE",
        )
        .drop("POSTCODE")
        .collect(engine="streaming")
    )
    matched = joined.filter(
        pl.col("epc_address").is_not_null() & pl.col("pp_address").is_not_null()
    )
    total = joined.height
    # Guard the percentage so an empty join result does not raise
    # ZeroDivisionError while printing the statistics.
    match_pct = 100 * matched.height / total if total else 0.0
    print(f"Unique properties: {total}")
    print(f"Matched: {matched.height} ({match_pct:.1f}%)")
    print(f"Unmatched: {total - matched.height}")
    matched = matched.filter(pl.col("TOTAL_FLOOR_AREA") >= MIN_FLOOR_AREA_M2)
    # For new-builds (old_new == "Y"), use the first transaction date year as
    # the exact construction date; otherwise fall back to the EPC age band
    # (first four-digit number found in the band label).
    epc_band_year = (
        pl.col("CONSTRUCTION_AGE_BAND")
        .str.replace("England and Wales: ", "")
        .str.replace(" onwards", "")
        .str.extract(r"(\d{4})", 1)
        .cast(pl.UInt16, strict=False)
    )
    transfer_year = (
        pl.col("first_transfer_date").dt.year().cast(pl.UInt16, strict=False)
    )
    is_new_build = pl.col("old_new") == "Y"
    matched = matched.with_columns(
        pl.when(is_new_build & transfer_year.is_not_null())
        .then(transfer_year)
        .otherwise(epc_band_year)
        .alias("CONSTRUCTION_AGE_BAND"),
        # 0 = exact year from the first sale, 1 = approximated from the EPC
        # band, null = no construction year available.
        pl.when(is_new_build & transfer_year.is_not_null())
        .then(pl.lit(0, dtype=pl.UInt8))
        .when(epc_band_year.is_not_null())
        .then(pl.lit(1, dtype=pl.UInt8))
        .otherwise(pl.lit(None, dtype=pl.UInt8))
        .alias("is_construction_date_approximate"),
    ).drop("old_new", "first_transfer_date")
    # Build the mapping from the columns that still exist: "old_new" and
    # "first_transfer_date" were dropped above and are no longer in `matched`.
    matched = matched.rename({col: col.lower() for col in matched.columns})
    print(matched.head())
    matched.write_parquet(args.output)
    print(f"Wrote {args.output}")
 # Only run the CLI when executed as a script.
 if __name__ == "__main__":
    main()
--- a/pipeline/transform/merge.py
+++ b/pipeline/transform/merge.py
@ -0,0 +1,290 @@
 import argparse
 import polars as pl
 from pathlib import Path
 # Threshold on "latest_price" used when filtering properties in _build_wide.
 MIN_PRICE = 10_000
 # Threshold on "total_floor_area" (m²) used when filtering properties in
 # _build_wide.
 MIN_FLOOR_AREA_M2 = 10
 def _build_wide(
    epc_pp_path: Path,
    arcgis_path: Path,
    iod_path: Path,
    poi_proximity_path: Path,
    journey_times_path: Path,
    ethnicity_path: Path,
    crime_path: Path,
    noise_path: Path,
    school_proximity_path: Path,
    broadband_path: Path,
 ) -> pl.DataFrame:
    """Build the wide dataframe by joining epc_pp with all auxiliary data.

    Args:
        epc_pp_path: Parquet with the joined EPC / price-paid properties.
        arcgis_path: Parquet providing ``pcds``, ``lat``, ``long``,
            ``lsoa21`` and ``oa21`` per postcode. Inner-joined, so
            properties whose postcode is missing here are dropped.
        iod_path: Parquet left-joined on ``LSOA code (2021)``.
        poi_proximity_path: Parquet left-joined on ``postcode``.
        journey_times_path: Parquet with per-postcode journey times; when a
            postcode has several rows, the first one after sorting by
            ``public_transport_quick_minutes`` (nulls last) is kept.
        ethnicity_path: Parquet left-joined on ``Geography_code``.
        crime_path: Parquet left-joined on ``LSOA code``; must contain the
            ``"<crime type> (avg/yr)"`` columns summed below.
        noise_path: Parquet with ``postcode`` and the road/rail/airport
            ``*_noise_lden_db`` columns.
        school_proximity_path: Parquet left-joined on ``postcode``.
        broadband_path: Parquet with ``postcode_space`` and the availability
            percentage columns used to derive the speed tier.

    Returns:
        The collected wide DataFrame with the renamed output columns.
    """
    wide = pl.scan_parquet(epc_pp_path)
    arcgis = pl.scan_parquet(arcgis_path).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lon"),
        "lsoa21",
        "oa21",
    )
    wide = wide.join(arcgis, on="postcode", how="inner")
    # Reduce journey times to a single row per postcode before joining so the
    # left join cannot duplicate properties.
    journey_times = (
        pl.scan_parquet(journey_times_path)
        .select(
            "postcode",
            "public_transport_easy_minutes",
            "public_transport_quick_minutes",
            "cycling_minutes",
        )
        .sort("public_transport_quick_minutes", nulls_last=True)
        .group_by("postcode")
        .first()
    )
    wide = wide.join(journey_times, on="postcode", how="left")
    iod = pl.scan_parquet(iod_path)
    wide = wide.join(iod, left_on="lsoa21", right_on="LSOA code (2021)", how="left")
    # The ethnicity join key comes from the IoD columns joined just above,
    # so this join must stay after the IoD join.
    ethnicity = pl.scan_parquet(ethnicity_path)
    wide = wide.join(
        ethnicity,
        left_on="Local Authority District code (2024)",
        right_on="Geography_code",
        how="left",
    )
    crime = pl.scan_parquet(crime_path)
    wide = wide.join(crime, left_on="lsoa21", right_on="LSOA code", how="left")
    # Roll the individual crime-type averages up into two summary columns.
    wide = wide.with_columns(
        pl.sum_horizontal(
            "Violence and sexual offences (avg/yr)",
            "Robbery (avg/yr)",
            "Burglary (avg/yr)",
            "Possession of weapons (avg/yr)",
        ).alias("serious_crime_avg_yr"),
        pl.sum_horizontal(
            "Anti-social behaviour (avg/yr)",
            "Criminal damage and arson (avg/yr)",
            "Shoplifting (avg/yr)",
            "Bicycle theft (avg/yr)",
            "Theft from the person (avg/yr)",
            "Other theft (avg/yr)",
            "Vehicle crime (avg/yr)",
            "Public order (avg/yr)",
            "Drugs (avg/yr)",
            "Other crime (avg/yr)",
        ).alias("minor_crime_avg_yr"),
    )
    poi_counts = pl.scan_parquet(poi_proximity_path)
    wide = wide.join(poi_counts, on="postcode", how="left")
    noise_cols = ["road_noise_lden_db", "rail_noise_lden_db", "airport_noise_lden_db"]
    noise = (
        pl.scan_parquet(noise_path)
        .with_columns(
            # NaN → null so max_horizontal ignores missing instead of propagating NaN
            *[pl.col(c).fill_nan(None) for c in noise_cols],
        )
        .with_columns(
            pl.max_horizontal(*noise_cols).fill_null(0).alias("noise_lden_db"),
        )
        .select("postcode", "noise_lden_db")
    )
    wide = wide.join(noise, on="postcode", how="left")
    school_proximity = pl.scan_parquet(school_proximity_path)
    wide = wide.join(school_proximity, on="postcode", how="left")
    # Broadband: derive max available download speed tier per postcode from
    # Ofcom availability percentages.  Tiers: Gigabit ≥1000, UFBB ≥300,
    # UFBB(100) ≥100, SFBB ≥30 Mbps.
    broadband = (
        pl.scan_parquet(broadband_path)
        .select(
            pl.col("postcode_space").alias("bb_postcode"),
            pl.when(pl.col("Gigabit availability (% premises)") > 0)
            .then(1000)
            .when(pl.col("UFBB availability (% premises)") > 0)
            .then(300)
            .when(pl.col("UFBB (100Mbit/s) availability (% premises)") > 0)
            .then(100)
            .when(pl.col("SFBB availability (% premises)") > 0)
            .then(30)
            .otherwise(10)
            .cast(pl.UInt16)
            .alias("max_download_speed"),
        )
        .group_by("bb_postcode")
        .agg(pl.col("max_download_speed").max())
    )
    wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left")
    # Combine the price-paid property type with the EPC built form, without
    # repeating the value when both sources agree.
    wide = wide.with_columns(
        pl.when(pl.col("pp_property_type") == pl.col("built_form"))
        .then(pl.col("pp_property_type"))
        .otherwise(
            pl.concat_str(
                [pl.col("pp_property_type"), pl.lit("/"), pl.col("built_form")]
            )
        )
        .alias("property_type_built_form")
    )
    wide = (
        # Inclusive minimum, matching the MIN_PRICE filter below and the
        # floor-area filter applied when the EPC / price-paid file is built.
        wide.filter(pl.col("total_floor_area") >= MIN_FLOOR_AREA_M2)
        .filter(pl.col("latest_price") >= MIN_PRICE)
        .with_columns(
            # Null out placeholder values so they read as missing.
            pl.when(pl.col("duration") == "U")
            .then(None)
            .otherwise(pl.col("duration"))
            .alias("duration"),
            pl.when(pl.col("current_energy_rating") == "INVALID!")
            .then(None)
            .otherwise(pl.col("current_energy_rating"))
            .alias("current_energy_rating"),
        )
        .with_columns(
            (pl.col("latest_price") / pl.col("total_floor_area"))
            .round(0)
            .cast(pl.Int32)
            .alias("Price per sqm"),
        )
        .drop(
            "date_of_transfer",
            "inspection_date",
            "floor_height",
            "LSOA name (2021)",
            "Local Authority District code (2024)",
            "Local Authority District name (2024)",
            "Wider Barriers Sub-domain Score",
            "Geographical Barriers Sub-domain Score",
            "Adult Skills Sub-domain Score",
            "Children and Young People Sub-domain Score",
            "Income Deprivation Affecting Older People (IDAOPI) Score (rate)",
            "Income Deprivation Affecting Children Index (IDACI) Score (rate)",
            "Barriers to Housing and Services Score",
            "lsoa21",
            "oa21",
            "pp_property_type",
            "built_form",
        )
        # NOTE(review): "Leashold/Freehold" looks like a typo for
        # "Leasehold/Freehold"; left unchanged because consumers of the
        # output may depend on the exact column name.
        .rename(
            {
                "construction_age_band": "Approximate construction age",
                "is_construction_date_approximate": "Is construction date approximate",
                "pp_address": "Address per Property Register",
                "epc_address": "Address per EPC",
                "postcode": "Postcode",
                "duration": "Leashold/Freehold",
                "current_energy_rating": "Current energy rating",
                "potential_energy_rating": "Potential energy rating",
                "total_floor_area": "Total floor area (sqm)",
                "epc_property_type": "Property type",
                "property_type_built_form": "Property type/built form",
                "restaurants_2km": "Restaurants within 2km",
                "groceries_2km": "Groceries within 2km",
                "parks_2km": "Parks within 2km",
                "public_transport_2km": "Public transport within 2km",
                "latest_price": "Last known price",
                "number_habitable_rooms": "Number of bedrooms & living rooms",
                "noise_lden_db": "Noise (dB)",
                "good_primary_5km": "Good+ primary schools within 5km",
                "good_secondary_5km": "Good+ secondary schools within 5km",
                "max_download_speed": "Max available download speed (Mbps)",
                "serious_crime_avg_yr": "Serious crime (avg/yr)",
                "minor_crime_avg_yr": "Minor crime (avg/yr)",
            }
        )
    )
    print("Collecting with streaming engine...")
    return wide.collect(engine="streaming")
 def main():
    """Parse the CLI arguments, build the wide dataframe and write it to parquet.

    Every path argument is required because ``_build_wide`` reads all of its
    inputs unconditionally; omitting one is reported as a usage error by
    argparse before any data is read.
    """
    parser = argparse.ArgumentParser(
        description="Build wide property dataframe with all joins"
    )
    # (flag, help text) for each mandatory path argument, in help-output order.
    required_paths = (
        ("--epc-pp", "EPC-Price Paid joined parquet file"),
        ("--arcgis", "ArcGIS postcode data parquet file"),
        ("--iod", "Index of Deprivation parquet file"),
        ("--poi-proximity", "POI proximity counts parquet file"),
        ("--journey-times", "Journey times parquet file"),
        ("--ethnicity", "Ethnicity by local authority parquet file"),
        ("--crime", "Crime by LSOA parquet file"),
        ("--noise", "Noise levels by postcode parquet file"),
        ("--school-proximity", "School proximity counts parquet file"),
        ("--broadband", "Broadband availability by postcode parquet file"),
        ("--output", "Output parquet file path"),
    )
    for flag, help_text in required_paths:
        parser.add_argument(flag, type=Path, required=True, help=help_text)
    args = parser.parse_args()
    wide = _build_wide(
        epc_pp_path=args.epc_pp,
        arcgis_path=args.arcgis,
        iod_path=args.iod,
        poi_proximity_path=args.poi_proximity,
        journey_times_path=args.journey_times,
        ethnicity_path=args.ethnicity,
        crime_path=args.crime,
        noise_path=args.noise,
        school_proximity_path=args.school_proximity,
        broadband_path=args.broadband,
    )
    print(f"Columns: {wide.columns}")
    print(f"Rows: {wide.height}")
    wide.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
 # Only run the CLI when executed as a script.
 if __name__ == "__main__":
    main()
--- a/pipeline/transform/poi_proximity.py
+++ b/pipeline/transform/poi_proximity.py
@ -0,0 +1,52 @@
 """Compute POI proximity counts per postcode from ArcGIS + filtered POIs."""
 import argparse
 from pathlib import Path
 import polars as pl
 from pipeline.utils.poi_counts import _count_pois_per_postcode
 # POI category groups for proximity counting: each key names a group and maps
 # to the category labels counted towards it. Passed as `groups` to
 # `_count_pois_per_postcode` in `main`.
 POI_GROUPS = {
    "restaurants": ["Restaurant", "Fast Food"],
    "groceries": ["Greengrocer", "Grocery Shop", "Supermarket", "Convenience Store"],
    "parks": ["Park", "Garden", "Nature Reserve"],
    "public_transport": ["Metro or Tram stop", "Rail station", "Bus stop", "Bus station"], # comes from naptan.py
 }
 def main():
    """Count grouped POIs around every postcode and write the counts to parquet.

    Reads the ArcGIS postcodes (``--arcgis``) and the filtered POIs
    (``--pois``), calls ``_count_pois_per_postcode`` with ``POI_GROUPS`` and
    ``radius_km=2``, and writes the result to ``--output``.
    """
    cli = argparse.ArgumentParser(
        description="Count POIs within radius per postcode"
    )
    path_options = (
        ("--arcgis", "ArcGIS postcode parquet"),
        ("--pois", "Filtered POIs parquet"),
        ("--output", "Output parquet path"),
    )
    # All three options are mandatory filesystem paths.
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, required=True, help=help_text)
    options = cli.parse_args()
    arcgis_frame = pl.read_parquet(options.arcgis)
    postcodes = arcgis_frame.select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lon"),
    )
    pois = pl.read_parquet(options.pois)
    counts = _count_pois_per_postcode(postcodes, pois, groups=POI_GROUPS, radius_km=2)
    counts.write_parquet(options.output)
    written_mb = options.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {options.output} ({written_mb:.1f} MB)")
 # Only run the CLI when executed as a script.
 if __name__ == "__main__":
    main()
--- a/pipeline/transform/school_proximity.py
+++ b/pipeline/transform/school_proximity.py
@ -0,0 +1,73 @@
 """Compute good-rated school proximity counts per postcode."""
 import argparse
 from pathlib import Path
 import polars as pl
 from pipeline.utils.poi_counts import _count_pois_per_postcode
 # Proximity-count groups: each group name maps to the `category` labels that
 # `main` assigns to schools ("good_primary" / "good_secondary"). Passed as
 # `groups` to `_count_pois_per_postcode`.
 SCHOOL_GROUPS = {
    "good_primary": ["good_primary"],
    "good_secondary": ["good_secondary"],
 }
 def main():
    """Count good-rated primary/secondary schools around every postcode.

    Reads the Ofsted inspections (``--ofsted``) and the ArcGIS postcodes
    (``--arcgis``), keeps primary and secondary schools whose overall
    effectiveness is "1" or "2", locates them through their postcode, and
    writes per-postcode counts from ``_count_pois_per_postcode`` to
    ``--output``.
    """
    # Single source of truth for the search radius, so the CLI description
    # cannot drift from the value actually passed to the counting helper.
    radius_km = 5
    parser = argparse.ArgumentParser(
        description=(
            f"Count good+ primary/secondary schools within {radius_km}km per postcode"
        )
    )
    parser.add_argument(
        "--ofsted", type=Path, required=True, help="Ofsted inspection parquet"
    )
    parser.add_argument(
        "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet path"
    )
    args = parser.parse_args()
    # Load Ofsted data: filter to good+ (1, 2) primary/secondary schools
    ofsted = pl.read_parquet(args.ofsted).filter(
        pl.col("Ofsted phase").is_in(["Primary", "Secondary"])
        & pl.col("Overall effectiveness").is_in(["1", "2"])
    )
    print(f"Good+ schools: {len(ofsted):,}")
    # Assign category based on phase (only Primary/Secondary remain here)
    ofsted = ofsted.with_columns(
        pl.when(pl.col("Ofsted phase") == "Primary")
        .then(pl.lit("good_primary"))
        .otherwise(pl.lit("good_secondary"))
        .alias("category")
    ).select(
        pl.col("Postcode").alias("postcode"),
        "category",
    )
    # Join with arcgis to get lat/lng for each school's postcode; schools
    # whose postcode is not in the ArcGIS data are dropped by the inner join.
    arcgis = pl.read_parquet(args.arcgis).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lng"),
    )
    schools = ofsted.join(arcgis, on="postcode", how="inner")
    print(f"Schools with coordinates: {len(schools):,}")
    # Load all postcodes for proximity counting
    postcodes = arcgis.rename({"lng": "lon"})
    result = _count_pois_per_postcode(
        postcodes, schools, radius_km=radius_km, groups=SCHOOL_GROUPS
    )
    result.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
 # Only run the CLI when executed as a script.
 if __name__ == "__main__":
    main()
--- a/pipeline/transform/transform_poi.py
+++ b/pipeline/transform/transform_poi.py
@ -0,0 +1,673 @@
 import argparse
 import warnings
 from pathlib import Path
 import polars as pl
 # Category keys in "<group>/<value>" form that are to be discarded, as the
 # name and the "categories we keep" map below suggest.
 # NOTE(review): where this set is applied is outside this view — confirm usage.
 DROP_CATEGORIES = {
    "amenity/advice",
    "amenity/atm",
    "amenity/bbq",
    "amenity/bench",
    "amenity/bicycle_parking",
    "amenity/clock",
    "amenity/fixme",
    "amenity/grit_bin",
    "amenity/hunting_stand",
    "amenity/motorcycle_parking",
    "amenity/notice_board",
    "amenity/parking",
    "amenity/parking_entrance",
    "amenity/parking_space",
    "amenity/post_box",
    "amenity/telephone",
    "amenity/toilets",
    "amenity/vacuum_cleaner",
    "amenity/waste_basket",
    "building/air_shaft",
    "building/apartments",
    "building/detached",
    "building/entrance",
    "building/entry",
    "building/garage",
    "building/garages",
    "building/house",
    "building/hut",
    "building/no",
    "building/office",
    "building/public",
    "building/residential",
    "building/roof",
    "building/shed",
    "building/terrace",
    "building/yes",
    "emergency/access_point",
    "emergency/ambulance_station",
    "emergency/assembly_point",
    "emergency/bleed_control_kit",
    "emergency/defibrillator",
    "emergency/designated",
    "emergency/dry_riser_inlet",
    "emergency/emergency_ward_entrance",
    "emergency/fire_alarm_box",
    "emergency/fire_extinguisher",
    "emergency/fire_hydrant",
    "emergency/fire_service_inlet",
    "emergency/first_aid_kit",
    "emergency/life_ring",
    "emergency/lifeguard",
    "emergency/no",
    "emergency/phone",
    "emergency/rescue_equipment",
    "emergency/siren",
    "emergency/throw_bag",
    "emergency/water_rescue",
    "emergency/yes",
    "leisure/firepit",
    "leisure/fishing",
    "leisure/picnic_table",
    "office/company",
    "office/yes",
    "tourism/apartment",
    "tourism/apartments",
    "tourism/camp_pitch",
    "tourism/information",
    "tourism/village_sign",
    "tourism/yes",
    # public transport comes from naptan
    "public_transport/entrance",
    "public_transport/platform",
    "public_transport/station",
    "public_transport/stop_position",
 }
 # (friendly_name, emoji) for every category we keep
 CATEGORY_MAP: dict[str, tuple[str, str]] = {
    # amenity
    "amenity/animal_boarding": ("Animal Boarding", "🐾"),
    "amenity/animal_breeding": ("Animal Breeding", "🐣"),
    "amenity/animal_shelter": ("Animal Shelter", "🏠"),
    "amenity/arts_centre": ("Arts Centre", "🎨"),
    "amenity/bank": ("Bank", "🏦"),
    "amenity/bar": ("Bar", "🍸"),
    "amenity/bicycle_rental": ("Bike Rental", "🚲"),
    "amenity/bicycle_repair_station": ("Bike Repair", "🔧"),
    "amenity/binoculars": ("Public Binoculars", "🔭"),
    "amenity/boat_rental": ("Boat Rental", "⛵"),
    "amenity/boat_storage": ("Boat Storage", "🚢"),
    "amenity/boot_scraper": ("Boot Scraper", "🥾"),
    "amenity/bureau_de_change": ("Currency Exchange", "💱"),
    "amenity/bus_station": ("Bus Station", "🚌"),
    "amenity/cafe": ("Café", "☕"),
    "amenity/car_rental": ("Car Rental", "🚗"),
    "amenity/car_sharing": ("Car Sharing", "🚙"),
    "amenity/car_wash": ("Car Wash", "🧽"),
    "amenity/care_home": ("Care Home", "🏥"),
    "amenity/casino": ("Casino", "🎰"),
    "amenity/charging_station": ("EV Charging", "🔌"),
    "amenity/check_in": ("Check-In Point", "✅"),
    "amenity/childcare": ("Childcare", "👶"),
    "amenity/cinema": ("Cinema", "🎬"),
    "amenity/clinic": ("Clinic", "🩺"),
    "amenity/club": ("Club", "🏛️"),
    "amenity/college": ("College", "🎓"),
    "amenity/community_centre": ("Community Centre", "🤝"),
    "amenity/compressed_air": ("Compressed Air", "💨"),
    "amenity/conference_centre": ("Conference Centre", "📋"),
    "amenity/courthouse": ("Courthouse", "⚖️"),
    "amenity/coworking_space": ("Co-working Space", "💻"),
    "amenity/crematorium": ("Crematorium", "🕯️"),
    "amenity/dancing_school": ("Dance School", "💃"),
    "amenity/dentist": ("Dentist", "🦷"),
    "amenity/doctors": ("Doctor", "👨‍⚕️"),
    "amenity/dojo": ("Dojo", "🥋"),
    "amenity/donation_box": ("Donation Box", "📦"),
    "amenity/dressing_room": ("Dressing Room", "👗"),
    "amenity/drinking_water": ("Drinking Water", "🚰"),
    "amenity/driving_school": ("Driving School", "🚦"),
    "amenity/escooter_rental": ("E-Scooter Rental", "🛴"),
    "amenity/events_venue": ("Events Venue", "🎪"),
    "amenity/fast_food": ("Fast Food", "🍔"),
    "amenity/feeding_place": ("Feeding Place", "🍽️"),
    "amenity/ferry_terminal": ("Ferry Terminal", "⛴️"),
    "amenity/fire_station": ("Fire Station", "🚒"),
    "amenity/food_court": ("Food Court", "🍴"),
    "amenity/fountain": ("Fountain", "⛲"),
    "amenity/fuel": ("Fuel Station", "⛽"),
    "amenity/gambling": ("Gambling", "🎲"),
    "amenity/grave_yard": ("Graveyard", "🪦"),
    "amenity/hall": ("Hall", "🏛️"),
    "amenity/hookah_lounge": ("Hookah Lounge", "💨"),
    "amenity/hospital": ("Hospital", "🏥"),
    "amenity/ice_cream": ("Ice Cream", "🍦"),
    "amenity/internet_cafe": ("Internet Café", "🌐"),
    "amenity/kick-scooter_rental": ("Kick Scooter Rental", "🛴"),
    "amenity/kindergarten": ("Kindergarten", "💒"),
    "amenity/language_school": ("Language School", "🗣️"),
    "amenity/letter_box": ("Letter Box", "📮"),
    "amenity/library": ("Library", "📚"),
    "amenity/loading_dock": ("Loading Dock", "📥"),
    "amenity/lounge": ("Lounge", "🛋️"),
    "amenity/lounger": ("Public Lounger", "🪑"),
    "amenity/marketplace": ("Market", "🛒"),
    "amenity/money_transfer": ("Money Transfer", "💸"),
    "amenity/mounting_block": ("Mounting Block", "🐴"),
    "amenity/music_school": ("Music School", "🎵"),
    "amenity/music_venue": ("Music Venue", "🎶"),
    "amenity/nightclub": ("Nightclub", "🪩"),
    "amenity/nursing_home": ("Nursing Home", "🏠"),
    "amenity/parcel_locker": ("Parcel Locker", "📦"),
    "amenity/payment_terminal": ("Payment Terminal", "💳"),
    "amenity/pharmacy": ("Pharmacy", "💊"),
    "amenity/photo_booth": ("Photo Booth", "📸"),
    "amenity/piano": ("Public Piano", "🎹"),
    "amenity/place_of_worship": ("Place of Worship", "⛪"),
    "amenity/police": ("Police Station", "🚔"),
    "amenity/post_depot": ("Post Depot", "📬"),
    "amenity/post_office": ("Post Office", "🏤"),
    "amenity/prep_school": ("Prep School", "📖"),
    "amenity/pub": ("Pub", "🍺"),
    "amenity/public_bookcase": ("Public Bookcase", "📕"),
    "amenity/public_building": ("Public Building", "🏢"),
    "amenity/reception_desk": ("Reception Desk", "🛎️"),
    "amenity/recycling": ("Recycling", "♻️"),
    "amenity/restaurant": ("Restaurant", "🍽️"),
    "amenity/sanitary_dump_station": ("Sanitary Dump Station", "🚿"),
    "amenity/school": ("School", "🏫"),
    "amenity/scout_hut": ("Scout Hut", "⚜️"),
    "amenity/shelter": ("Shelter", "🛖"),
    "amenity/shower": ("Public Shower", "🚿"),
    "amenity/smoking_area": ("Smoking Area", "🚬"),
    "amenity/social_centre": ("Social Centre", "🏘️"),
    "amenity/social_club": ("Social Club", "🤝"),
    "amenity/social_facility": ("Social Facility", "🫂"),
    "amenity/stripclub": ("Strip Club", "🔞"),
    "amenity/studio": ("Studio", "🎙️"),
    "amenity/table": ("Public Table", "🪑"),
    "amenity/taxi": ("Taxi Stand", "🚕"),
    "amenity/telescope": ("Public Telescope", "🔭"),
    "amenity/theatre": ("Theatre", "🎭"),
    "amenity/ticket_validator": ("Ticket Validator", "🎫"),
    "amenity/townhall": ("Town Hall", "🏛️"),
    "amenity/training": ("Training Centre", "📝"),
    "amenity/trolley_bay": ("Trolley Bay", "🛒"),
    "amenity/university": ("University", "🏫"),
    "amenity/vehicle_inspection": ("Vehicle Inspection", "🔍"),
    "amenity/vending_machine": ("Vending Machine", "🏧"),
    "amenity/veterinary": ("Vet", "🐕"),
    "amenity/washing_machine": ("Washing Machine", "🧺"),
    "amenity/washingline": ("Washing Line", "👕"),
    "amenity/waste_disposal": ("Waste Disposal", "🗑️"),
    "amenity/waste_transfer_station": ("Waste Transfer Station", "🚛"),
    "amenity/water_point": ("Water Point", "💧"),
    "amenity/watering_place": ("Watering Place", "🚰"),
    "amenity/weighbridge": ("Weighbridge", "⚖️"),
    # building
    "building/barn": ("Barn", "🏚️"),
    "building/bunker": ("Bunker", "🏗️"),
    "building/chapel": ("Chapel", "⛪"),
    "building/church": ("Church", "⛪"),
    "building/commercial": ("Commercial Building", "🏬"),
    "building/construction": ("Construction Site", "🚧"),
    "building/farm": ("Farmhouse", "🌾"),
    "building/greenhouse": ("Greenhouse", "🌿"),
    "building/industrial": ("Industrial Building", "🏭"),
    "building/kiosk": ("Kiosk", "🏪"),
    "building/retail": ("Retail Building", "🏬"),
    "building/ruins": ("Ruins", "🏚️"),
    "building/school": ("School Building", "🏫"),
    "building/semidetached_house": ("Semi-Detached House", "🏠"),
    "building/service": ("Service Building", "🔧"),
    "building/university": ("University Building", "🎓"),
    "building/warehouse": ("Warehouse", "🏭"),
    # craft
    "craft/agricultural_engines": ("Agricultural Engines", "🚜"),
    "craft/atelier": ("Atelier", "🎨"),
    "craft/blacksmith": ("Blacksmith", "🔨"),
    "craft/bookbinder": ("Bookbinder", "📖"),
    "craft/brewery": ("Brewery", "🍺"),
    "craft/builder": ("Builder", "🧱"),
    "craft/carpenter": ("Carpenter", "🪚"),
    "craft/caterer": ("Caterer", "🍱"),
    "craft/cleaning": ("Cleaning Service", "🧹"),
    "craft/confectionery": ("Confectioner", "🍬"),
    "craft/distillery": ("Distillery", "🥃"),
    "craft/dressmaker": ("Dressmaker", "👗"),
    "craft/electrician": ("Electrician", "⚡"),
    "craft/electronics_repair": ("Electronics Repair", "🔌"),
    "craft/floorer": ("Flooring Specialist", "🪵"),
    "craft/gardener": ("Gardener", "🌱"),
    "craft/glaziery": ("Glazier", "🪟"),
    "craft/handicraft": ("Handicraft", "✂️"),
    "craft/hvac": ("HVAC", "❄️"),
    "craft/jeweller": ("Jeweller", "💎"),
    "craft/joiner": ("Joiner", "🪚"),
    "craft/key_cutter": ("Key Cutter", "🔑"),
    "craft/locksmith": ("Locksmith", "🔐"),
    "craft/metal_construction": ("Metal Fabrication", "🔩"),
    "craft/painter": ("Painter & Decorator", "🖌️"),
    "craft/photographer": ("Photographer", "📷"),
    "craft/photographic_laboratory": ("Photo Lab", "🖼️"),
    "craft/plumber": ("Plumber", "🔧"),
    "craft/pottery": ("Pottery", "🏺"),
    "craft/printer": ("Printer", "🖨️"),
    "craft/roofer": ("Roofer", "🏠"),
    "craft/sawmill": ("Sawmill", "🪵"),
    "craft/scaffolder": ("Scaffolder", "🏗️"),
    "craft/sculptor": ("Sculptor", "🗿"),
    "craft/shoemaker": ("Shoemaker", "👞"),
    "craft/signmaker": ("Sign Maker", "🪧"),
    "craft/stonemason": ("Stonemason", "🪨"),
    "craft/tailor": ("Tailor", "🧵"),
    "craft/upholsterer": ("Upholsterer", "🛋️"),
    "craft/watchmaker": ("Watchmaker", "⌚"),
    "craft/window_construction": ("Window Fitter", "🪟"),
    "craft/winery": ("Winery", "🍷"),
    "craft/yes": ("Craft Workshop", "🛠️"),
    # healthcare
    "healthcare/alternative": ("Alternative Medicine", "🌿"),
    "healthcare/audiologist": ("Audiologist", "👂"),
    "healthcare/centre": ("Health Centre", "🏥"),
    "healthcare/clinic": ("Health Clinic", "🩺"),
    "healthcare/counselling": ("Counselling", "🧠"),
    "healthcare/dentist": ("Dental Practice", "🦷"),
    "healthcare/doctor": ("GP Surgery", "👨‍⚕️"),
    "healthcare/hospital": ("Hospital", "🏥"),
    "healthcare/laboratory": ("Medical Lab", "🔬"),
    "healthcare/optometrist": ("Optometrist", "👁️"),
    "healthcare/pharmacy": ("Pharmacy", "💊"),
    "healthcare/physiotherapist": ("Physiotherapist", "🏃"),
    "healthcare/podiatrist": ("Podiatrist", "🦶"),
    "healthcare/psychotherapist": ("Psychotherapist", "🧠"),
    "healthcare/rehabilitation": ("Rehabilitation Centre", "♿"),
    "healthcare/vaccination_centre": ("Vaccination Centre", "💉"),
    "healthcare/yes": ("Healthcare Facility", "🏥"),
    # leisure
    "leisure/adult_gaming_centre": ("Adult Gaming Centre", "🎮"),
    "leisure/amusement_arcade": ("Amusement Arcade", "🕹️"),
    "leisure/bandstand": ("Bandstand", "🎺"),
    "leisure/bathing_place": ("Bathing Spot", "🏖️"),
    "leisure/bird_hide": ("Bird Hide", "🐦"),
    "leisure/bowling_alley": ("Bowling Alley", "🎳"),
    "leisure/common": ("Common Land", "🌳"),
    "leisure/dance": ("Dance Venue", "💃"),
    "leisure/dog_park": ("Dog Park", "🐕"),
    "leisure/escape_game": ("Escape Room", "🔓"),
    "leisure/fitness_centre": ("Gym", "🏋️"),
    "leisure/fitness_station": ("Outdoor Gym", "💪"),
    "leisure/garden": ("Garden", "🌷"),
    "leisure/golf_course": ("Golf Course", "⛳"),
    "leisure/hackerspace": ("Hackerspace", "💻"),
    "leisure/horse_riding": ("Horse Riding", "🐎"),
    "leisure/indoor_play": ("Indoor Play Area", "🧒"),
    "leisure/marina": ("Marina", "⚓"),
    "leisure/miniature_golf": ("Mini Golf", "⛳"),
    "leisure/nature_reserve": ("Nature Reserve", "🦔"),
    "leisure/outdoor_seating": ("Outdoor Seating", "🪑"),
    "leisure/park": ("Park", "🌳"),
    "leisure/pitch": ("Sports Pitch", "⚽"),
    "leisure/playground": ("Playground", "🛝"),
    "leisure/sauna": ("Sauna", "🧖"),
    "leisure/slipway": ("Slipway", "🚤"),
    "leisure/social_club": ("Social Club", "🍻"),
    "leisure/sports_centre": ("Sports Centre", "🏟️"),
    "leisure/sports_hall": ("Sports Hall", "🏀"),
    "leisure/swimming_pool": ("Swimming Pool", "🏊"),
    "leisure/tanning_salon": ("Tanning Salon", "☀️"),
    "leisure/track": ("Running Track", "🏃"),
    "leisure/trampoline_park": ("Trampoline Park", "🤸"),
    "leisure/water_park": ("Water Park", "🌊"),
    "leisure/wildlife_hide": ("Wildlife Hide", "🦌"),
    "leisure/yes": ("Leisure Facility", "🎉"),
    # office
    "office/accountant": ("Accountant", "🧮"),
    "office/advertising_agency": ("Advertising Agency", "📢"),
    "office/architect": ("Architect", "📐"),
    "office/association": ("Association", "🏛️"),
    "office/charity": ("Charity", "❤️"),
    "office/construction_company": ("Construction Company", "🏗️"),
    "office/consulting": ("Consulting Firm", "📊"),
    "office/courier": ("Courier Service", "📦"),
    "office/coworking": ("Co-working Space", "💻"),
    "office/design": ("Design Studio", "🎨"),
    "office/diplomatic": ("Diplomatic Office", "🏛️"),
    "office/educational_institution": ("Education Office", "🎓"),
    "office/employment_agency": ("Employment Agency", "💼"),
    "office/energy_supplier": ("Energy Supplier", "⚡"),
    "office/engineer": ("Engineering Firm", "⚙️"),
    "office/estate_agent": ("Estate Agent", "🏠"),
    "office/financial": ("Financial Services", "💰"),
    "office/financial_advisor": ("Financial Advisor", "📈"),
    "office/foundation": ("Foundation", "🏛️"),
    "office/government": ("Government Office", "🏛️"),
    "office/graphic_design": ("Graphic Design", "🖌️"),
    "office/healthcare": ("Healthcare Office", "🏥"),
    "office/home_care": ("Home Care Service", "🏠"),
    "office/insurance": ("Insurance", "🛡️"),
    "office/interior_design": ("Interior Design", "🛋️"),
    "office/it": ("IT Company", "💻"),
    "office/lawyer": ("Lawyer", "⚖️"),
    "office/logistics": ("Logistics", "🚚"),
    "office/marketing": ("Marketing Agency", "📣"),
    "office/mortgage": ("Mortgage Broker", "🏦"),
    "office/moving_company": ("Moving Company", "📦"),
    "office/newspaper": ("Newspaper Office", "📰"),
    "office/ngo": ("NGO", "🌍"),
    "office/notary": ("Notary", "📜"),
    "office/political_party": ("Political Party", "🗳️"),
    "office/politician": ("Politician Office", "🏛️"),
    "office/property_management": ("Property Management", "🏘️"),
    "office/recruitment": ("Recruitment Agency", "👥"),
    "office/religion": ("Religious Office", "✝️"),
    "office/research": ("Research Office", "🔬"),
    "office/security": ("Security Company", "🔒"),
    "office/solicitor": ("Solicitor", "⚖️"),
    "office/surveyor": ("Surveyor", "📏"),
    "office/tax_advisor": ("Tax Advisor", "🧾"),
    "office/taxi": ("Taxi Office", "🚕"),
    "office/telecommunication": ("Telecoms Office", "📡"),
    "office/therapist": ("Therapist", "🧠"),
    "office/travel_agent": ("Travel Agent", "✈️"),
    "office/union": ("Trade Union", "✊"),
    "office/university": ("University Office", "🎓"),
    "office/vacant": ("Vacant Office", "🏚️"),
    "office/web_design": ("Web Design", "🌐"),
    # shop
    "shop/accessories": ("Accessories Shop", "👜"),
    "shop/agrarian": ("Farm Supply Shop", "🌾"),
    "shop/alcohol": ("Off-Licence", "🍷"),
    "shop/antiques": ("Antiques Shop", "🏺"),
    "shop/appliance": ("Appliance Shop", "🔌"),
    "shop/art": ("Art Shop", "🎨"),
    "shop/baby_goods": ("Baby Shop", "🍼"),
    "shop/bag": ("Bag Shop", "👜"),
    "shop/bakery": ("Bakery", "🥐"),
    "shop/bathroom": ("Bathroom Shop", "🛁"),
    "shop/bathroom_furnishing": ("Bathroom Furnishings", "🚿"),
    "shop/beauty": ("Beauty Shop", "💄"),
    "shop/bed": ("Bed Shop", "🛏️"),
    "shop/beverages": ("Drinks Shop", "🥤"),
    "shop/bicycle": ("Bike Shop", "🚲"),
    "shop/boat": ("Boat Shop", "⛵"),
    "shop/bookmaker": ("Bookmaker", "🏇"),
    "shop/books": ("Bookshop", "📚"),
    "shop/boutique": ("Boutique", "👗"),
    "shop/building_materials": ("Building Materials", "🧱"),
    "shop/butcher": ("Butcher", "🥩"),
    "shop/camera": ("Camera Shop", "📷"),
    "shop/candles": ("Candle Shop", "🕯️"),
    "shop/car": ("Car Dealership", "🚗"),
    "shop/car;car_repair": ("Car Sales & Repair", "🚗"),
    "shop/car_parts": ("Car Parts", "🔩"),
    "shop/car_repair": ("Car Repair", "🔧"),
    "shop/caravan": ("Caravan Dealer", "🚐"),
    "shop/carpet": ("Carpet Shop", "🧶"),
    "shop/catalogue": ("Catalogue Shop", "📋"),
    "shop/charity": ("Charity Shop", "❤️"),
    "shop/cheese": ("Cheese Shop", "🧀"),
    "shop/chemist": ("Chemist", "🧪"),
    "shop/chocolate": ("Chocolate Shop", "🍫"),
    "shop/clothes": ("Clothes Shop", "👕"),
    "shop/coffee": ("Coffee Shop", "☕"),
    "shop/collector": ("Collector Shop", "🏆"),
    "shop/computer": ("Computer Shop", "🖥️"),
    "shop/confectionery": ("Sweet Shop", "🍬"),
    "shop/convenience": ("Convenience Store", "🏪"),
    "shop/copyshop": ("Copy Shop", "🖨️"),
    "shop/cosmetics": ("Cosmetics Shop", "💅"),
    "shop/country_store": ("Country Store", "🏡"),
    "shop/craft": ("Craft Shop", "✂️"),
    "shop/curtain": ("Curtain Shop", "🪟"),
    "shop/dairy": ("Dairy Shop", "🥛"),
    "shop/deli": ("Delicatessen", "🧆"),
    "shop/department_store": ("Department Store", "🏬"),
    "shop/discount": ("Discount Store", "💲"),
    "shop/doityourself": ("DIY Store", "🔨"),
    "shop/doors": ("Door Shop", "🚪"),
    "shop/dry_cleaning": ("Dry Cleaner", "👔"),
    "shop/e-cigarette": ("Vape Shop", "💨"),
    "shop/electrical": ("Electrical Shop", "⚡"),
    "shop/electronics": ("Electronics Shop", "📱"),
    "shop/erotic": ("Adult Shop", "🔞"),
    "shop/esoteric": ("Esoteric Shop", "🔮"),
    "shop/estate_agent": ("Estate Agent", "🏠"),
    "shop/fabric": ("Fabric Shop", "🧵"),
    "shop/fan": ("Fan Shop", "🏅"),
    "shop/farm": ("Farm Shop", "🥕"),
    "shop/fashion_accessories": ("Fashion Accessories", "👒"),
    "shop/fireplace": ("Fireplace Shop", "🔥"),
    "shop/fishing": ("Fishing Shop", "🎣"),
    "shop/flooring": ("Flooring Shop", "🪵"),
    "shop/florist": ("Florist", "💐"),
    "shop/food": ("Food Shop", "🍞"),
    "shop/frame": ("Framing Shop", "🖼️"),
    "shop/frozen_food": ("Frozen Food Shop", "🧊"),
    "shop/fuel": ("Fuel Shop", "⛽"),
    "shop/funeral_directors": ("Funeral Director", "⚰️"),
    "shop/furniture": ("Furniture Shop", "🪑"),
    "shop/games": ("Games Shop", "🎮"),
    "shop/garden_centre": ("Garden Centre", "🌻"),
    "shop/gas": ("Gas Shop", "🔥"),
    "shop/general": ("General Store", "🏪"),
    "shop/gift": ("Gift Shop", "🎁"),
    "shop/glaziery": ("Glazier", "🪟"),
    "shop/greengrocer": ("Greengrocer", "🥬"),
    "shop/grocery": ("Grocery Shop", "🛒"),
    "shop/haberdashery": ("Haberdashery", "🧵"),
    "shop/hairdresser": ("Hairdresser", "💇"),
    "shop/hairdresser_supply": ("Hairdresser Supply", "💇"),
    "shop/hardware": ("Hardware Shop", "🔩"),
    "shop/health": ("Health Shop", "🌿"),
    "shop/health_food": ("Health Food Shop", "🥗"),
    "shop/hearing_aids": ("Hearing Aid Shop", "👂"),
    "shop/herbalist": ("Herbalist", "🌿"),
    "shop/hifi": ("Hi-Fi Shop", "🔊"),
    "shop/household": ("Household Shop", "🏠"),
    "shop/household_linen": ("Linen Shop", "🛏️"),
    "shop/houseware": ("Houseware Shop", "🍳"),
    "shop/ice_cream": ("Ice Cream Shop", "🍦"),
    "shop/interior_decoration": ("Interior Decoration", "🖼️"),
    "shop/jewelry": ("Jewellery Shop", "💍"),
    "shop/kiosk": ("Kiosk", "🏪"),
    "shop/kitchen": ("Kitchen Shop", "🍳"),
    "shop/laundry": ("Laundry", "🧺"),
    "shop/leather": ("Leather Shop", "🧳"),
    "shop/lighting": ("Lighting Shop", "💡"),
    "shop/locksmith": ("Locksmith", "🔐"),
    "shop/mall": ("Shopping Centre", "🏬"),
    "shop/massage": ("Massage Parlour", "💆"),
    "shop/medical_supply": ("Medical Supply", "🩺"),
    "shop/military_surplus": ("Military Surplus", "🎖️"),
    "shop/mobile_phone": ("Mobile Phone Shop", "📱"),
    "shop/mobile_phone_accessories": ("Phone Accessories", "📱"),
    "shop/mobility": ("Mobility Shop", "♿"),
    "shop/mobility_scooter": ("Mobility Scooter Shop", "🦽"),
    "shop/model": ("Model Shop", "✈️"),
    "shop/money_lender": ("Money Lender", "💰"),
    "shop/motorcycle": ("Motorcycle Shop", "🏍️"),
    "shop/motorcycle_repair": ("Motorcycle Repair", "🔧"),
    "shop/music": ("Music Shop", "🎵"),
    "shop/musical_instrument": ("Musical Instrument Shop", "🎸"),
    "shop/newsagent": ("Newsagent", "📰"),
    "shop/nutrition_supplements": ("Nutrition Shop", "💪"),
    "shop/optician": ("Optician", "👓"),
    "shop/outdoor": ("Outdoor Shop", "🏕️"),
    "shop/outpost": ("Outpost", "📦"),
    "shop/paint": ("Paint Shop", "🎨"),
    "shop/party": ("Party Shop", "🎈"),
    "shop/pastry": ("Pastry Shop", "🥐"),
    "shop/pawnbroker": ("Pawnbroker", "💰"),
    "shop/perfumery": ("Perfumery", "🌸"),
    "shop/pet": ("Pet Shop", "🐾"),
    "shop/pet_grooming": ("Pet Grooming", "🐩"),
    "shop/photo": ("Photo Shop", "📸"),
    "shop/piercing": ("Piercing Studio", "💎"),
    "shop/plant_hire": ("Plant Hire", "🚜"),
    "shop/pottery": ("Pottery Shop", "🏺"),
    "shop/printer_ink": ("Ink & Toner Shop", "🖨️"),
    "shop/printing": ("Print Shop", "🖨️"),
    "shop/psychic": ("Psychic", "🔮"),
    "shop/pyrotechnics": ("Fireworks Shop", "🎆"),
    "shop/religion": ("Religious Shop", "✝️"),
    "shop/rental": ("Rental Shop", "🔑"),
    "shop/repair": ("Repair Shop", "🔧"),
    "shop/scuba_diving": ("Scuba Diving Shop", "🤿"),
    "shop/seafood": ("Fishmonger", "🐟"),
    "shop/second_hand": ("Second-Hand Shop", "♻️"),
    "shop/security": ("Security Shop", "🔒"),
    "shop/sewing": ("Sewing Shop", "🪡"),
    "shop/shoe_repair": ("Shoe Repair", "👞"),
    "shop/shoes": ("Shoe Shop", "👟"),
    "shop/sports": ("Sports Shop", "⚽"),
    "shop/stationery": ("Stationery Shop", "✏️"),
    "shop/storage_rental": ("Self Storage", "📦"),
    "shop/supermarket": ("Supermarket", "🛒"),
    "shop/swimming_pool": ("Pool Supplies", "🏊"),
    "shop/tailor": ("Tailor", "🧵"),
    "shop/tattoo": ("Tattoo Studio", "🖋️"),
    "shop/taxi": ("Taxi Booking", "🚕"),
    "shop/tea": ("Tea Shop", "🫖"),
    "shop/telecommunication": ("Telecoms Shop", "📡"),
    "shop/ticket": ("Ticket Office", "🎫"),
    "shop/tiles": ("Tile Shop", "🔲"),
    "shop/tobacco": ("Tobacconist", "🚬"),
    "shop/tool_hire": ("Tool Hire", "🧰"),
    "shop/toys": ("Toy Shop", "🧸"),
    "shop/trade": ("Trade Supplier", "🏭"),
    "shop/travel_agency": ("Travel Agency", "✈️"),
    "shop/trophy": ("Trophy Shop", "🏆"),
    "shop/tyres": ("Tyre Shop", "🛞"),
    "shop/vacant": ("Vacant Shop", "🏚️"),
    "shop/variety_store": ("Variety Store", "🏪"),
    "shop/video": ("Video Shop", "📀"),
    "shop/video_games": ("Video Game Shop", "🎮"),
    "shop/watches": ("Watch Shop", "⌚"),
    "shop/water_sports": ("Water Sports Shop", "🏄"),
    "shop/weapons": ("Weapons Shop", "🗡️"),
    "shop/wedding": ("Wedding Shop", "💒"),
    "shop/wholesale": ("Wholesaler", "📦"),
    "shop/wigs": ("Wig Shop", "💇"),
    "shop/window_blind": ("Blinds Shop", "🪟"),
    "shop/windows": ("Window Shop", "🪟"),
    "shop/wine": ("Wine Shop", "🍷"),
    "shop/wool": ("Wool Shop", "🧶"),
    "shop/yes": ("Shop", "🛍️"),
    # tourism
    "tourism/artwork": ("Public Artwork", "🎨"),
    "tourism/attraction": ("Tourist Attraction", "📸"),
    "tourism/camp_site": ("Campsite", "⛺"),
    "tourism/caravan_site": ("Caravan Site", "🚐"),
    "tourism/chalet": ("Chalet", "🏔️"),
    "tourism/gallery": ("Gallery", "🖼️"),
    "tourism/guest_house": ("Guest House", "🏡"),
    "tourism/hostel": ("Hostel", "🛏️"),
    "tourism/hotel": ("Hotel", "🏨"),
    "tourism/motel": ("Motel", "🏨"),
    "tourism/museum": ("Museum", "🏛️"),
    "tourism/picnic_site": ("Picnic Site", "🧺"),
    "tourism/preserved_railway": ("Heritage Railway", "🚂"),
    "tourism/theme_park": ("Theme Park", "🎢"),
    "tourism/viewpoint": ("Viewpoint", "🔭"),
    "tourism/zoo": ("Zoo", "🦁"),
 }
 # Emoji for each NaPTAN stop type.  transform() applies this mapping to the
 # "category" column of the NaPTAN parquet via replace_strict, so the keys
 # here are the raw category strings expected in that file.
 NAPTAN_EMOJIS: dict[str, str] = {
    "Airport": "✈️",
    "Ferry": "⛴️",
    "Rail station": "🚆",
    "Bus stop": "🚏",
    "Bus station": "🚌",
    "Taxi rank": "🚕",
    "Metro or Tram stop": "🚊",
 }
 def transform(input_path: Path, naptan_path: Path | None = None) -> pl.LazyFrame:
    """Turn raw POIs into a filtered frame with friendly names, emojis and groups.
    Validates CATEGORY_MAP against the categories found in the data (in both
    directions), removes DROP_CATEGORIES, then rewrites "category" to its
    friendly name and adds "emoji" and "group" columns.  When naptan_path is
    given, the NaPTAN rows are appended with group "Public Transport".
    Args:
        input_path: Raw POIs parquet file with a "category" column.
        naptan_path: Optional NaPTAN parquet file; NaPTAN rows are skipped
            when this is None.
    Returns:
        A LazyFrame of the transformed POIs, plus NaPTAN rows if requested.
    Raises:
        ValueError: If a category in the data is neither dropped nor mapped,
            if CATEGORY_MAP has keys absent from the data, or if a mapping
            has an empty friendly name or emoji.
    """
    lf = pl.scan_parquet(input_path)
    # Get all unique categories present in the data
    all_categories = (
        lf.select("category").unique().collect(engine="streaming").to_series().to_list()
    )
    # Verify every non-dropped category has a mapping
    unmapped = [
        cat
        for cat in all_categories
        if cat not in DROP_CATEGORIES and cat not in CATEGORY_MAP
    ]
    if unmapped:
        # key=str keeps the report usable even if a null category slipped in
        raise ValueError(
            f"Categories missing from CATEGORY_MAP: {sorted(unmapped, key=str)}"
        )
    # Verify every CATEGORY_MAP key actually exists in the data (catch typos)
    present = set(all_categories)
    mapped_but_absent = [cat for cat in CATEGORY_MAP if cat not in present]
    if mapped_but_absent:
        raise ValueError(
            f"CATEGORY_MAP contains categories not in data: {sorted(mapped_but_absent)}"
        )
    # Drop unwanted categories
    lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))
    # Check no friendly names or emojis are missing (defensive)
    missing_names = [k for k, v in CATEGORY_MAP.items() if not v[0]]
    if missing_names:
        raise ValueError(f"Empty friendly names for: {missing_names}")
    missing_emojis = [k for k, v in CATEGORY_MAP.items() if not v[1]]
    if missing_emojis:
        raise ValueError(f"Empty emojis for: {missing_emojis}")
    # Build the lookup tables keyed by raw category
    name_mapping = {k: v[0] for k, v in CATEGORY_MAP.items()}
    emoji_mapping = {k: v[1] for k, v in CATEGORY_MAP.items()}
    # Derive group from the first component of the raw category key, title-cased
    group_mapping = {
        k: k.split("/")[0].replace("_", " ").title() for k in CATEGORY_MAP
    }
    # All three expressions read the raw "category" values of the input frame;
    # the overwrite of "category" only shows up in the output.
    lf = lf.with_columns(
        pl.col("category").replace_strict(group_mapping).alias("group"),
        pl.col("category").replace_strict(name_mapping).alias("category"),
        pl.col("category").replace_strict(emoji_mapping).alias("emoji"),
    )
    if naptan_path is None:
        # The signature allows omitting NaPTAN; return the POIs on their own
        # instead of handing None to scan_parquet.
        return lf
    naptan = pl.scan_parquet(naptan_path).with_columns(
        pl.col("category").replace_strict(NAPTAN_EMOJIS).alias("emoji"),
        pl.lit("Public Transport").alias("group"),
    )
    return pl.concat([lf, naptan], how="diagonal_relaxed")
 def main():
    """Command-line entry point: transform the POIs, write them, print a summary."""
    parser = argparse.ArgumentParser(
        description="Transform raw POIs to filtered version with friendly names"
    )
    # All three options are required paths; only the flag and help text differ.
    for flag, help_text in (
        ("--input", "Raw POIs parquet file"),
        ("--naptan", "NaPTAN stations parquet file"),
        ("--output", "Output filtered POIs parquet file"),
    ):
        parser.add_argument(flag, type=Path, required=True, help=help_text)
    opts = parser.parse_args()
    pois = transform(opts.input, opts.naptan).collect(engine="streaming")
    pois.write_parquet(opts.output)
    megabytes = opts.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {opts.output} ({megabytes:.1f} MB, {len(pois):,} POIs)")
    print(f"\nCategories ({pois['category'].n_unique()}):")
    # Most frequent categories first
    per_category = (
        pois.group_by("category", "emoji").len().sort("len", descending=True)
    )
    for entry in per_category.iter_rows(named=True):
        print(f"  {entry['emoji']} {entry['category']}: {entry['len']:,}")
 if __name__ == "__main__":
    main()
--- a/pipeline/utils/init.py
+++ b/pipeline/utils/init.py
@ -0,0 +1,13 @@
 from .download import download, extract_zip
 from .fuzzy_join import fuzzy_join_on_postcode
 from .haversine import haversine_km, haversine_km_expr
 from .poi_counts import count_pois_within_radius
 __all__ = [
    "download",
    "extract_zip",
    "fuzzy_join_on_postcode",
    "haversine_km",
    "haversine_km_expr",
    "count_pois_within_radius",
 ]
--- a/pipeline/utils/download.py
+++ b/pipeline/utils/download.py
@ -0,0 +1,40 @@
 """Shared download and extraction helpers for pipeline scripts."""
 import zipfile
 from pathlib import Path
 import httpx
 from tqdm import tqdm
 def download(url: str, output_path: Path, *, timeout: float = 120) -> None:
    """Fetch url into output_path in 8 KiB chunks, showing a tqdm progress bar.
    Redirects are followed and a non-success HTTP status raises.  `timeout`
    is used as the read timeout; 30 seconds is passed as the default for the
    remaining httpx timeout settings.
    """
    limits = httpx.Timeout(30.0, read=timeout)
    with httpx.stream("GET", url, follow_redirects=True, timeout=limits) as response:
        response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]
        expected_bytes = int(response.headers.get("content-length", 0))
        bar_options = dict(
            # A missing/zero content-length becomes None (no total on the bar)
            total=expected_bytes or None,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=output_path.name,
        )
        with open(output_path, "wb") as sink, tqdm(**bar_options) as progress:
            for block in response.iter_bytes(chunk_size=8192):
                sink.write(block)
                progress.update(len(block))
 def extract_zip(zip_path: Path, extract_dir: Path) -> None:
    """Unpack every member of the ZIP at zip_path under extract_dir.
    extract_dir (and any missing parents) is created first; an existing
    directory is reused.
    """
    extract_dir.mkdir(parents=True, exist_ok=True)
    archive = zipfile.ZipFile(zip_path, mode="r")
    with archive:
        archive.extractall(path=extract_dir)
--- a/pipeline/utils/fuzzy_join.py
+++ b/pipeline/utils/fuzzy_join.py
@ -0,0 +1,194 @@
 import re
 import shutil
 import tempfile
 from concurrent.futures import ProcessPoolExecutor
 from os import cpu_count
 from pathlib import Path
 import polars as pl
 from thefuzz import fuzz
 from tqdm import tqdm
 _NUMBER_RE = re.compile(r"\d+")
 def _normalize(s: pl.Expr) -> pl.Expr:
    """Canonicalise an address expression for comparison.
    Upper-cases the text, turns commas, full stops and hyphens into spaces,
    collapses whitespace runs to a single space and strips the ends.
    """
    upper = s.str.to_uppercase()
    without_punctuation = upper.str.replace_all(r"[,.\-]", " ")
    single_spaced = without_punctuation.str.replace_all(r"\s+", " ")
    return single_spaced.str.strip_chars()
 def fuzzy_join_on_postcode(
    left: pl.LazyFrame,
    right: pl.LazyFrame,
    left_address_col: str,
    right_address_col: str,
    left_postcode_col: str,
    right_postcode_col: str,
 ) -> pl.LazyFrame:
    """Fuzzy join two LazyFrames by matching addresses within postcode buckets.
    Sinks each side to a temporary parquet file so the upstream pipeline
    executes only once.  The matching phase collects just three narrow
    columns (index, address, postcode) via projection pushdown, and the
    final join reads the remaining columns lazily.
    Matching is one-to-one: candidate pairs are scored per postcode and
    assigned greedily from the highest score down.  Rows whose address or
    postcode is null, on either side, take no part in matching.
    The temporary directory is removed only when an error occurs; the
    returned LazyFrame scans the cached parquet files, so they must outlive
    this call.
    Args:
        left: Frame whose rows are all kept in the result.
        right: Frame to match against; duplicates on (normalised address,
            postcode) are reduced to their first occurrence for matching.
        left_address_col: Address column name in `left`.
        right_address_col: Address column name in `right`.
        left_postcode_col: Postcode column name in `left`.
        right_postcode_col: Postcode column name in `right`.
    Returns:
        A LazyFrame with all left and right columns.  Unmatched rows
        have null right columns.
    """
    tmpdir = tempfile.mkdtemp(prefix="fuzzy_join_")
    left_path = Path(tmpdir) / "left.parquet"
    right_path = Path(tmpdir) / "right.parquet"
    try:
        # Materialise each side exactly once, with a row index, to temp parquet.
        left.with_row_index("_left_idx").sink_parquet(left_path)
        right.with_row_index("_right_idx").sink_parquet(right_path)
        # Collect only the narrow columns needed for matching (projection pushdown).
        left_match = (
            pl.scan_parquet(left_path)
            .select(
                "_left_idx",
                _normalize(pl.col(left_address_col)).alias("_left_address"),
                pl.col(left_postcode_col)
                .str.strip_chars()
                .str.to_uppercase()
                .alias("_left_postcode"),
            )
            .collect(engine="streaming")
        )
        right_match = (
            pl.scan_parquet(right_path)
            .select(
                "_right_idx",
                _normalize(pl.col(right_address_col)).alias("_right_address"),
                pl.col(right_postcode_col)
                .str.strip_chars()
                .str.to_uppercase()
                .alias("_right_postcode"),
            )
            .unique(subset=["_right_address", "_right_postcode"], keep="first")
            .collect(engine="streaming")
        )
        # Group right side by postcode for fast lookup.  Null addresses are
        # skipped just like on the left: they cannot be scored, and passing
        # None to the regex/fuzzy scorer would raise inside a worker.
        right_by_postcode: dict[str, list[tuple[int, str]]] = {}
        for idx, postcode, address in zip(
            right_match["_right_idx"],
            right_match["_right_postcode"],
            right_match["_right_address"],
        ):
            if address is not None and postcode is not None:
                right_by_postcode.setdefault(postcode, []).append((idx, address))
        # Group left side by postcode
        left_by_postcode: dict[str, list[tuple[int, str]]] = {}
        for idx, postcode, address in zip(
            left_match["_left_idx"],
            left_match["_left_postcode"],
            left_match["_left_address"],
        ):
            if address is not None and postcode is not None:
                left_by_postcode.setdefault(postcode, []).append((idx, address))
        del left_match, right_match
        # Build tasks for each postcode bucket present on both sides
        tasks = [
            (left_entries, right_by_postcode[postcode])
            for postcode, left_entries in left_by_postcode.items()
            if postcode in right_by_postcode
        ]
        # Score all pairwise matches in parallel, then greedily assign from
        # highest score downward so best pairs lock in first.
        all_pairs: list[tuple[int, int, int]] = []  # (score, left_idx, right_idx)
        with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
            for pairs in tqdm(
                executor.map(_score_bucket, tasks, chunksize=64),
                total=len(tasks),
                desc="Fuzzy matching",
            ):
                all_pairs.extend(pairs)
        del tasks, left_by_postcode, right_by_postcode
        # Sort descending by score so best matches are assigned first; among
        # equal scores the lower left index comes first.
        all_pairs.sort(key=lambda t: (t[0], -t[1]), reverse=True)
        matches: list[tuple[int, int]] = []
        matched_left: set[int] = set()
        matched_right: set[int] = set()
        for _score, left_idx, right_idx in all_pairs:
            if left_idx in matched_left or right_idx in matched_right:
                continue
            matches.append((left_idx, right_idx))
            matched_left.add(left_idx)
            matched_right.add(right_idx)
        del all_pairs, matched_left, matched_right
        # Build a small mapping LazyFrame and join back to the cached parquets.
        # The explicit dtype keeps the columns UInt32 even when there are no
        # matches at all.
        mapping = pl.LazyFrame(
            {
                "_left_idx": pl.Series([m[0] for m in matches], dtype=pl.UInt32),
                "_right_idx": pl.Series([m[1] for m in matches], dtype=pl.UInt32),
            }
        )
        left_cached = pl.scan_parquet(left_path)
        right_cached = pl.scan_parquet(right_path)
        return (
            left_cached.join(mapping, on="_left_idx", how="left")
            .join(right_cached, on="_right_idx", how="left")
            .drop("_left_idx", "_right_idx")
        )
    except BaseException:
        # Clean up on any failure (including KeyboardInterrupt), then re-raise.
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise
 def _numbers_compatible(a: str, b: str) -> bool:
    """Tell whether the digit runs of two addresses are consistent.
    The distinct numeric tokens of the address with fewer of them must all
    occur in the other address.  If exactly one address contains any number
    at all, the pair is rejected.
    """
    fewer = set(_NUMBER_RE.findall(a))
    more = set(_NUMBER_RE.findall(b))
    if len(fewer) > len(more):
        fewer, more = more, fewer
    # One side has numbers, the other has none: never compatible
    if more and not fewer:
        return False
    return fewer <= more
 def _score_bucket(
    args: tuple[list[tuple[int, str]], list[tuple[int, str]]],
 ) -> list[tuple[int, int, int]]:
    """Score all address pairs within a single postcode bucket.
    Args:
        args: A (left_entries, right_entries) pair; each entry is a
            (row index, normalised address) tuple.  Packed into one argument
            so the function can be used directly with executor.map.
    Returns:
        (score, left_row, right_row) for every pair whose numeric tokens are
        compatible; the score comes from fuzz.token_sort_ratio.
    """
    left_entries, right_entries = args
    return [
        (fuzz.token_sort_ratio(left_address, right_address), left_row, right_row)
        for left_row, left_address in left_entries
        for right_row, right_address in right_entries
        # Cheap number check first so mismatched house/flat numbers are never scored
        if _numbers_compatible(left_address, right_address)
    ]
--- a/pipeline/utils/haversine.py
+++ b/pipeline/utils/haversine.py
@ -0,0 +1,43 @@
 import math
 import numpy as np
 import polars as pl
 _EARTH_RADIUS_KM = 6371.0
 def haversine_km(
    lat1: np.ndarray, lon1: np.ndarray, lat2: float, lon2: float
 ) -> np.ndarray:
    """Compute haversine distance in km between arrays (lat1, lon1) and a single point (lat2, lon2).
    Args:
        lat1: Latitudes in degrees.
        lon1: Longitudes in degrees, same shape as lat1.
        lat2: Latitude of the reference point in degrees.
        lon2: Longitude of the reference point in degrees.
    Returns:
        Great-circle distances in km on a sphere of radius 6371 km, one per
        input coordinate.
    """
    lat1_rad = np.radians(lat1)
    lon1_rad = np.radians(lon1)
    lat2_rad = np.radians(lat2)
    lon2_rad = np.radians(lon2)
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad
    a = (
        np.sin(dlat / 2) ** 2
        + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2) ** 2
    )
    # Mathematically a is in [0, 1], but rounding can push it a hair above 1
    # for (near-)antipodal points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    return _EARTH_RADIUS_KM * c
 def haversine_km_expr(
    lat_col: str, lon_col: str, dest_lat: float, dest_lon: float
 ) -> pl.Expr:
    """Polars expression computing haversine distance in km to a fixed point.
    Args:
        lat_col: Name of the latitude column (degrees).
        lon_col: Name of the longitude column (degrees).
        dest_lat: Latitude of the fixed point in degrees.
        dest_lon: Longitude of the fixed point in degrees.
    Returns:
        An expression yielding the great-circle distance in km, using the
        same 6371 km sphere as haversine_km.
    """
    dest_lat_rad = math.radians(dest_lat)
    dest_lon_rad = math.radians(dest_lon)
    lat_rad = pl.col(lat_col).radians()
    lon_rad = pl.col(lon_col).radians()
    dlat = pl.lit(dest_lat_rad) - lat_rad
    dlon = pl.lit(dest_lon_rad) - lon_rad
    a = (dlat / 2).sin() ** 2 + pl.lit(dest_lat_rad).cos() * lat_rad.cos() * (
        dlon / 2
    ).sin() ** 2
    # Clamp to [0, 1]: rounding can push a slightly above 1 for
    # (near-)antipodal points, and arcsin of that would be NaN.
    a = a.clip(0.0, 1.0)
    return 2 * _EARTH_RADIUS_KM * a.sqrt().arcsin()
--- a/pipeline/utils/poi_counts.py
+++ b/pipeline/utils/poi_counts.py
@ -0,0 +1,174 @@
 """Count POIs within a radius of properties, optimized via postcode deduplication."""
 import tempfile
 import numpy as np
 import polars as pl
 from .haversine import haversine_km
 def _count_pois_per_postcode(
    postcodes_df: pl.DataFrame,
    pois: pl.DataFrame,
    groups: dict[str, list[str]],
    radius_km: float = 2.0,
 ) -> pl.DataFrame:
    """
    For each unique postcode, count POIs within radius_km by category group.
    Uses spatial grid with vectorized distance calculations.
    Args:
        postcodes_df: One row per postcode with "postcode", "lat", "lon".
        pois: POIs with "lat", "lng" and "category" columns.
        groups: Maps an output group name to the POI categories it counts.
        radius_km: Search radius in km.
    Returns:
        A DataFrame with "postcode" plus one "<group>_<int(radius_km)>km"
        count column per group, in the row order of postcodes_df.
    Limitations: the grid does not wrap at the antimeridian, and the
    east-west search width is only approximate within a degree of the poles.
    """
    print(f"Counting POIs within {radius_km}km per postcode...")
    n_postcodes = len(postcodes_df)
    n_pois = len(pois)
    print(f"  {n_postcodes:,} postcodes, {n_pois:,} POIs")
    # Build spatial grid for POIs (0.05 degree cells: ~5.5km north-south,
    # narrower east-west as latitude grows)
    grid_size = 0.05
    print("  Building POI spatial grid...")
    # Convert to numpy arrays
    poi_lats = pois["lat"].to_numpy()
    poi_lngs = pois["lng"].to_numpy()
    poi_cats = pois["category"].to_numpy()
    # Compute grid coordinates for all POIs
    poi_grid_lats = np.floor(poi_lats / grid_size).astype(np.int32)
    poi_grid_lngs = np.floor(poi_lngs / grid_size).astype(np.int32)
    # Bucket POI row numbers by grid cell, then freeze each bucket as an
    # index array for fast fancy-indexing later
    cell_members: dict[tuple[int, int], list[int]] = {}
    for i, key in enumerate(zip(poi_grid_lats.tolist(), poi_grid_lngs.tolist())):
        cell_members.setdefault(key, []).append(i)
    poi_grid = {
        key: np.array(members, dtype=np.int32)
        for key, members in cell_members.items()
    }
    del cell_members
    print(f"  POI grid has {len(poi_grid):,} occupied cells")
    # Pre-compute category masks
    category_masks = {}
    for group, categories in groups.items():
        mask = np.isin(poi_cats, categories)
        category_masks[group] = mask
        print(f"  {group}: {mask.sum():,} POIs")
    # Extract postcode coordinates as numpy arrays
    pc_lats = postcodes_df["lat"].to_numpy()
    pc_lons = postcodes_df["lon"].to_numpy()
    pc_codes = postcodes_df["postcode"].to_list()
    # How many cells to look at around a postcode's own cell.  A fixed 3x3
    # block only covers the radius while it is smaller than one cell, so the
    # reach is derived from radius_km instead (it stays 1 for 2 km at
    # latitudes up to roughly 68 degrees).
    km_per_degree = 6371.0 * np.pi / 180.0  # one degree of arc on a 6371 km sphere
    cell_height_km = grid_size * km_per_degree
    radius_deg = radius_km / km_per_degree
    lat_reach = max(1, int(np.ceil(radius_km / cell_height_km)))
    lat_offsets = range(-lat_reach, lat_reach + 1)
    # Cells get narrower towards the poles; size the east-west reach using the
    # poleward edge of the search circle (capped at 89 degrees to stay bounded).
    edge_lats = np.minimum(np.abs(pc_lats) + radius_deg, 89.0)
    lng_reaches = np.maximum(
        1.0, np.ceil(radius_km / (cell_height_km * np.cos(np.radians(edge_lats))))
    )
    # Initialize result arrays
    result_counts = {
        group: np.zeros(n_postcodes, dtype=np.int32) for group in groups
    }
    # Process in batches with progress
    batch_size = 50000
    n_batches = (n_postcodes + batch_size - 1) // batch_size
    print(f"  Processing {n_postcodes:,} postcodes in {n_batches} batches...")
    for batch_idx in range(n_batches):
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, n_postcodes)
        if batch_idx % 5 == 0:
            print(
                f"  Batch {batch_idx + 1}/{n_batches}: postcodes {start_idx:,} - {end_idx:,}"
            )
        # Process batch
        for i in range(start_idx, end_idx):
            pc_lat = pc_lats[i]
            pc_lon = pc_lons[i]
            # Locate the postcode's own cell
            grid_lat = int(np.floor(pc_lat / grid_size))
            grid_lng = int(np.floor(pc_lon / grid_size))
            lng_reach = int(lng_reaches[i])
            # Collect POI indices from every cell the search circle can touch
            nearby_indices = []
            for dlat in lat_offsets:
                for dlng in range(-lng_reach, lng_reach + 1):
                    cell = poi_grid.get((grid_lat + dlat, grid_lng + dlng))
                    if cell is not None:
                        nearby_indices.append(cell)
            if not nearby_indices:
                continue
            # Concatenate all nearby POI indices
            nearby = np.concatenate(nearby_indices)
            # Vectorized distance calculation for all nearby POIs
            distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lat, pc_lon)
            # Filter by radius
            within_indices = nearby[distances <= radius_km]
            if len(within_indices) == 0:
                continue
            # Count by category group using pre-computed masks
            for group, cat_mask in category_masks.items():
                result_counts[group][i] = cat_mask[within_indices].sum()
    # Build result dataframe
    result_data = {"postcode": pc_codes}
    for group in groups:
        result_data[f"{group}_{int(radius_km)}km"] = result_counts[group]
    result = pl.DataFrame(result_data)
    print("  Completed POI counting")
    return result
 def count_pois_within_radius(
    properties: pl.DataFrame,
    pois: pl.DataFrame,
    radius_km: float = 2.0,
    groups: dict[str, list[str]] | None = None,
 ) -> dict[str, pl.Series]:
    """
    Count POIs within radius for properties, optimized by deduplicating postcodes.
    Args:
        properties: Needs "postcode", "lat" and "lon" columns; the first row
            seen for a postcode supplies that postcode's coordinates.
        pois: POIs with "lat", "lng" and "category" columns.
        radius_km: Search radius in km.
        groups: Maps an output group name to the POI categories it counts.
            Defaults to the module-level POI_GROUPS.
    Returns dict of {column_name: count_series} aligned to properties dataframe.
    Properties whose postcode has no counts get 0.
    """
    if groups is None:
        # NOTE(review): POI_GROUPS is neither defined nor imported in the
        # visible part of this module - confirm where it is meant to come
        # from, or always pass `groups` explicitly.
        groups = POI_GROUPS
    # Get unique postcodes with coordinates
    print("Deduplicating postcodes...")
    unique_postcodes = properties.select(["postcode", "lat", "lon"]).unique(
        subset=["postcode"]
    )
    print(
        f"  {len(properties):,} properties → {len(unique_postcodes):,} unique postcodes"
    )
    # Count POIs per postcode.  `groups` has to be passed here: the helper's
    # third positional parameter is the group mapping, not the radius.
    postcode_counts = _count_pois_per_postcode(
        unique_postcodes, pois, groups, radius_km
    )
    count_cols = [f"{group}_{int(radius_km)}km" for group in groups]
    print("  Writing postcode counts to temp file...")
    with tempfile.NamedTemporaryFile(suffix=".parquet") as tmp:
        tmp_path = tmp.name
        postcode_counts.write_parquet(tmp_path)
        # Join using lazy evaluation
        print("  Joining counts back to properties (lazy)...")
        # Convert properties to lazy frame, join, then collect.  The result
        # must line up row-for-row with `properties`, so the join is asked
        # explicitly to keep the left-hand order.
        result_lazy = (
            properties.lazy()
            .select("postcode")
            .join(
                pl.scan_parquet(tmp_path),
                on="postcode",
                how="left",
                maintain_order="left",
            )
            .select(count_cols)
            .fill_null(0)
        )
        result_df = result_lazy.collect(engine="streaming")
        return {col: result_df[col] for col in count_cols}
--- a/pipeline/utils/test_fuzzy_join.py
+++ b/pipeline/utils/test_fuzzy_join.py
@ -0,0 +1,46 @@
 import polars as pl
 from pipeline.utils import fuzzy_join_on_postcode
 # Manual snapshot check: fuzzy-match Price Paid addresses against EPC addresses
 # for a single postcode and print the matched pairs for visual inspection.
 POSTCODE = "E14 2DG"
 # Price paid: unique addresses for this postcode
 pp = (
    pl.scan_parquet("data/price-paid-complete.parquet")
    .filter(pl.col("postcode") == POSTCODE)
    .select("paon", "saon", "street", "postcode")
    .unique()
    .sort("saon")
    .with_columns(
        # Build a single address string; null parts are skipped rather than
        # turning the whole address into null
        pl.concat_str(
            [pl.col("saon"), pl.col("paon"), pl.col("street")],
            separator=" ",
            ignore_nulls=True,
        ).alias("pp_address"),
    )
 )
 # EPC: latest inspection per address for this postcode
 epc = (
    pl.scan_csv("data/epc/certificates.csv")
    .select("ADDRESS", "POSTCODE", "INSPECTION_DATE")
    .filter(pl.col("POSTCODE").str.strip_chars() == POSTCODE)
    .sort("INSPECTION_DATE", descending=True)
    # keep="first" (with maintain_order) keeps the newest row per address after
    # the descending sort; the default keep="any" may keep an arbitrary row.
    .unique("ADDRESS", keep="first", maintain_order=True)
    .sort("ADDRESS")
 )
 result = fuzzy_join_on_postcode(
    left=pp,
    right=epc,
    left_address_col="pp_address",
    right_address_col="ADDRESS",
    left_postcode_col="postcode",
    right_postcode_col="POSTCODE",
 ).collect()
 snapshot = result.select("pp_address", "ADDRESS").sort("pp_address")
 print("Testing the matching between EPC and PP addresses")
 # tbl_rows=-1 / tbl_cols=-1 print the full table without truncation
 with pl.Config(tbl_rows=-1, tbl_cols=-1, fmt_str_lengths=80):
    print(snapshot)
--- a/pipeline/utils/test_haversine.py
+++ b/pipeline/utils/test_haversine.py
@ -0,0 +1,147 @@
 import numpy as np
 import polars as pl
 import pytest
 from pipeline.utils.haversine import haversine_km, haversine_km_expr
 class TestHaversineKm:
    """Checks for ``haversine_km``, the NumPy implementation."""
    def test_same_point(self):
        """A point lies zero kilometres from itself."""
        here_lat, here_lon = 51.5074, -0.1278
        separation = haversine_km(
            np.array([here_lat]), np.array([here_lon]), here_lat, here_lon
        )
        assert np.allclose(separation, 0.0, atol=1e-10)
    def test_known_distance_london_to_paris(self):
        """London to Paris comes out at roughly 344 km."""
        london = (np.array([51.5074]), np.array([-0.1278]))
        paris = (48.8566, 2.3522)
        separation = haversine_km(*london, *paris)
        # Accept anything within 1% of the reference figure
        assert np.allclose(separation[0], 344, rtol=0.01)
    def test_known_distance_new_york_to_london(self):
        """New York to London comes out at roughly 5570 km."""
        new_york = (np.array([40.7128]), np.array([-74.0060]))
        london = (51.5074, -0.1278)
        separation = haversine_km(*new_york, *london)
        # Accept anything within 1% of the reference figure
        assert np.allclose(separation[0], 5570, rtol=0.01)
    def test_multiple_points(self):
        """Several origins can be measured against one destination in one call."""
        # Origins, in order: London, Paris, NYC
        origin_lats = np.array([51.5074, 48.8566, 40.7128])
        origin_lons = np.array([-0.1278, 2.3522, -74.0060])
        edinburgh = (55.9533, -3.1883)
        to_edinburgh = haversine_km(origin_lats, origin_lons, *edinburgh)
        # No origin coincides with Edinburgh, so every distance is positive
        assert np.all(to_edinburgh > 0)
        # London is nearest (~530 km), then Paris, then NYC
        assert to_edinburgh[0] < to_edinburgh[1] < to_edinburgh[2]
        assert np.allclose(to_edinburgh[0], 530, rtol=0.02)
    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        start_lat = np.array([0.0])
        start_lon = np.array([0.0])
        separation = haversine_km(start_lat, start_lon, 0.0, 1.0)
        assert np.allclose(separation[0], 111.2, rtol=0.01)
 class TestHaversineKmExpr:
    """Checks for ``haversine_km_expr``, the Polars expression implementation."""
    @staticmethod
    def _dist_column(lats, lons, dest_lat, dest_lon):
        """Evaluate the expression over a lat/lon frame and return the ``dist`` Series."""
        frame = pl.DataFrame({"lat": lats, "lon": lons})
        evaluated = frame.select(
            haversine_km_expr("lat", "lon", dest_lat, dest_lon).alias("dist")
        )
        return evaluated["dist"]
    def test_same_point(self):
        """A point lies zero kilometres from itself."""
        dist = self._dist_column([51.5074], [-0.1278], 51.5074, -0.1278)
        assert dist[0] == pytest.approx(0.0, abs=1e-10)
    def test_known_distance_london_to_paris(self):
        """London to Paris comes out at roughly 344 km."""
        dist = self._dist_column([51.5074], [-0.1278], 48.8566, 2.3522)
        assert dist[0] == pytest.approx(344, rel=0.01)
    def test_known_distance_new_york_to_london(self):
        """New York to London comes out at roughly 5570 km."""
        dist = self._dist_column([40.7128], [-74.0060], 51.5074, -0.1278)
        assert dist[0] == pytest.approx(5570, rel=0.01)
    def test_multiple_points(self):
        """Several origins can be measured against one destination in one call."""
        # Origins, in order: London, Paris, NYC; destination is Edinburgh
        to_edinburgh = self._dist_column(
            [51.5074, 48.8566, 40.7128],
            [-0.1278, 2.3522, -74.0060],
            55.9533,
            -3.1883,
        ).to_numpy()
        # No origin coincides with Edinburgh, so every distance is positive
        assert np.all(to_edinburgh > 0)
        # London is nearest (~530 km), then Paris, then NYC
        assert to_edinburgh[0] < to_edinburgh[1] < to_edinburgh[2]
        assert to_edinburgh[0] == pytest.approx(530, rel=0.02)
    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        dist = self._dist_column([0.0], [0.0], 0.0, 1.0)
        assert dist[0] == pytest.approx(111.2, rel=0.01)
 class TestHaversineConsistency:
    """Cross-checks the NumPy and Polars haversine implementations."""
    def test_numpy_and_polars_match(self):
        """The two implementations agree to within floating-point noise."""
        rome = (41.9028, 12.4964)
        origin_lats = np.array([51.5074, 48.8566, 40.7128, 55.9533, 52.5200])
        origin_lons = np.array([-0.1278, 2.3522, -74.0060, -3.1883, 13.4050])
        # Polars side: evaluate the expression over the same coordinates
        frame = pl.DataFrame({"lat": origin_lats, "lon": origin_lons})
        via_polars = frame.select(
            haversine_km_expr("lat", "lon", rome[0], rome[1]).alias("dist")
        )["dist"].to_numpy()
        # NumPy side: direct vectorised call
        via_numpy = haversine_km(origin_lats, origin_lons, rome[0], rome[1])
        # A tight relative tolerance allows only rounding differences
        assert np.allclose(via_numpy, via_polars, rtol=1e-10)
--- a/pipeline/utils/test_poi_counts.py
+++ b/pipeline/utils/test_poi_counts.py
@ -0,0 +1,93 @@
 import polars as pl
 import pytest
 from pipeline.utils.poi_counts import POI_GROUPS, count_pois_within_radius
@pytest.fixture
 def pois():
    """POIs clustered around two locations: central London and 10km away."""
    return pl.DataFrame(
        {
            "lat": [51.5074, 51.5075, 51.5080, 51.5076, 51.5073, 51.60],
            "lng": [-0.1278, -0.1280, -0.1275, -0.1279, -0.1277, -0.20],
            "category": [
                "Restaurant",
                "Fast Food",
                "Supermarket",
                "Park",
                "Station",
                "Restaurant",  # too far from any property
            ],
        }
    )
@pytest.fixture
 def properties():
    """Two properties at the same postcode near central London, one at a distant postcode."""
    return pl.DataFrame(
        {
            "postcode": ["EC1A 1BB", "EC1A 1BB", "ZZ99 9ZZ"],
            "lat": [51.5074, 51.5074, 55.0],
            "lon": [-0.1278, -0.1278, -3.0],
        }
    )
 def test_counts_pois_within_radius(properties, pois):
    """Counts are produced per group, aligned to the property rows, and correct per postcode."""
    counts = count_pois_within_radius(properties, pois, radius_km=2.0)
    expected_columns = {f"{g}_2km" for g in POI_GROUPS}
    assert set(counts.keys()) == expected_columns
    # One value per property row (3 rows) in every returned Series
    for name, column in counts.items():
        assert len(column) == 3, f"{name} has {len(column)} rows, expected 3"
    # Row 0 sits in the middle of the central London cluster
    expected_first_row = {
        "restaurants_2km": 2,  # Restaurant + Fast Food
        "groceries_2km": 1,  # Supermarket
        "parks_2km": 1,  # Park
        "public_transport_2km": 1,  # Station
    }
    for name, expected in expected_first_row.items():
        assert counts[name][0] == expected
    # Row 1 shares row 0's postcode and therefore its counts
    assert counts["restaurants_2km"][1] == counts["restaurants_2km"][0]
    # Row 2 (ZZ99 9ZZ) has no POI anywhere near it
    for group in POI_GROUPS:
        assert counts[f"{group}_2km"][2] == 0
 def test_no_pois_returns_zeros(properties):
    """An empty POI table still yields every count column, filled with zeros."""
    poi_schema = (("lat", pl.Float64), ("lng", pl.Float64), ("category", pl.String))
    # Typed but empty columns, so the frame has the usual POI layout and no rows
    empty_pois = pl.DataFrame(
        {name: pl.Series([], dtype=dtype) for name, dtype in poi_schema}
    )
    counts = count_pois_within_radius(properties, empty_pois, radius_km=2.0)
    for group in POI_GROUPS:
        column_name = f"{group}_2km"
        assert column_name in counts
        assert counts[column_name].to_list() == [0, 0, 0]
 def test_custom_radius(pois):
    """With a very small radius, only POIs at (or right beside) the property are counted."""
    single_property = pl.DataFrame(
        {"postcode": ["EC1A 1BB"], "lat": [51.5074], "lon": [-0.1278]}
    )
    # radius_km=0.01 is a 10 m search radius; the column suffix is "_0km"
    counts = count_pois_within_radius(single_property, pois, radius_km=0.01)
    # The Restaurant POI shares the property's exact coordinates
    assert counts["restaurants_0km"][0] >= 1
    # Across all groups, no more than the POIs sitting on top of the property may show up
    nearby_total = sum(counts[f"{group}_0km"][0] for group in POI_GROUPS)
    assert nearby_total <= 2  # at most the co-located POIs
--- a/pyproject.toml
+++ b/pyproject.toml
@ -6,11 +6,9 @@ readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
    "attrs>=22.2.0",
-    "httpx>=0.28.1",
+    "httpx[socks]>=0.28.1",
    "ipywidgets>=8.0.0",
    "journey-client",
    "jupyter>=1.0.0",
    "nest-asyncio>=1.6.0",
    "numpy>=1.26.0",
    "pandas>=2.0.0",
    "plotly>=6.5.2",
@ -18,17 +16,31 @@ dependencies = [
    "pyarrow>=15.0.0",
    "python-dateutil>=2.8.0",
    "tqdm>=4.67.1",
    "fastapi[standard]>=0.115.0",
    "uvicorn>=0.34.0",
    "h3>=3.7.0",
    "overturemaps>=0.18.0",
    "fastexcel>=0.19.0",
    "scipy>=1.17.0",
    "matplotlib>=3.10.8",
    "osmium>=4.0.0",
    "matplotlib>=3.10.8",
    "thefuzz>=0.22.1",
    "scipy>=1.17.0",
    "shapely>=2.0.0",
    "rasterio>=1.5.0",
    "pyproj>=3.7.2",
 ]
-[dependency-groups]
+[tool.uv]
-dev = ["ruff>=0.8.0"]
+environments = ["sys_platform == 'linux' and python_version < '3.14'"]
-[tool.uv.sources]
+[dependency-groups]
-journey-client = { path = "./tfl_journey_client" }
+dev = [
    "deptry>=0.22.0",
    "pytest>=9.0.2",
    "ruff>=0.8.0",
 ]
 [tool.deptry.per_rule_ignores]
 # pyarrow/fastexcel: runtime backends for polars parquet/Excel I/O
 # jupyter/ipywidgets/pandas: needed to run analysis notebooks
 DEP002 = ["pyarrow", "fastexcel", "jupyter", "ipywidgets", "pandas"]
 # pytest is a dev dependency, not a missing one
 DEP004 = ["pytest"]
--- a/server-rs/Cargo.lock
+++ b/server-rs/Cargo.lock
--- a/server-rs/Cargo.toml
+++ b/server-rs/Cargo.toml
@ -0,0 +1,33 @@
 [package]
 name = "property-map-server"
 version = "0.1.0"
 edition = "2021"
 [dependencies]
 anyhow = "1"
 clap = { version = "4", features = ["derive"] }
 axum = "0.8"
 tower-http = { version = "0.6", features = ["cors", "fs", "compression-gzip", "compression-zstd", "trace"] }
 tokio = { version = "1", features = ["full"] }
 polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16"] }
 h3o = "0.7"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 rayon = "1"
 rustc-hash = "2"
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
 [lints.clippy]
 min_ident_chars = "warn"
 [profile.dev]
 opt-level = 1
 [profile.release]
 opt-level = 3
 lto = "thin"
 [profile.production]
 inherits = "release"
 lto = true
--- a/server-rs/clippy.toml
+++ b/server-rs/clippy.toml
@ -0,0 +1 @@
 allowed-idents-below-min-chars = ["i", "j", "k", "_"]
--- a/server-rs/rust-toolchain.toml
+++ b/server-rs/rust-toolchain.toml
@ -0,0 +1,8 @@
 [toolchain]
 channel = "stable"
 targets = [
  "x86_64-unknown-linux-gnu",
  "x86_64-unknown-linux-musl",
  "aarch64-unknown-linux-gnu",
 ]
 profile = "default"
--- a/server-rs/src/consts.rs
+++ b/server-rs/src/consts.rs
@ -0,0 +1,28 @@
 // NOTE(review): the per-constant notes below are inferred from names and values
 // only; the use sites are not in this file, so confirm before relying on them.
 /// Number of histogram bins (presumably for per-feature value histograms — confirm).
 pub const HISTOGRAM_BINS: usize = 100;
 /// Presumably the lowest H3 resolution precomputed by the server — confirm.
 pub const H3_PRECOMPUTE_MIN: u8 = 4;
 /// Presumably the highest H3 resolution precomputed by the server — confirm.
 pub const H3_PRECOMPUTE_MAX: u8 = 12;
 /// `host:port` string; presumably the address the HTTP server listens on — confirm.
 pub const SERVER_ADDRESS: &str = "0.0.0.0:8001";
 /// Presumably the grid size used to quantize requested map bounds — confirm units.
 pub const BOUNDS_QUANTIZATION: f64 = 0.01;
 /// NOTE(review): unclear whether 0.1 means a 10% fraction or 0.1 percent — confirm at use site.
 pub const BOUNDS_BUFFER_PERCENT: f64 = 0.1;
 /// Presumably the minimum H3 resolution at which postcode-level data is served — confirm.
 pub const POSTCODE_MIN_RESOLUTION: u8 = 11;
 /// Presumably an upper limit on POIs returned by a single request — confirm.
 pub const MAX_POIS_PER_REQUEST: usize = 2500;
 /// Presumably the property limit applied when a request does not specify one — confirm.
 pub const DEFAULT_PROPERTIES_LIMIT: usize = 100;
 /// Presumably the largest property limit a request may ask for — confirm.
 pub const MAX_PROPERTIES_LIMIT: usize = 500;
 /// Presumably the sentinel code representing a missing enum value (255 == u8::MAX) — confirm.
 pub const ENUM_NULL: u8 = 255;
 /// Canonical display order for POI category groups.
 /// The server will panic at startup if the data contains groups not in this list or vice versa.
 pub const POI_GROUP_ORDER: &[&str] = &[
    "Public Transport",
    "Amenity",
    "Building",
    "Craft",
    "Healthcare",
    "Leisure",
    "Office",
    "Shop",
    "Tourism",
 ];
--- a/server-rs/src/features.rs
+++ b/server-rs/src/features.rs
@ -0,0 +1,676 @@
 //! Static feature configuration. Every numeric and enum column in wide.parquet
 //! must be declared here. Unknown columns cause a startup panic.
 /// How the slider range of a numeric feature is determined.
 pub enum Bounds {
    /// Fixed min/max values for the slider
    Fixed { min: f64, max: f64 },
    /// Compute percentile from data at startup.
    /// `low`/`high` are written on a 0-100 scale in this file (e.g. 2.0 and 98.0).
    Percentile { low: f64, high: f64 },
 }
 /// Static description of one numeric feature: the parquet column it maps to,
 /// its slider bounds and step, and the texts shown for it.
 pub struct FeatureConfig {
    /// Must match parquet column name exactly (also used as display label)
    pub name: &'static str,
    /// How the slider's min/max are obtained (fixed values or data percentiles)
    pub bounds: Bounds,
    /// Slider step size. Controls the granularity of the range slider in the UI.
    pub step: f64,
    /// Short one-line description shown in the filter sidebar
    pub description: &'static str,
    /// Longer description explaining methodology, data source, and caveats
    pub detail: &'static str,
    /// Data source slug for linking to /data-sources#<slug>
    pub source: &'static str,
 }
 /// A named group of numeric features (e.g. "Property", "Transport").
 pub struct FeatureGroup {
    /// Group name
    pub name: &'static str,
    /// Numeric features belonging to this group
    pub features: &'static [FeatureConfig],
 }
 /// Static description of one enum feature: its value ordering and the texts shown for it.
 pub struct EnumFeatureConfig {
    /// Feature name (presumably the parquet column name, as for `FeatureConfig::name` — confirm)
    pub name: &'static str,
    /// If set, values are presented in this order instead of alphabetical.
    /// Values not listed are appended alphabetically after the ordered ones.
    pub order: Option<&'static [&'static str]>,
    /// Short one-line description shown in the filter sidebar
    pub description: &'static str,
    /// Longer description explaining methodology, data source, and caveats
    pub detail: &'static str,
    /// Data source slug for linking to /data-sources#<slug>
    pub source: &'static str,
 }
 /// A named group of enum features.
 pub struct EnumFeatureGroup {
    /// Group name
    pub name: &'static str,
    /// Enum features belonging to this group
    pub features: &'static [EnumFeatureConfig],
 }
 /// Columns in parquet that are neither numeric features nor enum features.
 /// These are silently skipped during schema validation.
 /// Entries are parquet column names (presumably matched exactly — confirm
 /// against the validation code).
 pub const IGNORED_COLUMNS: &[&str] = &[
    "lat",
    "lon",
    "Address per Property Register",
    "Address per EPC",
    "Postcode",
    "historical_prices",
    "Is construction date approximate",
 ];
 pub static FEATURE_GROUPS: &[FeatureGroup] = &[
    FeatureGroup {
        name: "Property",
        features: &[
            FeatureConfig {
                name: "Last known price",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 2_000_000.0,
                },
                step: 10000.0,
                description: "Most recent sale price from the Land Registry",
                detail: "The last recorded sale price for this property from HM Land Registry Price Paid data. Covers residential sales in England and Wales. May be years old if the property hasn't sold recently.",
                source: "price-paid",
            },
            FeatureConfig {
                name: "Price per sqm",
                bounds: Bounds::Percentile {
                    low: 0.0,
                    high: 98.0,
                },
                step: 100.0,
                description: "Sale price divided by total floor area",
                detail: "Calculated by dividing the last known sale price by the total floor area from the EPC certificate. Useful for comparing value across different-sized properties. Only available where both price and floor area data exist.",
                source: "price-paid",
            },
            FeatureConfig {
                name: "Total floor area (sqm)",
                bounds: Bounds::Percentile {
                    low: 0.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Internal floor area from the EPC survey",
                detail: "Total useful floor area in square metres as measured during the Energy Performance Certificate assessment. Includes all habitable rooms but excludes garages, outbuildings, and external areas.",
                source: "epc",
            },
            FeatureConfig {
                name: "Number of bedrooms & living rooms",
                bounds: Bounds::Fixed {
                    min: 1.0,
                    max: 10.0,
                },
                step: 1.0,
                description: "Count of habitable rooms from the EPC survey",
                detail: "Total number of habitable rooms (bedrooms plus living rooms) as recorded in the Energy Performance Certificate. Kitchens and bathrooms are typically excluded unless they are large enough to count as habitable rooms.",
                source: "epc",
            },
            FeatureConfig {
                name: "Approximate construction age",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 2026.0,
                },
                step: 1.0,
                description: "Estimated year of construction from the EPC",
                detail: "The approximate year of construction as recorded in the Energy Performance Certificate. Derived from the construction age band (e.g. '1930-1949') by taking the midpoint. May be approximate, especially for older buildings.",
                source: "epc",
            },
        ],
    },
    FeatureGroup {
        name: "Transport",
        features: &[
            FeatureConfig {
                name: "public_transport_easy_minutes",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 180.0,
                },
                step: 2.0,
                description: "Quickest public transport journey to central London (easy route)",
                detail: "Journey time in minutes by public transport to central London destinations, using TfL's Journey Planner API. The 'easy' route minimises changes and walking. Calculated for weekday morning commute times.",
                source: "tfl-journey-times",
            },
            FeatureConfig {
                name: "public_transport_quick_minutes",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 180.0,
                },
                step: 2.0,
                description: "Fastest public transport journey to central London",
                detail: "Journey time in minutes by public transport to central London destinations, using TfL's Journey Planner API. The 'quick' route optimises for shortest total time regardless of changes. Calculated for weekday morning commute times.",
                source: "tfl-journey-times",
            },
            FeatureConfig {
                name: "cycling_minutes",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 180.0,
                },
                step: 1.0,
                description: "Cycling time to central London via TfL routing",
                detail: "Cycling journey time in minutes to central London destinations, as calculated by the TfL Journey Planner API. Uses TfL's default cycling speed and route preferences.",
                source: "tfl-journey-times",
            },
            FeatureConfig {
                name: "Public transport within 2km",
                bounds: Bounds::Percentile {
                    low: 5.0,
                    high: 95.0,
                },
                step: 1.0,
                description: "Number of public transport stops within 2km",
                detail: "Count of bus stops, rail stations, tube stations, tram stops, and other public transport access points within a 2km radius of the property's postcode. Derived from the NaPTAN (National Public Transport Access Nodes) dataset.",
                source: "naptan",
            },
        ],
    },
    FeatureGroup {
        name: "Education",
        features: &[
            FeatureConfig {
                name: "Education, Skills and Training Score",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 0.1,
                description: "IoD education deprivation score for the local area",
                detail: "From the English Indices of Deprivation. Measures deprivation in education, skills and training in the local area (LSOA). Higher scores indicate greater deprivation. Combines children/young people sub-domain (school attainment, entry to higher education) and adult skills sub-domain (adult qualifications, English language proficiency).",
                source: "iod",
            },
            FeatureConfig {
                name: "Good+ primary schools within 5km",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 30.0,
                },
                step: 1.0,
                description: "Primary schools rated Good or Outstanding by Ofsted nearby",
                detail: "Number of state-funded primary schools within 5km that have a current Ofsted rating of Good or Outstanding. Based on the latest inspection outcomes dataset. Schools that have not yet been inspected are excluded.",
                source: "ofsted",
            },
            FeatureConfig {
                name: "Good+ secondary schools within 5km",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 15.0,
                },
                step: 1.0,
                description: "Secondary schools rated Good or Outstanding by Ofsted nearby",
                detail: "Number of state-funded secondary schools within 5km that have a current Ofsted rating of Good or Outstanding. Based on the latest inspection outcomes dataset. Schools that have not yet been inspected are excluded.",
                source: "ofsted",
            },
        ],
    },
    FeatureGroup {
        name: "Deprivation",
        features: &[
            FeatureConfig {
                name: "Index of Multiple Deprivation (IMD) Score",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 0.1,
                description: "Overall deprivation score combining all domains",
                detail: "The Index of Multiple Deprivation is the official measure of relative deprivation in England. It combines seven weighted domains: Income (22.5%), Employment (22.5%), Education (13.5%), Health (13.5%), Crime (9.3%), Barriers to Housing & Services (9.3%), and Living Environment (9.3%). Higher scores indicate greater deprivation. Measured at LSOA level (~1,500 people).",
                source: "iod",
            },
            FeatureConfig {
                name: "Income Score (rate)",
                bounds: Bounds::Fixed { min: 0.0, max: 0.6 },
                step: 0.01,
                description: "Proportion of the population experiencing income deprivation",
                detail: "From the English Indices of Deprivation. The proportion of the local population experiencing deprivation relating to low income. Includes people on Income Support, income-based Jobseeker's Allowance, income-based Employment and Support Allowance, Pension Credit, Working Tax Credit and Child Tax Credit, Universal Credit, and asylum seekers.",
                source: "iod",
            },
            FeatureConfig {
                name: "Employment Score (rate)",
                bounds: Bounds::Fixed { min: 0.0, max: 0.4 },
                step: 0.01,
                description: "Proportion of the working-age population involuntarily excluded from work",
                detail: "From the English Indices of Deprivation. The proportion of the working-age population involuntarily excluded from the labour market. Includes claimants of Jobseeker's Allowance, Employment and Support Allowance, Incapacity Benefit, Severe Disablement Allowance, Carer's Allowance, and relevant Universal Credit claimants.",
                source: "iod",
            },
            FeatureConfig {
                name: "Health Deprivation and Disability Score",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 0.1,
                description: "Risk of premature death and quality of life impairment",
                detail: "From the English Indices of Deprivation. Measures the risk of premature death and impairment of quality of life through poor physical or mental health. Derived from years of potential life lost, comparative illness and disability ratio, acute morbidity, and mood and anxiety disorders.",
                source: "iod",
            },
            FeatureConfig {
                name: "Crime Score",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 0.1,
                description: "IoD crime deprivation score measuring personal risk",
                detail: "From the English Indices of Deprivation. Measures the risk of personal and material victimisation at local level. Derived from recorded rates of violence, burglary, theft, and criminal damage. Higher scores indicate higher crime-related deprivation.",
                source: "iod",
            },
            FeatureConfig {
                name: "Living Environment Score",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 0.1,
                description: "Quality of the local indoor and outdoor environment",
                detail: "From the English Indices of Deprivation. Measures deprivation in the quality of the local environment. Combines the Indoors sub-domain (housing quality, central heating, housing conditions) and Outdoors sub-domain (air quality, road traffic accidents). Higher scores indicate poorer living environments.",
                source: "iod",
            },
            FeatureConfig {
                name: "Indoors Sub-domain Score",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 0.1,
                description: "Housing quality and conditions in the local area",
                detail: "From the English Indices of Deprivation, Living Environment domain. Measures the quality of housing stock: houses without central heating, housing in poor condition, and houses failing Decent Homes standards. Higher scores indicate worse housing conditions.",
                source: "iod",
            },
            FeatureConfig {
                name: "Outdoors Sub-domain Score",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 0.1,
                description: "Air quality and road safety in the local area",
                detail: "From the English Indices of Deprivation, Living Environment domain. Measures the outdoor living environment quality through air quality indicators and road traffic accident casualties involving pedestrians and cyclists. Higher scores indicate poorer outdoor environments.",
                source: "iod",
            },
        ],
    },
    FeatureGroup {
        name: "Crime",
        features: &[
            FeatureConfig {
                name: "Anti-social behaviour (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly anti-social behaviour incidents in the area",
                detail: "Average number of anti-social behaviour incidents per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes nuisance, environmental, and personal anti-social behaviour.",
                source: "crime",
            },
            FeatureConfig {
                name: "Violence and sexual offences (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly violent and sexual offences in the area",
                detail: "Average number of violence and sexual offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes assault, harassment, and sexual offences.",
                source: "crime",
            },
            FeatureConfig {
                name: "Criminal damage and arson (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly criminal damage and arson in the area",
                detail: "Average number of criminal damage and arson incidents per year in the LSOA, from police.uk street-level crime data (2023-2025).",
                source: "crime",
            },
            FeatureConfig {
                name: "Burglary (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly burglary offences in the area",
                detail: "Average number of burglary offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes residential and commercial burglary.",
                source: "crime",
            },
            FeatureConfig {
                name: "Vehicle crime (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly vehicle crime in the area",
                detail: "Average number of vehicle crime incidents per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes theft of and from vehicles.",
                source: "crime",
            },
            FeatureConfig {
                name: "Robbery (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly robbery offences in the area",
                detail: "Average number of robbery offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Robbery involves theft with force or threat of force.",
                source: "crime",
            },
            FeatureConfig {
                name: "Other theft (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly other theft offences in the area",
                detail: "Average number of 'other theft' offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes theft not classified under burglary, vehicle crime, shoplifting, or bicycle theft.",
                source: "crime",
            },
            FeatureConfig {
                name: "Shoplifting (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly shoplifting offences in the area",
                detail: "Average number of shoplifting offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
                source: "crime",
            },
            FeatureConfig {
                name: "Drugs (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly drug offences in the area",
                detail: "Average number of drug offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes possession and trafficking offences.",
                source: "crime",
            },
            FeatureConfig {
                name: "Possession of weapons (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly weapons possession offences in the area",
                detail: "Average number of possession of weapons offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
                source: "crime",
            },
            FeatureConfig {
                name: "Public order (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly public order offences in the area",
                detail: "Average number of public order offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes causing fear, alarm, or distress.",
                source: "crime",
            },
            FeatureConfig {
                name: "Bicycle theft (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly bicycle theft in the area",
                detail: "Average number of bicycle theft offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
                source: "crime",
            },
            FeatureConfig {
                name: "Theft from the person (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly theft from the person in the area",
                detail: "Average number of theft from the person offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes pickpocketing and bag snatching without force.",
                source: "crime",
            },
            FeatureConfig {
                name: "Other crime (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Average yearly other crime in the area",
                detail: "Average number of other crime offences per year in the LSOA, from police.uk street-level crime data (2023-2025). A catch-all category for offences not classified elsewhere.",
                source: "crime",
            },
            FeatureConfig {
                name: "Serious crime (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Aggregate of serious crime categories per year",
                detail: "Sum of violence, robbery, burglary, and weapons possession per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single serious crime metric.",
                source: "crime",
            },
            FeatureConfig {
                name: "Minor crime (avg/yr)",
                bounds: Bounds::Percentile {
                    low: 2.0,
                    high: 98.0,
                },
                step: 1.0,
                description: "Aggregate of minor crime categories per year",
                detail: "Sum of anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single minor crime metric.",
                source: "crime",
            },
        ],
    },
    FeatureGroup {
        name: "Demographics",
        features: &[
            FeatureConfig {
                name: "% White",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 100.0,
                },
                step: 1.0,
                description: "Percentage of population identifying as White",
                detail: "From the 2021 Census. Percentage of the local authority population identifying as White (English, Welsh, Scottish, Northern Irish, British, Irish, Gypsy or Irish Traveller, Roma, or any other White background).",
                source: "ethnicity",
            },
            FeatureConfig {
                name: "% Asian",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 100.0,
                },
                step: 1.0,
                description: "Percentage of population identifying as Asian",
                detail: "From the 2021 Census. Percentage of the local authority population identifying as Asian or Asian British (Indian, Pakistani, Bangladeshi, Chinese, or any other Asian background).",
                source: "ethnicity",
            },
            FeatureConfig {
                name: "% Black",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 100.0,
                },
                step: 1.0,
                description: "Percentage of population identifying as Black",
                detail: "From the 2021 Census. Percentage of the local authority population identifying as Black, Black British, Caribbean, or African.",
                source: "ethnicity",
            },
            FeatureConfig {
                name: "% Mixed",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 100.0,
                },
                step: 1.0,
                description: "Percentage of population identifying as Mixed or Multiple ethnic groups",
                detail: "From the 2021 Census. Percentage of the local authority population identifying as Mixed or Multiple ethnic groups (White and Black Caribbean, White and Black African, White and Asian, or any other Mixed or Multiple background).",
                source: "ethnicity",
            },
            FeatureConfig {
                name: "% Other",
                bounds: Bounds::Fixed {
                    min: 0.0,
                    max: 100.0,
                },
                step: 1.0,
                description: "Percentage of population identifying as Other ethnic group",
                detail: "From the 2021 Census. Percentage of the local authority population identifying as Other ethnic group (Arab or any other ethnic group not covered by the main categories).",
                source: "ethnicity",
            },
        ],
    },
    FeatureGroup {
        name: "Amenities",
        features: &[
            FeatureConfig {
                name: "Restaurants within 2km",
                bounds: Bounds::Percentile {
                    low: 5.0,
                    high: 95.0,
                },
                step: 1.0,
                description: "Number of restaurants and cafes within 2km",
                detail: "Count of restaurants, cafes, and food establishments within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data using haversine distance calculation with a 0.05° spatial grid for candidate reduction.",
                source: "osm-pois",
            },
            FeatureConfig {
                name: "Groceries within 2km",
                bounds: Bounds::Percentile {
                    low: 5.0,
                    high: 95.0,
                },
                step: 1.0,
                description: "Number of grocery shops and supermarkets within 2km",
                detail: "Count of supermarkets, convenience stores, and other grocery shops within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
                source: "osm-pois",
            },
            FeatureConfig {
                name: "Parks within 2km",
                bounds: Bounds::Percentile {
                    low: 5.0,
                    high: 95.0,
                },
                step: 1.0,
                description: "Number of parks and green spaces within 2km",
                detail: "Count of parks, gardens, nature reserves, and other green spaces within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
                source: "osm-pois",
            },
        ],
    },
    FeatureGroup {
        name: "Environment",
        features: &[
            FeatureConfig {
                name: "Noise (dB)",
                bounds: Bounds::Fixed {
                    min: 50.0,
                    max: 80.0,
                },
                step: 1.0,
                description: "Road noise level at the postcode in decibels (Lden)",
                detail: "Road noise level in decibels (Lden — day-evening-night 24-hour weighted average) from Defra's Strategic Noise Mapping Round 4 (2022). Modelled at 4m above ground on a 10m grid. Sampled at postcode centroids via WCS GeoTIFF tiles. Values above ~55 dB are generally considered noticeable; above ~70 dB can affect health.",
                source: "noise",
            },
            FeatureConfig {
                name: "Max available download speed (Mbps)",
                bounds: Bounds::Percentile {
                    low: 5.0,
                    high: 95.0,
                },
                step: 10.0,
                description: "Maximum broadband download speed available at the postcode",
                detail: "Maximum available fixed broadband download speed in Megabits per second, from Ofcom's Connected Nations 2025 report. Measured at Output Area level and represents the maximum speed available from any provider, not actual achieved speeds.",
                source: "broadband",
            },
        ],
    },
 ];
 /// Metadata for the categorical (enum) features, grouped for display.
 ///
 /// Each entry carries the feature name, an optional explicit ordering of its
 /// values (`None` when no order is configured here), user-facing description
 /// and detail text, and a data-source identifier.
 pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[EnumFeatureGroup {
    name: "Property",
    features: &[
        EnumFeatureConfig {
            // NOTE(review): "Leashold" looks like a typo for "Leasehold". Feature
            // names appear to be matched by exact string elsewhere in the server,
            // so confirm the column name in the data before renaming.
            name: "Leashold/Freehold",
            order: Some(&["Freehold", "Leasehold"]),
            description: "Whether the property is leasehold or freehold",
            detail: "From HM Land Registry Price Paid data. Freehold means you own the building and the land it stands on. Leasehold means you own the building but not the land — you have a lease from the freeholder for a set number of years.",
            source: "price-paid",
        },
        EnumFeatureConfig {
            name: "Current energy rating",
            order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
            description: "Current EPC energy efficiency rating (A-G)",
            detail: "The current energy efficiency rating from the Energy Performance Certificate, graded A (most efficient) to G (least efficient). Based on the energy costs per square metre of floor area for heating, hot water, lighting, and ventilation.",
            source: "epc",
        },
        EnumFeatureConfig {
            name: "Potential energy rating",
            order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
            description: "Achievable EPC rating after recommended improvements",
            detail: "The potential energy efficiency rating that could be achieved if all cost-effective improvements recommended in the EPC were carried out. Graded A (most efficient) to G (least efficient).",
            source: "epc",
        },
        EnumFeatureConfig {
            name: "Property type",
            order: Some(&["Detached", "Semi-Detached", "Terraced", "Flat"]),
            description: "Type of property: detached, semi-detached, terraced, or flat",
            detail: "From HM Land Registry Price Paid data. The broad property type classification: Detached, Semi-Detached, Terraced, or Flat/Maisonette.",
            source: "price-paid",
        },
        EnumFeatureConfig {
            name: "Property type/built form",
            // No explicit value order is configured for this feature.
            order: None,
            description: "Detailed property type and built form from the EPC",
            detail: "A more detailed classification from the Energy Performance Certificate combining property type and built form. Examples include 'Semi-Detached House', 'Mid-Terrace House', 'Ground-Floor Flat', 'Detached Bungalow', etc.",
            source: "epc",
        },
    ],
 }];
 /// Names of every numeric feature, flattened across `FEATURE_GROUPS` in
 /// declaration order (group by group, feature by feature).
 pub fn all_numeric_feature_names() -> Vec<&'static str> {
    let mut names = Vec::new();
    for group in FEATURE_GROUPS {
        names.extend(group.features.iter().map(|config| config.name));
    }
    names
 }
 /// Names of every enum feature, flattened across `ENUM_FEATURE_GROUPS` in
 /// declaration order (group by group, feature by feature).
 pub fn all_enum_feature_names() -> Vec<&'static str> {
    let mut names = Vec::new();
    for group in ENUM_FEATURE_GROUPS {
        names.extend(group.features.iter().map(|config| config.name));
    }
    names
 }
 /// Return the configured value order of the enum feature called `name`.
 ///
 /// Yields `None` when no enum feature has that name, or when the first
 /// matching feature has no order configured.
 pub fn order_for(name: &str) -> Option<&'static [&'static str]> {
    for group in ENUM_FEATURE_GROUPS {
        for config in group.features {
            if config.name == name {
                // First match wins, even if its order is `None`.
                return config.order;
            }
        }
    }
    None
 }
 /// Return the `Bounds` configuration of the numeric feature called `name`,
 /// or `None` when no numeric feature has that name.
 pub fn bounds_for(name: &str) -> Option<&'static Bounds> {
    for group in FEATURE_GROUPS {
        for config in group.features {
            if config.name == name {
                return Some(&config.bounds);
            }
        }
    }
    None
 }
--- a/server-rs/src/filter.rs
+++ b/server-rs/src/filter.rs
@ -0,0 +1,86 @@
 use crate::consts::ENUM_NULL;
 use crate::data::EnumFeatureData;
 /// A numeric range filter produced by `parse_filters`.
 pub struct ParsedFilter {
    /// Position of the feature in the `feature_names` slice given to
    /// `parse_filters`; also used as the column offset into the flat
    /// feature data in `row_passes_filters`.
    pub feat_idx: usize,
    /// Lower bound; `row_passes_filters` treats it as inclusive.
    pub min: f64,
    /// Upper bound; `row_passes_filters` treats it as inclusive.
    pub max: f64,
 }
 /// A categorical filter produced by `parse_filters`.
 pub struct ParsedEnumFilter {
    /// Position of the feature in the `enum_features` slice given to
    /// `parse_filters`.
    pub enum_idx: usize,
    /// Accepted value codes: each is the position of a requested value in
    /// that feature's `values` list. A row passes when its code is one of these.
    pub allowed: Vec<u8>,
 }
 /// Parse a comma-separated filter string into numeric and enum filters.
 ///
 /// Each comma-separated entry has one of two shapes:
 /// - Numeric format: `name:min:max`
 /// - Enum format: `name:val1|val2|val3` (pipe-separated values)
 ///
 /// The part before the first `:` is the feature name. If it matches an enum
 /// feature the entry is parsed as an enum filter, otherwise as a numeric one.
 /// Whitespace around names and values is trimmed.
 ///
 /// Malformed entries are skipped silently: entries without a `:`, numeric
 /// entries whose bounds do not parse as `f64`, and numeric entries naming an
 /// unknown feature. Enum values that are not in the feature's value list are
 /// dropped from `allowed`; the filter itself is still emitted, so an entry
 /// with no recognised values yields a filter that matches no row.
 ///
 /// Returns `(numeric_filters, enum_filters)` in the order the entries appear.
 pub fn parse_filters(
    filter_str: Option<&str>,
    feature_names: &[String],
    enum_features: &[EnumFeatureData],
 ) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
    let mut numeric = Vec::new();
    let mut enums = Vec::new();
    let Some(input) = filter_str.filter(|text| !text.is_empty()) else {
        return (numeric, enums);
    };
    for entry in input.split(',') {
        let Some((name, rest)) = entry.split_once(':') else {
            continue;
        };
        let name = name.trim();
        let rest = rest.trim();
        if let Some(enum_idx) = enum_features.iter().position(|enum_feat| enum_feat.name == name) {
            let values = &enum_features[enum_idx].values;
            let allowed: Vec<u8> = rest
                .split('|')
                .filter_map(|value| {
                    let value = value.trim();
                    values
                        .iter()
                        .position(|existing| existing == value)
                        // Codes are stored as u8. A position above 255 cannot be
                        // represented, and a plain `as u8` cast would wrap it onto
                        // an unrelated code, so such values are dropped instead.
                        .filter(|&position| position <= usize::from(u8::MAX))
                        .map(|position| position as u8)
                })
                .collect();
            enums.push(ParsedEnumFilter { enum_idx, allowed });
            continue;
        }
        let Some((min_text, max_text)) = rest.split_once(':') else {
            continue;
        };
        let (Ok(min), Ok(max)) = (
            min_text.trim().parse::<f64>(),
            max_text.trim().parse::<f64>(),
        ) else {
            continue;
        };
        if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) {
            numeric.push(ParsedFilter { feat_idx, min, max });
        }
    }
    (numeric, enums)
 }
 /// Decide whether `row` satisfies every numeric and every enum filter.
 ///
 /// `feature_data` is read as a flat row-major table with `num_features`
 /// values per row. A numeric filter passes only when the value is finite and
 /// lies within `[min, max]` inclusive. An enum filter passes only when the
 /// row's code is not `ENUM_NULL` and is listed in `allowed`. With no filters
 /// at all the row passes.
 pub fn row_passes_filters(
    row: usize,
    filters: &[ParsedFilter],
    enum_filters: &[ParsedEnumFilter],
    feature_data: &[f64],
    num_features: usize,
    enum_features: &[EnumFeatureData],
 ) -> bool {
    // Numeric filters are checked first; the first failure rejects the row.
    for numeric_filter in filters {
        let cell = feature_data[row * num_features + numeric_filter.feat_idx];
        let in_range = (numeric_filter.min..=numeric_filter.max).contains(&cell);
        if !(cell.is_finite() && in_range) {
            return false;
        }
    }
    for enum_filter in enum_filters {
        let code = enum_features[enum_filter.enum_idx].data[row];
        if code == ENUM_NULL || !enum_filter.allowed.contains(&code) {
            return false;
        }
    }
    true
 }
--- a/server-rs/src/grid_index.rs
+++ b/server-rs/src/grid_index.rs
@ -0,0 +1,147 @@
 /// Grid-based spatial index for fast rectangle queries over rows of
 /// latitude/longitude points.
 ///
 /// The bounding box of the indexed points (padded by one cell on each side)
 /// is divided into square cells of `cell_size` degrees; the server builds it
 /// with 0.01-degree cells. Each cell stores the indices of the rows whose
 /// lat/lon falls within it.
 pub struct GridIndex {
    /// Latitude of the grid's southern edge (row 0 starts here).
    min_lat: f64,
    /// Longitude of the grid's western edge (column 0 starts here).
    min_lon: f64,
    /// Side length of one cell, in degrees.
    cell_size: f64,
    /// Number of cell columns (longitude direction).
    cols: usize,
    /// Number of cell rows (latitude direction).
    rows: usize,
    /// cells[row * cols + col] = vec of row indices
    cells: Vec<Vec<u32>>,
 }
 impl GridIndex {
    /// Build a grid over the bounding box of the given points.
    ///
    /// `lat[i]` and `lon[i]` are the coordinates of row `i`; the `u32` values
    /// stored in the index are positions in these slices. The bounding box is
    /// padded by one `cell_size` on every side.
    ///
    /// Rows whose latitude or longitude is NaN or infinite are left out of the
    /// index. An infinite value would make the grid extent infinite, and a NaN
    /// would otherwise be filed under row/column 0 by the float-to-integer cast.
    ///
    /// # Panics
    /// Panics if `cell_size` is not a positive finite number, or if `lat` and
    /// `lon` have different lengths.
    pub fn build(lat: &[f64], lon: &[f64], cell_size: f64) -> Self {
        assert!(
            cell_size.is_finite() && cell_size > 0.0,
            "GridIndex cell_size must be positive and finite, got {cell_size}"
        );
        assert_eq!(
            lat.len(),
            lon.len(),
            "GridIndex::build requires lat and lon slices of equal length"
        );
        let mut min_lat = f64::INFINITY;
        let mut max_lat = f64::NEG_INFINITY;
        let mut min_lon = f64::INFINITY;
        let mut max_lon = f64::NEG_INFINITY;
        for (&point_lat, &point_lon) in lat.iter().zip(lon) {
            if !(point_lat.is_finite() && point_lon.is_finite()) {
                continue;
            }
            min_lat = min_lat.min(point_lat);
            max_lat = max_lat.max(point_lat);
            min_lon = min_lon.min(point_lon);
            max_lon = max_lon.max(point_lon);
        }
        if min_lat > max_lat {
            // No usable point at all: return a grid with zero cells, for which
            // `clamp_bounds` always yields `None`.
            tracing::debug!("Grid index built with no finite coordinates");
            return GridIndex {
                min_lat: 0.0,
                min_lon: 0.0,
                cell_size,
                cols: 0,
                rows: 0,
                cells: Vec::new(),
            };
        }
        // Pad the extent by one cell so that every point maps strictly inside.
        min_lat -= cell_size;
        min_lon -= cell_size;
        max_lat += cell_size;
        max_lon += cell_size;
        let rows = ((max_lat - min_lat) / cell_size).ceil() as usize + 1;
        let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1;
        tracing::debug!(
            rows_grid = rows,
            cols_grid = cols,
            total_cells = rows * cols,
            cell_size,
            "Building grid index"
        );
        let mut cells: Vec<Vec<u32>> = vec![Vec::new(); rows * cols];
        for (index, (&point_lat, &point_lon)) in lat.iter().zip(lon).enumerate() {
            if !(point_lat.is_finite() && point_lon.is_finite()) {
                continue;
            }
            let grid_row = ((point_lat - min_lat) / cell_size) as usize;
            let grid_col = ((point_lon - min_lon) / cell_size) as usize;
            let cell_index = grid_row * cols + grid_col;
            // NOTE(review): row indices are narrowed to u32; this assumes fewer
            // than 2^32 rows.
            cells[cell_index].push(index as u32);
        }
        tracing::debug!("Grid index built");
        GridIndex {
            min_lat,
            min_lon,
            cell_size,
            cols,
            rows,
            cells,
        }
    }
    /// Collect the row indices stored in every cell that overlaps the
    /// rectangle `[south, north] x [west, east]`.
    ///
    /// Whole cells are returned, so the result is a candidate set that can
    /// include rows lying slightly outside the rectangle. Returns an empty
    /// vector when the rectangle does not overlap the grid.
    pub fn query(&self, south: f64, west: f64, north: f64, east: f64) -> Vec<u32> {
        let Some((row_min, row_max, col_min, col_max)) =
            self.clamp_bounds(south, west, north, east)
        else {
            return Vec::new();
        };
        let mut result = Vec::new();
        for row in row_min..=row_max {
            let row_start = row * self.cols;
            for col in col_min..=col_max {
                result.extend_from_slice(&self.cells[row_start + col]);
            }
        }
        result
    }
    /// Invoke `callback` once for each row index stored in a cell that
    /// overlaps the rectangle, without building an intermediate vector.
    ///
    /// Visits the same rows, in the same order, as `query` returns.
    #[inline]
    pub fn for_each_in_bounds(
        &self,
        south: f64,
        west: f64,
        north: f64,
        east: f64,
        mut callback: impl FnMut(u32),
    ) {
        let Some((row_min, row_max, col_min, col_max)) =
            self.clamp_bounds(south, west, north, east)
        else {
            return;
        };
        for row in row_min..=row_max {
            let row_start = row * self.cols;
            for col in col_min..=col_max {
                for &row_idx in &self.cells[row_start + col] {
                    callback(row_idx);
                }
            }
        }
    }
    /// Convert a lat/lon rectangle into inclusive cell ranges
    /// `(row_min, row_max, col_min, col_max)` clamped to the grid.
    ///
    /// Returns `None` when the rectangle lies entirely outside the grid or is
    /// inverted after clamping.
    fn clamp_bounds(
        &self,
        south: f64,
        west: f64,
        north: f64,
        east: f64,
    ) -> Option<(usize, usize, usize, usize)> {
        // Work in isize first so positions below/left of the grid stay negative.
        let row_min_raw = ((south - self.min_lat) / self.cell_size) as isize;
        let row_max_raw = ((north - self.min_lat) / self.cell_size) as isize;
        let col_min_raw = ((west - self.min_lon) / self.cell_size) as isize;
        let col_max_raw = ((east - self.min_lon) / self.cell_size) as isize;
        let row_min = row_min_raw.max(0) as usize;
        let row_max_clamped = row_max_raw.min(self.rows as isize - 1);
        let col_min = col_min_raw.max(0) as usize;
        let col_max_clamped = col_max_raw.min(self.cols as isize - 1);
        if row_max_clamped < 0 || col_max_clamped < 0 {
            return None;
        }
        let row_max = row_max_clamped as usize;
        let col_max = col_max_clamped as usize;
        if row_min > row_max || col_min > col_max {
            return None;
        }
        Some((row_min, row_max, col_min, col_max))
    }
 }
--- a/server-rs/src/main.rs
+++ b/server-rs/src/main.rs
@ -0,0 +1,242 @@
 mod consts;
 mod data;
 mod features;
 mod filter;
 mod grid_index;
 mod routes;
 mod state;
 #[cfg(test)]
 mod tests;
 use std::path::PathBuf;
 use std::sync::Arc;
 use anyhow::{bail, Context};
 use axum::routing::get;
 use axum::Router;
 use clap::Parser;
 use tower_http::compression::CompressionLayer;
 use tower_http::cors::{Any, CorsLayer};
 use tower_http::services::ServeDir;
 use tower_http::trace::TraceLayer;
 use tracing::info;
 use tracing_subscriber::EnvFilter;
 use state::AppState;
 // Command-line arguments for the server binary, parsed with clap's derive API.
 // Plain `//` comments are used for maintainer notes here because clap's derive
 // turns `///` doc comments into user-visible help text.
 #[derive(Parser)]
 #[command(name = "narrowit", about = "Narrowit property map server")]
 struct Cli {
    /// Path to the wide property parquet file
    #[arg(long)]
    data: PathBuf,
    /// Path to the POI parquet file
    #[arg(long)]
    pois: PathBuf,
    // Optional: when omitted, `main` looks for a `dist` directory next to the
    // executable and then falls back to `frontend/dist`.
    /// Path to the frontend dist directory
    #[arg(long)]
    dist: Option<PathBuf>,
 }
 /// Server entry point.
 ///
 /// Initialises logging, parses the command line, loads the property and POI
 /// parquet files, builds the spatial indexes and lookup tables held in
 /// `AppState`, then serves the `/api/*` routes (plus the frontend `dist`
 /// directory when it exists) on `consts::SERVER_ADDRESS`.
 ///
 /// Returns an error if either input file is missing or fails to load, if the
 /// POI groups in the data do not match `consts::POI_GROUP_ORDER`, or if the
 /// listener cannot be bound.
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
        )
        .with_ansi(true)
        .init();
    let cli = Cli::parse();
    // Check both input paths up front so a missing POI file is reported
    // before the property data is loaded and indexed.
    let parquet_path = &cli.data;
    if !parquet_path.exists() {
        bail!(
            "Property parquet file not found: {}",
            parquet_path.display()
        );
    }
    let poi_path = &cli.pois;
    if !poi_path.exists() {
        bail!("POI parquet file not found: {}", poi_path.display());
    }
    info!("Loading property data from {}", parquet_path.display());
    let property_data = data::PropertyData::load(parquet_path)?;
    info!(
        rows = property_data.lat.len(),
        features = property_data.num_features,
        enums = property_data.enum_features.len(),
        "Property data loaded"
    );
    info!("Building spatial grid index (0.01° cells)");
    let grid = grid_index::GridIndex::build(&property_data.lat, &property_data.lon, 0.01);
    info!(
        "Precomputing H3 cells for resolutions {}-{}",
        consts::H3_PRECOMPUTE_MIN,
        consts::H3_PRECOMPUTE_MAX
    );
    let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon)?;
    info!("Loading POI data from {}", poi_path.display());
    let poi_data = data::POIData::load(poi_path)?;
    info!(pois = poi_data.lat.len(), "POI data loaded");
    info!("Building POI spatial grid index");
    let poi_grid = grid_index::GridIndex::build(&poi_data.lat, &poi_data.lng, 0.01);
    // Precompute the `min_<name>` / `max_<name>` key strings once at startup.
    let min_keys: Vec<String> = property_data
        .feature_names
        .iter()
        .map(|name| format!("min_{}", name))
        .collect();
    let max_keys: Vec<String> = property_data
        .feature_names
        .iter()
        .map(|name| format!("max_{}", name))
        .collect();
    let enum_min_keys: Vec<String> = property_data
        .enum_features
        .iter()
        .map(|enum_feature| format!("min_{}", enum_feature.name))
        .collect();
    let enum_max_keys: Vec<String> = property_data
        .enum_features
        .iter()
        .map(|enum_feature| format!("max_{}", enum_feature.name))
        .collect();
    // Precompute POI category groups
    let poi_category_groups = {
        let mut group_cats: std::collections::HashMap<String, std::collections::HashSet<String>> =
            std::collections::HashMap::new();
        for (category, group) in poi_data.category.iter().zip(poi_data.group.iter()) {
            group_cats
                .entry(group.clone())
                .or_default()
                .insert(category.clone());
        }
        // Validate that data groups match the hardcoded order exactly
        let expected: std::collections::HashSet<&str> =
            consts::POI_GROUP_ORDER.iter().copied().collect();
        let actual: std::collections::HashSet<&str> =
            group_cats.keys().map(|key| key.as_str()).collect();
        let missing_from_data: Vec<&&str> = expected.difference(&actual).collect();
        let missing_from_order: Vec<&&str> = actual.difference(&expected).collect();
        if !missing_from_data.is_empty() || !missing_from_order.is_empty() {
            bail!(
                "POI group mismatch!\n  In POI_GROUP_ORDER but not in data: {:?}\n  In data but not in POI_GROUP_ORDER: {:?}",
                missing_from_data, missing_from_order
            );
        }
        // Emit groups in the hardcoded order, each with its categories sorted.
        consts::POI_GROUP_ORDER
            .iter()
            .map(|group_name| {
                let name = group_name.to_string();
                let mut categories: Vec<String> = group_cats
                    .remove(&name)
                    .context("POI group validated but missing from map")?
                    .into_iter()
                    .collect();
                categories.sort();
                Ok(state::POICategoryGroup { name, categories })
            })
            .collect::<anyhow::Result<Vec<_>>>()?
    };
    // Precompute enum name → index map
    let enum_name_to_idx: rustc_hash::FxHashMap<String, usize> = property_data
        .enum_features
        .iter()
        .enumerate()
        .map(|(index, enum_feature)| (enum_feature.name.clone(), index))
        .collect();
    let state = Arc::new(AppState {
        data: property_data,
        grid,
        h3_cells,
        poi_data,
        poi_grid,
        min_keys,
        max_keys,
        enum_min_keys,
        enum_max_keys,
        poi_category_groups,
        enum_name_to_idx,
    });
    let cors = CorsLayer::new()
        .allow_origin(Any)
        .allow_methods(Any)
        .allow_headers(Any);
    // Each route closure owns its own handle to the shared state.
    let state_features = state.clone();
    let state_hexagons = state.clone();
    let state_pois = state.clone();
    let state_poi_categories = state.clone();
    let state_hexagon_properties = state.clone();
    let state_hexagon_stats = state.clone();
    let api = Router::new()
        .route(
            "/api/features",
            get(move || routes::get_features(state_features.clone())),
        )
        .route(
            "/api/hexagons",
            get(move |query| routes::get_hexagons(state_hexagons.clone(), query)),
        )
        .route(
            "/api/pois",
            get(move |query| routes::get_pois(state_pois.clone(), query)),
        )
        .route(
            "/api/poi-categories",
            get(move || routes::get_poi_categories(state_poi_categories.clone())),
        )
        .route(
            "/api/hexagon-properties",
            get(move |query| {
                routes::get_hexagon_properties(state_hexagon_properties.clone(), query)
            }),
        )
        .route(
            "/api/hexagon-stats",
            get(move |query| routes::get_hexagon_stats(state_hexagon_stats.clone(), query)),
        );
    let frontend_dist = cli.dist.unwrap_or_else(|| {
        // Check next to the binary first, then fall back to working directory
        if let Ok(executable) = std::env::current_exe() {
            let executable_dir = executable.parent().unwrap_or_else(|| std::path::Path::new("."));
            let dist_next_to_binary = executable_dir.join("dist");
            if dist_next_to_binary.exists() {
                return dist_next_to_binary;
            }
        }
        PathBuf::from("frontend/dist")
    });
    // Serve the frontend only when the directory exists; otherwise API only.
    let app = if frontend_dist.exists() {
        api.fallback_service(ServeDir::new(frontend_dist))
    } else {
        api
    };
    let app = app
        .layer(cors)
        .layer(CompressionLayer::new().zstd(true).gzip(true))
        .layer(TraceLayer::new_for_http());
    let addr = consts::SERVER_ADDRESS;
    let listener = tokio::net::TcpListener::bind(addr)
        .await
        .with_context(|| format!("Failed to bind to {addr}"))?;
    info!("Server listening on {}", addr);
    axum::serve(listener, app)
        .await
        .context("Server error")?;
    Ok(())
 }
--- a/server-rs/src/routes/features.rs
+++ b/server-rs/src/routes/features.rs
@ -0,0 +1,136 @@
 use std::sync::Arc;
 use axum::response::Json;
 use serde::Serialize;
 use tracing::info;
 use crate::data::Histogram;
 use crate::features::{ENUM_FEATURE_GROUPS, FEATURE_GROUPS};
 use crate::state::AppState;
 /// Description of a single feature as returned by `GET /api/features`.
 ///
 /// Serialized with an internal `type` tag whose value is `"numeric"` or
 /// `"enum"`, alongside the variant's own fields.
 #[derive(Serialize)]
 #[serde(tag = "type")]
 pub enum FeatureInfo {
    /// A numeric feature with a slider range and a histogram.
    #[serde(rename = "numeric")]
    Numeric {
        name: String,
        // `min`/`max` are filled from the feature's `slider_min`/`slider_max` stats.
        min: f64,
        max: f64,
        // Slider step taken from the static feature configuration.
        step: f64,
        histogram: Histogram,
        description: &'static str,
        detail: &'static str,
        source: &'static str,
    },
    /// A categorical feature with its list of value labels.
    #[serde(rename = "enum")]
    Enum {
        name: String,
        // Value labels, cloned from the loaded enum feature data.
        values: Vec<String>,
        description: &'static str,
        detail: &'static str,
        source: &'static str,
    },
 }
 /// A named group of features inside the `GET /api/features` response.
 #[derive(Serialize)]
 pub struct FeatureGroupResponse {
    /// Group name shared by the numeric and enum feature-group tables.
    name: String,
    /// Numeric features first, then enum features, for this group.
    features: Vec<FeatureInfo>,
 }
 /// Top-level JSON body returned by `get_features`.
 #[derive(Serialize)]
 pub struct FeaturesResponse {
    /// Non-empty feature groups, in first-seen group-name order.
    groups: Vec<FeatureGroupResponse>,
 }
 /// Handler for `GET /api/features`.
 ///
 /// Builds the list of feature groups by merging the static numeric and enum
 /// group tables by name. A configured feature is only reported when it is
 /// also present in the loaded data; groups that end up empty are omitted.
 pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
    let data = &state.data;

    // Group names in first-seen order: numeric groups first, then any enum
    // groups whose name has not been seen yet.
    let mut group_names: Vec<&str> = Vec::new();
    let declared_names = FEATURE_GROUPS
        .iter()
        .map(|group| group.name)
        .chain(ENUM_FEATURE_GROUPS.iter().map(|group| group.name));
    for declared in declared_names {
        if !group_names.contains(&declared) {
            group_names.push(declared);
        }
    }

    let mut groups: Vec<FeatureGroupResponse> = Vec::new();
    for &group_name in &group_names {
        let mut features: Vec<FeatureInfo> = Vec::new();

        // Numeric features configured for this group that exist in the data.
        let numeric_configs = FEATURE_GROUPS
            .iter()
            .filter(|group| group.name == group_name)
            .flat_map(|group| group.features.iter());
        for config in numeric_configs {
            let Some(feat_idx) = data
                .feature_names
                .iter()
                .position(|candidate| candidate == config.name)
            else {
                continue;
            };
            let stats = &data.feature_stats[feat_idx];
            features.push(FeatureInfo::Numeric {
                name: config.name.to_string(),
                min: stats.slider_min,
                max: stats.slider_max,
                step: config.step,
                histogram: stats.histogram.clone(),
                description: config.description,
                detail: config.detail,
                source: config.source,
            });
        }

        // Enum features configured for this group that exist in the data.
        let enum_configs = ENUM_FEATURE_GROUPS
            .iter()
            .filter(|group| group.name == group_name)
            .flat_map(|group| group.features.iter());
        for config in enum_configs {
            let Some(loaded) = data
                .enum_features
                .iter()
                .find(|candidate| candidate.name == config.name)
            else {
                continue;
            };
            features.push(FeatureInfo::Enum {
                name: config.name.to_string(),
                values: loaded.values.clone(),
                description: config.description,
                detail: config.detail,
                source: config.source,
            });
        }

        if features.is_empty() {
            continue;
        }
        groups.push(FeatureGroupResponse {
            name: group_name.to_string(),
            features,
        });
    }

    // Tally both kinds in one pass, for the request log only.
    let (mut num_numeric, mut num_enum) = (0usize, 0usize);
    for feature in groups.iter().flat_map(|group| group.features.iter()) {
        match feature {
            FeatureInfo::Numeric { .. } => num_numeric += 1,
            FeatureInfo::Enum { .. } => num_enum += 1,
        }
    }
    info!(
        numeric = num_numeric,
        enums = num_enum,
        groups = groups.len(),
        "GET /api/features"
    );
    Json(FeaturesResponse { groups })
 }
--- a/server-rs/src/routes/hexagon_stats.rs
+++ b/server-rs/src/routes/hexagon_stats.rs
@ -0,0 +1,251 @@
 use std::fmt::Write;
 use std::str::FromStr;
 use std::sync::Arc;
 use axum::extract::Query;
 use axum::http::StatusCode;
 use axum::response::IntoResponse;
 use serde::Deserialize;
 use tracing::{info, warn};
 use crate::consts::{ENUM_NULL, HISTOGRAM_BINS};
 use crate::filter::{parse_filters, row_passes_filters};
 use crate::state::AppState;
 use super::parse::h3_cell_bounds;
 /// Query-string parameters accepted by `get_hexagon_stats`.
 #[derive(Deserialize)]
 pub struct HexagonStatsParams {
    /// H3 cell index in string form; parsed with `h3o::CellIndex::from_str`.
    pub h3: String,
    /// Resolution used to index the per-resolution precomputed cell table.
    pub resolution: u8,
    /// Optional raw filter string, handed to `parse_filters`.
    pub filters: Option<String>,
 }
 /// Handler for `GET /api/hexagon-stats`: summary statistics for one H3 cell.
 ///
 /// Validates the cell index and resolution, parses the optional filters, then
 /// (on a blocking thread) collects every row whose precomputed cell at the
 /// requested resolution equals the requested cell and which passes the
 /// filters. The response is hand-built JSON of the form
 /// `{"count":N,"numeric_features":[...],"enum_features":[...]}`.
 ///
 /// Returns `400 Bad Request` when the cell string does not parse or the
 /// resolution has no precomputed cells, and `500` if the blocking task fails
 /// to join.
 pub async fn get_hexagon_stats(
    state: Arc<AppState>,
    Query(params): Query<HexagonStatsParams>,
 ) -> Result<impl IntoResponse, (StatusCode, String)> {
    let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
        warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
        (StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error))
    })?;
    let cell_u64: u64 = cell.into();
    let resolution = params.resolution as usize;
    // Only resolutions with a non-empty precomputed cell table can be served.
    if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
        warn!(
            resolution,
            "Invalid or non-precomputed resolution for hexagon-stats"
        );
        return Err((
            StatusCode::BAD_REQUEST,
            "Invalid or non-precomputed resolution".to_string(),
        ));
    }
    // Owned copies so the values can be moved into the blocking closure for logging.
    let h3_str = params.h3.clone();
    let filters_str = params.filters.clone();
    let (parsed_filters, parsed_enum_filters) = parse_filters(
        params.filters.as_deref(),
        &state.data.feature_names,
        &state.data.enum_features,
    );
    let num_filters = parsed_filters.len() + parsed_enum_filters.len();
    let result = tokio::task::spawn_blocking(move || {
        let start_time = std::time::Instant::now();
        let h3_data = &state.h3_cells[resolution];
        let num_features = state.data.num_features;
        let feature_data = &state.data.feature_data;
        let enum_features = &state.data.enum_features;
        // Bounding box of the cell padded by 0.001 degrees; the exact cell-id
        // comparison below discards rows inside the box but outside the cell.
        let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
        // Collect matching rows
        let mut matching_rows: Vec<usize> = Vec::new();
        state
            .grid
            .for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
                let row = row_idx as usize;
                if h3_data[row] == cell_u64
                    && row_passes_filters(
                        row,
                        &parsed_filters,
                        &parsed_enum_filters,
                        feature_data,
                        num_features,
                        enum_features,
                    )
                {
                    matching_rows.push(row);
                }
            });
        let total_count = matching_rows.len();
        // Build JSON directly via string buffer
        let mut output = String::with_capacity(4096);
        output.push_str("{\"count\":");
        write!(output, "{}", total_count).unwrap();
        // Numeric features: compute count, min, max, sum, histogram using global bin edges
        output.push_str(",\"numeric_features\":[");
        let mut first_numeric = true;
        for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
            let global_stats = &state.data.feature_stats[feature_index];
            let histogram_min = global_stats.histogram.min;
            let histogram_max = global_stats.histogram.max;
            let bin_width = global_stats.histogram.bin_width;
            let mut count = 0usize;
            let mut min_value = f64::INFINITY;
            let mut max_value = f64::NEG_INFINITY;
            let mut sum = 0.0f64;
            let mut bins = vec![0u64; HISTOGRAM_BINS];
            for &row in &matching_rows {
                // Row-major layout: all features of one row are contiguous.
                let value = feature_data[row * num_features + feature_index];
                // Non-finite values (NaN / infinities) are ignored entirely.
                if value.is_finite() {
                    count += 1;
                    if value < min_value {
                        min_value = value;
                    }
                    if value > max_value {
                        max_value = value;
                    }
                    sum += value;
                    // Bin into histogram using global edges
                    if bin_width > 0.0 {
                        let bin_index =
                            ((value - histogram_min) / bin_width).floor() as isize;
                        // Values outside the global range land in the first/last bin.
                        let clamped_index = bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
                        bins[clamped_index] += 1;
                    }
                }
            }
            // Features with no finite value in this cell are omitted from the output.
            if count == 0 {
                continue;
            }
            if !first_numeric {
                output.push(',');
            }
            first_numeric = false;
            let mean = sum / count as f64;
            output.push_str("{\"name\":");
            write_json_string(&mut output, feature_name);
            write!(output, ",\"count\":{}", count).unwrap();
            write!(output, ",\"min\":{}", format_f64(min_value)).unwrap();
            write!(output, ",\"max\":{}", format_f64(max_value)).unwrap();
            write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
            // The histogram echoes the global min/max/bin_width so the client
            // can align these per-cell counts with the global bins.
            output.push_str(",\"histogram\":{\"min\":");
            write!(output, "{}", format_f64(histogram_min)).unwrap();
            output.push_str(",\"max\":");
            write!(output, "{}", format_f64(histogram_max)).unwrap();
            output.push_str(",\"bin_width\":");
            write!(output, "{}", format_f64(bin_width)).unwrap();
            output.push_str(",\"counts\":[");
            for (bin_index, &bin_count) in bins.iter().enumerate() {
                if bin_index > 0 {
                    output.push(',');
                }
                write!(output, "{}", bin_count).unwrap();
            }
            output.push_str("]}}")
        }
        // Enum features: count per value
        output.push_str("],\"enum_features\":[");
        let mut first_enum = true;
        for enum_feature in enum_features {
            // Enum features missing from the name -> index map are skipped.
            let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
                Some(&index) => index,
                None => continue,
            };
            let enum_data = &state.data.enum_features[enum_index];
            let mut value_counts = vec![0u64; enum_data.values.len()];
            for &row in &matching_rows {
                let value = enum_data.data[row];
                // Skip nulls and any index that has no label.
                if value != ENUM_NULL && (value as usize) < value_counts.len() {
                    value_counts[value as usize] += 1;
                }
            }
            // Only include if there are any non-zero counts
            let has_values = value_counts.iter().any(|&count| count > 0);
            if !has_values {
                continue;
            }
            if !first_enum {
                output.push(',');
            }
            first_enum = false;
            output.push_str("{\"name\":");
            write_json_string(&mut output, &enum_feature.name);
            // `counts` is an object keyed by value label; zero counts are left out.
            output.push_str(",\"counts\":{");
            let mut first_value = true;
            for (value_index, &count) in value_counts.iter().enumerate() {
                if count == 0 {
                    continue;
                }
                if !first_value {
                    output.push(',');
                }
                first_value = false;
                write_json_string(&mut output, &enum_data.values[value_index]);
                write!(output, ":{}", count).unwrap();
            }
            output.push_str("}}");
        }
        output.push_str("]}");
        let elapsed = start_time.elapsed();
        info!(
            h3 = %h3_str,
            resolution,
            total_count,
            filters = num_filters,
            filters_raw = filters_str.as_deref().unwrap_or("-"),
            ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
            "GET /api/hexagon-stats"
        );
        output
    })
    .await
    // A join error (e.g. the blocking task panicked) is reported as a 500.
    .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
    Ok((
        [(axum::http::header::CONTENT_TYPE, "application/json")],
        result,
    ))
 }
 /// Append `value` to `output` as a quoted JSON string literal.
 ///
 /// Escapes `"` and `\`, uses the short escapes for newline, carriage return
 /// and tab, and writes every other control character below U+0020 as a
 /// `\u00XX` escape. JSON forbids raw control characters inside strings, so
 /// leaving them unescaped would produce a body clients cannot parse.
 fn write_json_string(output: &mut String, value: &str) {
    output.push('"');
    for character in value.chars() {
        match character {
            '"' => output.push_str("\\\""),
            '\\' => output.push_str("\\\\"),
            '\n' => output.push_str("\\n"),
            '\r' => output.push_str("\\r"),
            '\t' => output.push_str("\\t"),
            control if control < '\x20' => {
                // Formatting into a `String` cannot fail, so the result is ignored.
                let _ = write!(output, "\\u{:04x}", control as u32);
            }
            other => output.push(other),
        }
    }
    output.push('"');
 }
 /// Format a float as a JSON value.
 ///
 /// Whole numbers with magnitude below 1e15 keep one decimal place (`2.0`);
 /// other finite values use the default `Display` output. NaN and the
 /// infinities have no JSON representation, so they are written as `null`
 /// rather than as the tokens `NaN` / `inf`, which would make the whole
 /// response body unparseable.
 fn format_f64(value: f64) -> String {
    if !value.is_finite() {
        return "null".to_string();
    }
    if value.fract() == 0.0 && value.abs() < 1e15 {
        format!("{:.1}", value)
    } else {
        format!("{}", value)
    }
 }
--- a/server-rs/src/routes/hexagons.rs
+++ b/server-rs/src/routes/hexagons.rs
@ -0,0 +1,375 @@
 use std::fmt::{self, Write};
 use std::sync::Arc;
 use axum::extract::Query;
 use axum::http::StatusCode;
 use axum::response::IntoResponse;
 use rustc_hash::FxHashMap;
 use serde::Deserialize;
 use tracing::{info, warn};
 use crate::consts::{
    BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN,
    POSTCODE_MIN_RESOLUTION,
 };
 use crate::filter::parse_filters;
 use crate::state::AppState;
 use super::parse::parse_bounds;
 /// Byte count that displays itself in decimal units (`B`, `KB`, `MB`).
 struct HumanBytes(usize);
 impl fmt::Display for HumanBytes {
    /// Writes the count with one decimal place for KB/MB and as a plain
    /// integer below 1000 bytes.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            size if size >= 1_000_000 => {
                write!(formatter, "{:.1} MB", size as f64 / 1_000_000.0)
            }
            size if size >= 1_000 => write!(formatter, "{:.1} KB", size as f64 / 1_000.0),
            size => write!(formatter, "{} B", size),
        }
    }
 }
 /// Query-string parameters accepted by `get_hexagons`.
 #[derive(Deserialize)]
 pub struct HexagonParams {
    /// Requested H3 resolution; the handler rejects values outside
    /// `[H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX]`.
    resolution: u8,
    /// Viewport as `south,west,north,east`. Optional for deserialization, but
    /// the handler answers 400 when it is missing.
    bounds: Option<String>,
    /// Comma-separated filters: `name:min:max,...`
    /// Rows must have non-NaN values within [min,max] for each filter.
    filters: Option<String>,
 }
 /// Per-cell accumulator for aggregating features
 struct CellAgg {
    /// Number of rows added through `add_row`.
    count: u32,
    /// Per-feature minimum / maximum over finite values only; they start at
    /// `+inf` / `-inf`, so a feature with no finite value stays non-finite.
    mins: Vec<f64>,
    maxs: Vec<f64>,
    /// Min/max ordinal indices for enum features. `enum_mins` starts at
    /// `ENUM_NULL` (meaning "no data yet"); `enum_maxs` starts at 0.
    enum_mins: Vec<u8>,
    enum_maxs: Vec<u8>,
    /// First non-empty postcode seen in this cell (only tracked at high resolutions)
    postcode: Option<String>,
    /// Number of rows seen whose postcode equals `postcode`.
    postcode_count: u32,
    /// Running sums of the coordinates passed to `add_postcode`; divided by
    /// `count` to produce the centroid in the JSON output.
    lat_sum: f64,
    lon_sum: f64,
 }
 impl CellAgg {
    /// Create an empty accumulator sized for `num_features` numeric features
    /// and `num_enums` enum features.
    fn new(num_features: usize, num_enums: usize) -> Self {
        Self {
            count: 0,
            lat_sum: 0.0,
            lon_sum: 0.0,
            postcode: None,
            postcode_count: 0,
            mins: vec![f64::INFINITY; num_features],
            maxs: vec![f64::NEG_INFINITY; num_features],
            enum_mins: vec![ENUM_NULL; num_enums],
            enum_maxs: vec![0; num_enums],
        }
    }
    /// Fold one row of numeric features into the running min/max.
    ///
    /// `feature_data` is row-major: the values of row `row` occupy
    /// `feature_data[row * num_features .. (row + 1) * num_features]`.
    /// Non-finite values leave the accumulator untouched.
    #[inline]
    fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) {
        self.count += 1;
        let offset = row * num_features;
        let values = feature_data[offset..offset + num_features].iter().copied();
        for (feat_index, value) in values.enumerate() {
            if !value.is_finite() {
                continue;
            }
            let lowest = &mut self.mins[feat_index];
            if value < *lowest {
                *lowest = value;
            }
            let highest = &mut self.maxs[feat_index];
            if value > *highest {
                *highest = value;
            }
        }
    }
    /// Fold the enum ordinals of row `row` into the per-enum min/max.
    /// Rows holding `ENUM_NULL` for a feature are ignored for that feature.
    #[inline]
    fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) {
        for (enum_index, enum_feature) in enum_features.iter().enumerate() {
            let ordinal = enum_feature.data[row];
            if ordinal == ENUM_NULL {
                continue;
            }
            let current_min = self.enum_mins[enum_index];
            if current_min == ENUM_NULL || ordinal < current_min {
                self.enum_mins[enum_index] = ordinal;
            }
            if ordinal > self.enum_maxs[enum_index] {
                self.enum_maxs[enum_index] = ordinal;
            }
        }
    }
    /// Accumulate the centroid and remember the cell's postcode.
    ///
    /// The coordinates are always added to the running sums. The first
    /// non-empty postcode is kept; later rows only bump `postcode_count`
    /// when they carry that same postcode.
    #[inline]
    fn add_postcode(&mut self, postcode: &str, lat: f64, lon: f64) {
        self.lat_sum += lat;
        self.lon_sum += lon;
        if postcode.is_empty() {
            return;
        }
        match self.postcode.as_deref() {
            None => {
                self.postcode = Some(postcode.to_string());
                self.postcode_count = 1;
            }
            Some(existing) if existing == postcode => self.postcode_count += 1,
            Some(_) => {}
        }
    }
 }
 /// Append `text` to `buf`, escaped for use inside a JSON string literal.
 ///
 /// The surrounding quotes are NOT written. Quotes and backslashes get a
 /// backslash prefix, newline / carriage return / tab use their short
 /// escapes, and any other character below U+0020 becomes `\u00XX`.
 pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
    for character in text.chars() {
        let short_escape = match character {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match short_escape {
            Some(escaped) => buf.push_str(escaped),
            None if character < '\x20' => {
                let _ = write!(buf, "\\u{:04x}", character as u32);
            }
            None => buf.push(character),
        }
    }
 }
 /// Serialize the aggregated cells as `{"features":[...]}` directly into `buf`,
 /// without building any intermediate `serde_json::Value`.
 ///
 /// Each cell object carries `h3` and `count`, then a min/max pair for every
 /// numeric feature whose min and max are both finite, then a min/max pair
 /// for every enum feature that saw data. When `include_postcode` is set and
 /// the cell has a postcode with a finite centroid, `postcode`, `lat` and
 /// `lon` are appended. Map keys that are not valid H3 cell indices are skipped.
 #[allow(clippy::too_many_arguments)]
 fn write_hexagons_json(
    buf: &mut String,
    groups: &FxHashMap<u64, CellAgg>,
    min_keys: &[String],
    max_keys: &[String],
    num_features: usize,
    enum_min_keys: &[String],
    enum_max_keys: &[String],
    num_enums: usize,
    include_postcode: bool,
 ) {
    buf.push_str("{\"features\":[");
    let mut needs_separator = false;
    for (&cell_id, aggregation) in groups {
        let cell = match h3o::CellIndex::try_from(cell_id) {
            Ok(valid) => valid,
            Err(_) => continue,
        };
        if needs_separator {
            buf.push(',');
        }
        needs_separator = true;
        let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);

        // Numeric features: only emitted when the cell saw a finite value.
        for feat_index in 0..num_features {
            let lowest = aggregation.mins[feat_index];
            if !lowest.is_finite() {
                continue;
            }
            let highest = aggregation.maxs[feat_index];
            if !highest.is_finite() {
                continue;
            }
            let _ = write!(
                buf,
                ",\"{}\":{},\"{}\":{}",
                min_keys[feat_index], lowest, max_keys[feat_index], highest
            );
        }

        // Enum features: a min still at ENUM_NULL means no data was seen.
        for enum_index in 0..num_enums {
            let lowest = aggregation.enum_mins[enum_index];
            if lowest == ENUM_NULL {
                continue;
            }
            let _ = write!(
                buf,
                ",\"{}\":{},\"{}\":{}",
                enum_min_keys[enum_index],
                lowest,
                enum_max_keys[enum_index],
                aggregation.enum_maxs[enum_index]
            );
        }

        if include_postcode {
            if let Some(postcode) = aggregation.postcode.as_deref() {
                let row_total = aggregation.count as f64;
                let centroid_lat = aggregation.lat_sum / row_total;
                let centroid_lon = aggregation.lon_sum / row_total;
                if centroid_lat.is_finite() && centroid_lon.is_finite() {
                    buf.push_str(",\"postcode\":\"");
                    write_json_escaped(buf, postcode);
                    let _ = write!(buf, "\",\"lat\":{},\"lon\":{}", centroid_lat, centroid_lon);
                }
            }
        }
        buf.push('}');
    }
    buf.push_str("]}");
 }
 /// Handler for `GET /api/hexagons`: per-cell aggregates for a viewport.
 ///
 /// Validates the resolution and `bounds`, expands and quantizes the bounds,
 /// parses the optional filters, then (on a blocking thread) groups every row
 /// inside the bounds that passes the filters by its H3 cell and serializes
 /// the groups with `write_hexagons_json`.
 ///
 /// Returns `400 Bad Request` for an out-of-range resolution or missing /
 /// malformed bounds, and `500` when the blocking task fails to join or the
 /// resolution cannot be converted to an `h3o::Resolution`.
 pub async fn get_hexagons(
    state: Arc<AppState>,
    Query(params): Query<HexagonParams>,
 ) -> Result<impl IntoResponse, (StatusCode, String)> {
    let resolution = params.resolution;
    if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX {
        warn!(
            resolution,
            "Resolution out of range [{}, {}]", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
        );
        return Err((
            StatusCode::BAD_REQUEST,
            format!(
                "resolution must be between {} and {}",
                H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
            ),
        ));
    }
    let bounds_str = params.bounds.ok_or((
        StatusCode::BAD_REQUEST,
        "bounds parameter is required".into(),
    ))?;
    let (mut south, mut west, mut north, mut east) = parse_bounds(&bounds_str)?;
    // Grow the viewport by a fraction of its own size on every side ...
    let lat_range = north - south;
    let lng_range = east - west;
    south -= lat_range * BOUNDS_BUFFER_PERCENT;
    north += lat_range * BOUNDS_BUFFER_PERCENT;
    west -= lng_range * BOUNDS_BUFFER_PERCENT;
    east += lng_range * BOUNDS_BUFFER_PERCENT;
    // ... then snap outward to the quantization grid (floor the south/west
    // edges, ceil the north/east edges) so the box never shrinks.
    south = (south / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
    west = (west / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
    north = (north / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
    east = (east / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
    // Kept only so the raw filter string can be logged from the blocking task.
    let filters_str = params.filters.clone();
    let (parsed_filters, parsed_enum_filters) = parse_filters(
        params.filters.as_deref(),
        &state.data.feature_names,
        &state.data.enum_features,
    );
    let num_filters = parsed_filters.len() + parsed_enum_filters.len();
    let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
        let t0 = std::time::Instant::now();
        let num_features = state.data.num_features;
        let num_enums = state.data.enum_features.len();
        let feature_data = &state.data.feature_data;
        let min_keys = &state.min_keys;
        let max_keys = &state.max_keys;
        let enum_min_keys = &state.enum_min_keys;
        let enum_max_keys = &state.enum_max_keys;
        // Precomputed per-row cell ids for this resolution, if any exist.
        let h3_cells_for_res: Option<&[u64]> = state
            .h3_cells
            .get(resolution as usize)
            .filter(|cells| !cells.is_empty())
            .map(|cells| cells.as_slice());
        let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
        let enum_features = &state.data.enum_features;
        let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
        // Row-level filter check: numeric must be non-NaN and within [min, max],
        // enum must have value index in the allowed set
        let row_passes = |row: usize| -> bool {
            parsed_filters.iter().all(|filter| {
                let value = feature_data[row * num_features + filter.feat_idx];
                value.is_finite() && value >= filter.min && value <= filter.max
            }) && parsed_enum_filters.iter().all(|enum_filter| {
                let value = enum_features[enum_filter.enum_idx].data[row];
                value != ENUM_NULL && enum_filter.allowed.contains(&value)
            })
        };
        if let Some(precomputed) = h3_cells_for_res {
            // Fast path: look the cell id up in the precomputed table.
            state
                .grid
                .for_each_in_bounds(south, west, north, east, |row_idx| {
                    let row = row_idx as usize;
                    if !row_passes(row) {
                        return;
                    }
                    let cell_id = precomputed[row];
                    let aggregation = groups
                        .entry(cell_id)
                        .or_insert_with(|| CellAgg::new(num_features, num_enums));
                    aggregation.add_row(feature_data, row, num_features);
                    aggregation.add_enums(enum_features, row);
                    if include_postcode {
                        aggregation.add_postcode(
                            &state.data.postcode[row],
                            state.data.lat[row],
                            state.data.lon[row],
                        );
                    }
                });
        } else {
            // Fallback: derive the cell id from each row's coordinates.
            let h3_res = h3o::Resolution::try_from(resolution)
                .map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
            state
                .grid
                .for_each_in_bounds(south, west, north, east, |row_idx| {
                    let row = row_idx as usize;
                    if !row_passes(row) {
                        return;
                    }
                    // Rows whose coordinates `LatLng::new` rejects are grouped under id 0.
                    // NOTE(review): presumably 0 is not a valid cell index, so
                    // write_hexagons_json drops that group — confirm.
                    let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row])
                        .map(|coord| u64::from(coord.to_cell(h3_res)))
                        .unwrap_or(0);
                    let aggregation = groups
                        .entry(cell_id)
                        .or_insert_with(|| CellAgg::new(num_features, num_enums));
                    aggregation.add_row(feature_data, row, num_features);
                    aggregation.add_enums(enum_features, row);
                    if include_postcode {
                        aggregation.add_postcode(
                            &state.data.postcode[row],
                            state.data.lat[row],
                            state.data.lon[row],
                        );
                    }
                });
        }
        let t_agg = t0.elapsed();
        // Initial capacity guess of 128 bytes per cell; the buffer grows as needed.
        let mut json_buf = String::with_capacity(groups.len() * 128);
        write_hexagons_json(
            &mut json_buf,
            &groups,
            min_keys,
            max_keys,
            num_features,
            enum_min_keys,
            enum_max_keys,
            num_enums,
            include_postcode,
        );
        let t_total = t0.elapsed();
        info!(
            resolution,
            cells = groups.len(),
            filters = num_filters,
            filters_raw = filters_str.as_deref().unwrap_or("-"),
            agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0),
            total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
            size = format_args!("{}", HumanBytes(json_buf.len())),
            "GET /api/hexagons"
        );
        Ok(json_buf)
    })
    .await
    // First `map_err`: the blocking task failed to join (e.g. it panicked).
    .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
    // Second `map_err`: the closure itself returned an error string.
    .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
    Ok(([("content-type", "application/json")], json_body))
 }
--- a/server-rs/src/routes/mod.rs
+++ b/server-rs/src/routes/mod.rs
@ -0,0 +1,12 @@
 mod features;
 pub(crate) mod hexagons;
 mod hexagon_stats;
 pub(crate) mod parse;
 mod pois;
 pub(crate) mod properties;
 pub use features::get_features;
 pub use hexagon_stats::get_hexagon_stats;
 pub use hexagons::get_hexagons;
 pub use pois::{get_poi_categories, get_pois};
 pub use properties::get_hexagon_properties;
--- a/server-rs/src/routes/parse.rs
+++ b/server-rs/src/routes/parse.rs
@ -0,0 +1,52 @@
 use axum::http::StatusCode;
 /// Return `(min_lat, min_lon, max_lat, max_lon)` for the boundary vertices of
 /// an H3 cell, widened on every side by `buffer` degrees.
 pub fn h3_cell_bounds(cell: h3o::CellIndex, buffer: f64) -> (f64, f64, f64, f64) {
    // Each tracker is (smallest seen, largest seen).
    let mut lat_extent = (f64::INFINITY, f64::NEG_INFINITY);
    let mut lon_extent = (f64::INFINITY, f64::NEG_INFINITY);
    let boundary = cell.boundary();
    for vertex in boundary.iter() {
        let (lat, lon) = (vertex.lat(), vertex.lng());
        if lat < lat_extent.0 {
            lat_extent.0 = lat;
        }
        if lat > lat_extent.1 {
            lat_extent.1 = lat;
        }
        if lon < lon_extent.0 {
            lon_extent.0 = lon;
        }
        if lon > lon_extent.1 {
            lon_extent.1 = lon;
        }
    }
    let (south, north) = lat_extent;
    let (west, east) = lon_extent;
    (south - buffer, west - buffer, north + buffer, east + buffer)
 }
 /// Parse a `south,west,north,east` bounds string into four finite floats.
 ///
 /// Whitespace around each component is ignored. Returns `400 Bad Request`
 /// when a component does not parse as a float, when there are not exactly
 /// four components, or when any component is NaN or infinite. The last
 /// check matters because `f64::from_str` accepts spellings such as `nan`
 /// and `inf`, which would otherwise flow into the bounds arithmetic and the
 /// spatial query.
 pub fn parse_bounds(bounds_str: &str) -> Result<(f64, f64, f64, f64), (StatusCode, String)> {
    let invalid = || {
        (
            StatusCode::BAD_REQUEST,
            "Invalid bounds format. Use: south,west,north,east".to_string(),
        )
    };
    let parts: Vec<f64> = bounds_str
        .split(',')
        .map(|part| part.trim().parse::<f64>())
        .collect::<Result<Vec<_>, _>>()
        .map_err(|_| invalid())?;
    match parts.as_slice() {
        &[south, west, north, east] if parts.iter().all(|value| value.is_finite()) => {
            Ok((south, west, north, east))
        }
        _ => Err(invalid()),
    }
 }
--- a/server-rs/src/routes/pois.rs
+++ b/server-rs/src/routes/pois.rs
@ -0,0 +1,128 @@
 use std::sync::Arc;
 use axum::extract::Query;
 use axum::http::StatusCode;
 use axum::response::Json;
 use serde::{Deserialize, Serialize};
 use tracing::info;
 use crate::consts::MAX_POIS_PER_REQUEST;
 use crate::data::POI;
 use crate::state::{AppState, POICategoryGroup};
 use super::parse::parse_bounds;
 /// Query-string parameters accepted by `get_pois`.
 #[derive(Deserialize)]
 pub struct POIParams {
    /// Viewport as `south,west,north,east`. Optional for deserialization, but
    /// the handler answers 400 when it is missing.
    bounds: Option<String>,
    /// Comma-separated list of categories to filter by
    categories: Option<String>,
 }
 /// JSON body returned by `get_pois`.
 #[derive(Serialize)]
 pub struct POIsResponse {
    /// Points of interest inside the requested bounds, capped at
    /// `MAX_POIS_PER_REQUEST` entries.
    pois: Vec<POI>,
 }
 pub async fn get_pois(
    state: Arc<AppState>,
    Query(params): Query<POIParams>,
 ) -> Result<Json<POIsResponse>, (StatusCode, String)> {
    let bounds_str = params.bounds.ok_or((
        StatusCode::BAD_REQUEST,
        "bounds parameter is required".into(),
    ))?;
    let (south, west, north, east) = parse_bounds(&bounds_str)?;
    let categories_str = params.categories.clone();
    let category_filter: Option<rustc_hash::FxHashSet<String>> = params
        .categories
        .as_deref()
        .filter(|text| !text.is_empty())
        .map(|text| text.split(',').map(|part| part.trim().to_string()).collect());
    let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0);
    let result = tokio::task::spawn_blocking(move || {
        let t0 = std::time::Instant::now();
        let row_indices = state.poi_grid.query(south, west, north, east);
        // Collect matching row indices first, then sample randomly so the
        // subset covers the viewport uniformly instead of clustering in one area.
        let mut matching_rows: Vec<usize> = row_indices
            .iter()
            .filter_map(|&row_idx| {
                let row = row_idx as usize;
                if let Some(ref categories) = category_filter {
                    if !categories.contains(&state.poi_data.category[row]) {
                        return None;
                    }
                }
                Some(row)
            })
            .collect();
        if matching_rows.len() > MAX_POIS_PER_REQUEST {
            // Use a power-of-2 sampling step so each POI's inclusion depends
            // only on its own priority hash, not on what other POIs are in
            // the viewport. This prevents visible reshuffling when panning.
            let ratio = (matching_rows.len() / MAX_POIS_PER_REQUEST) as u32;
            let step = ratio.next_power_of_two();
            let mask = step - 1;
            matching_rows.retain(|&row| state.poi_data.priority[row] & mask == 0);
            // Statistical noise may leave us slightly over the limit
            if matching_rows.len() > MAX_POIS_PER_REQUEST {
                matching_rows.sort_unstable_by_key(|&row| state.poi_data.priority[row]);
                matching_rows.truncate(MAX_POIS_PER_REQUEST);
            }
        }
        let pois: Vec<POI> = matching_rows
            .iter()
            .map(|&row| POI {
                id: state.poi_data.id[row].clone(),
                name: state.poi_data.name[row].clone(),
                category: state.poi_data.category[row].clone(),
                group: state.poi_data.group[row].clone(),
                lat: state.poi_data.lat[row],
                lng: state.poi_data.lng[row],
                emoji: state.poi_data.emoji[row].clone(),
            })
            .collect();
        let elapsed = t0.elapsed();
        info!(
            results = pois.len(),
            candidates = row_indices.len(),
            categories = num_categories,
            categories_raw = categories_str.as_deref().unwrap_or("-"),
            ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
            "GET /api/pois"
        );
        POIsResponse { pois }
    })
    .await
    .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
    Ok(Json(result))
 }
 /// JSON body returned by `get_poi_categories`.
 #[derive(Serialize)]
 pub struct POICategoriesResponse {
    /// POI category groups, cloned from the application state.
    groups: Vec<POICategoryGroup>,
 }
 /// Handler for `GET /api/poi-categories`: returns a copy of the POI
 /// category groups held in the application state and logs how many groups
 /// and categories were sent.
 pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesResponse> {
    let groups: Vec<POICategoryGroup> = state.poi_category_groups.clone();
    let category_total = groups
        .iter()
        .fold(0usize, |running, group| running + group.categories.len());
    info!(
        count = category_total,
        groups = groups.len(),
        "GET /api/poi-categories"
    );
    Json(POICategoriesResponse { groups })
 }
--- a/server-rs/src/routes/properties.rs
+++ b/server-rs/src/routes/properties.rs
@ -0,0 +1,230 @@
 use std::str::FromStr;
 use std::sync::Arc;
 use axum::extract::Query;
 use axum::http::StatusCode;
 use axum::response::Json;
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 use tracing::{info, warn};
 use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, MAX_PROPERTIES_LIMIT};
 use crate::data::EnumFeatureData;
 use crate::filter::{parse_filters, row_passes_filters};
 use crate::state::AppState;
 use super::parse::h3_cell_bounds;
 /// Query-string parameters accepted by `get_hexagon_properties`.
 #[derive(Deserialize)]
 pub struct HexagonPropertiesParams {
    /// H3 cell index as text; the handler parses it with
    /// `h3o::CellIndex::from_str` and rejects unparsable values with 400.
    pub h3: String,
    /// H3 resolution used to index `AppState::h3_cells`; the handler rejects
    /// resolutions that are out of range or have no precomputed cells.
    pub resolution: u8,
    /// Optional raw filter expression, handed to `crate::filter::parse_filters`.
    pub filters: Option<String>,
    /// Page size; the handler defaults it to `DEFAULT_PROPERTIES_LIMIT` and
    /// caps it at `MAX_PROPERTIES_LIMIT`.
    pub limit: Option<usize>,
    /// Number of matching rows to skip; the handler defaults it to 0.
    pub offset: Option<usize>,
 }
 /// One property row as serialized in `HexagonPropertiesResponse::properties`.
 #[derive(Serialize)]
 pub struct Property {
    // String fields (`None` when the handler found no usable value for the row)
    pub address: Option<String>,
    pub postcode: Option<String>,
    pub property_type: Option<String>,
    pub built_form: Option<String>,
    pub duration: Option<String>,
    pub current_energy_rating: Option<String>,
    pub potential_energy_rating: Option<String>,
    // Coordinates
    pub lat: f64,
    pub lon: f64,
    // Flag: whether the construction date is approximate
    pub is_construction_date_approximate: Option<bool>,
    /// Numeric features keyed by feature name. `#[serde(flatten)]` emits each
    /// entry as a sibling key of the fields above instead of a nested object.
    /// The handler inserts only finite values.
    #[serde(flatten)]
    pub features: FxHashMap<String, f64>,
 }
 /// JSON body returned by `get_hexagon_properties`: one page of properties
 /// plus the paging metadata needed to request the next page.
 #[derive(Serialize)]
 pub struct HexagonPropertiesResponse {
    /// The properties on this page (at most `limit` entries).
    pub properties: Vec<Property>,
    /// Number of rows that matched the cell and filters, before paging.
    pub total: usize,
    /// Page size actually applied by the handler (after default and cap).
    pub limit: usize,
    /// Number of matching rows skipped before this page.
    pub offset: usize,
    /// `true` when `total` exceeds `offset + limit`, i.e. more rows remain.
    pub truncated: bool,
 }
 /// Strips surrounding whitespace from `text` and returns the remainder as an
 /// owned `String`, or `None` when nothing is left after trimming.
 fn non_empty_string(text: &str) -> Option<String> {
    match text.trim() {
        "" => None,
        stripped => Some(stripped.to_owned()),
    }
 }
 /// Resolves the label of an enum feature for `row`, trying each candidate
 /// feature name in `names` in order.
 ///
 /// A candidate is skipped when it is not present in `enum_idx`, when the row
 /// holds `ENUM_NULL` for it, or when the stored index has no entry in the
 /// feature's `values`. Returns the first label found, or `None` when every
 /// candidate is skipped.
 ///
 /// Panics if an index from `enum_idx` is out of range for `enum_features`, or
 /// if `row` is out of range for the selected feature's `data`.
 fn lookup_enum_value(
    enum_features: &[EnumFeatureData],
    enum_idx: &FxHashMap<String, usize>,
    row: usize,
    names: &[&str],
 ) -> Option<String> {
    names.iter().find_map(|candidate| {
        let position = *enum_idx.get(*candidate)?;
        let feature = &enum_features[position];
        let code = feature.data[row];
        if code == ENUM_NULL {
            // No value recorded for this row; fall through to the next name.
            return None;
        }
        feature.values.get(code as usize).cloned()
    })
 }
 /// Handler for `GET /api/hexagon-properties`.
 ///
 /// Returns one page of the properties whose precomputed H3 cell at
 /// `params.resolution` equals `params.h3` and which pass the optional
 /// filters. The scan and response assembly run on a blocking worker thread.
 ///
 /// # Errors
 /// * `400 Bad Request` when `params.h3` is not a valid H3 cell index, or when
 ///   `params.resolution` is out of range / has no precomputed cells in
 ///   `state.h3_cells`.
 /// * `500 Internal Server Error` when the blocking task fails to join.
 pub async fn get_hexagon_properties(
    state: Arc<AppState>,
    Query(params): Query<HexagonPropertiesParams>,
 ) -> Result<Json<HexagonPropertiesResponse>, (StatusCode, String)> {
    let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
        warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
        (StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error))
    })?;
    let cell_u64: u64 = cell.into();
    let resolution = params.resolution as usize;
    if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
        warn!(
            resolution,
            "Invalid or non-precomputed resolution for hexagon-properties"
        );
        return Err((
            StatusCode::BAD_REQUEST,
            "Invalid or non-precomputed resolution".to_string(),
        ));
    }
    // Owned copies for logging inside the blocking closure.
    let h3_str = params.h3.clone();
    let filters_str = params.filters.clone();
    let (parsed_filters, parsed_enum_filters) = parse_filters(
        params.filters.as_deref(),
        &state.data.feature_names,
        &state.data.enum_features,
    );
    let num_filters = parsed_filters.len() + parsed_enum_filters.len();
    let result = tokio::task::spawn_blocking(move || {
        let t0 = std::time::Instant::now();
        let h3_data = &state.h3_cells[resolution];
        let num_features = state.data.num_features;
        let feature_data = &state.data.feature_data;
        let enum_features = &state.data.enum_features;
        // The grid is only used to narrow the candidates to the cell's
        // bounding box; the exact membership test is the H3 id comparison.
        let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
        let mut matching_rows: Vec<usize> = Vec::new();
        state
            .grid
            .for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
                let row = row_idx as usize;
                if h3_data[row] == cell_u64
                    && row_passes_filters(
                        row,
                        &parsed_filters,
                        &parsed_enum_filters,
                        feature_data,
                        num_features,
                        enum_features,
                    )
                {
                    matching_rows.push(row);
                }
            });
        let total = matching_rows.len();
        let limit = params
            .limit
            .unwrap_or(DEFAULT_PROPERTIES_LIMIT)
            .min(MAX_PROPERTIES_LIMIT);
        let offset = params.offset.unwrap_or(0);
        // `offset` comes straight from the query string and is unbounded, so
        // a plain `offset + limit` can overflow (panic in debug builds, wrap
        // in release builds and misreport `truncated`). Saturate instead.
        let truncated = total > offset.saturating_add(limit);
        let properties: Vec<Property> = matching_rows
            .iter()
            .skip(offset)
            .take(limit)
            .map(|&row| {
                let mut features = FxHashMap::default();
                // Numeric features are stored row-major: one run of
                // `num_features` values per row.
                let base = row * num_features;
                for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
                    let value = feature_data[base + feat_idx];
                    // Skip NaN / infinite values, which JSON cannot represent.
                    if value.is_finite() {
                        features.insert(feat_name.clone(), value);
                    }
                }
                Property {
                    address: non_empty_string(&state.data.address[row]),
                    postcode: non_empty_string(&state.data.postcode[row]),
                    is_construction_date_approximate: Some(state.data.is_approx_build_date[row]),
                    // Each lookup tries several candidate feature names and
                    // uses the first one that yields a value for the row.
                    property_type: lookup_enum_value(
                        enum_features,
                        &state.enum_name_to_idx,
                        row,
                        &["Property type", "epc_property_type", "pp_property_type"],
                    ),
                    built_form: lookup_enum_value(
                        enum_features,
                        &state.enum_name_to_idx,
                        row,
                        &["Property type/built form", "built_form"],
                    ),
                    // NOTE(review): "Leashold" looks like a typo for
                    // "Leasehold", but it may mirror the upstream feature
                    // name - confirm against the data before changing it.
                    duration: lookup_enum_value(
                        enum_features,
                        &state.enum_name_to_idx,
                        row,
                        &["Leashold/Freehold", "duration"],
                    ),
                    current_energy_rating: lookup_enum_value(
                        enum_features,
                        &state.enum_name_to_idx,
                        row,
                        &["Current energy rating", "current_energy_rating"],
                    ),
                    potential_energy_rating: lookup_enum_value(
                        enum_features,
                        &state.enum_name_to_idx,
                        row,
                        &["Potential energy rating", "potential_energy_rating"],
                    ),
                    lat: state.data.lat[row],
                    lon: state.data.lon[row],
                    features,
                }
            })
            .collect();
        let elapsed = t0.elapsed();
        info!(
            h3 = %h3_str,
            resolution,
            total,
            returned = properties.len(),
            offset,
            filters = num_filters,
            filters_raw = filters_str.as_deref().unwrap_or("-"),
            ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
            "GET /api/hexagon-properties"
        );
        HexagonPropertiesResponse {
            properties,
            total,
            limit,
            offset,
            truncated,
        }
    })
    .await
    .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
    Ok(Json(result))
 }
--- a/server-rs/src/state.rs
+++ b/server-rs/src/state.rs
@ -0,0 +1,33 @@
 use rustc_hash::FxHashMap;
 use serde::Serialize;
 use crate::data::{POIData, PropertyData};
 use crate::grid_index::GridIndex;
 /// A named group of POI categories, as stored in
 /// `AppState::poi_category_groups`.
 #[derive(Serialize, Clone)]
 pub struct POICategoryGroup {
    /// Name of the group.
    pub name: String,
    /// Names of the categories belonging to this group.
    pub categories: Vec<String>,
 }
 /// Shared application state: the loaded property and POI data together with
 /// the lookup structures precomputed from them.
 pub struct AppState {
    /// Loaded property data.
    pub data: PropertyData,
    /// Spatial index used for bounding-box queries.
    /// NOTE(review): presumably built over the rows of `data` - confirm at the
    /// construction site.
    pub grid: GridIndex,
    /// h3_cells[resolution][row_idx] = precomputed H3 cell ID.
    /// Empty Vec for resolutions not precomputed.
    pub h3_cells: Vec<Vec<u64>>,
    /// Loaded POI data.
    pub poi_data: POIData,
    /// Spatial index for POIs.
    /// NOTE(review): presumably built over the rows of `poi_data` - confirm.
    pub poi_grid: GridIndex,
    /// Precomputed JSON key names: "min_{feature_name}" for each numeric feature
    pub min_keys: Vec<String>,
    /// Precomputed JSON key names: "max_{feature_name}" for each numeric feature
    pub max_keys: Vec<String>,
    /// Precomputed JSON key names: "min_{enum_name}" for each enum feature
    pub enum_min_keys: Vec<String>,
    /// Precomputed JSON key names: "max_{enum_name}" for each enum feature
    pub enum_max_keys: Vec<String>,
    /// Precomputed POI category groups (sorted)
    pub poi_category_groups: Vec<POICategoryGroup>,
    /// Precomputed map from enum feature name to index in data.enum_features
    pub enum_name_to_idx: FxHashMap<String, usize>,
 }
--- a/server-rs/src/tests.rs
+++ b/server-rs/src/tests.rs
@ -0,0 +1,250 @@
 #[cfg(test)]
 mod grid_index_tests {
    //! Boundary behaviour of `GridIndex::query` and
    //! `GridIndex::for_each_in_bounds`.
    use crate::grid_index::GridIndex;

    /// Index over three points on the diagonal (50, 0) .. (51, 1), cell size 0.01.
    fn diagonal_grid() -> GridIndex {
        let latitudes = vec![50.0, 50.5, 51.0];
        let longitudes = vec![0.0, 0.5, 1.0];
        GridIndex::build(&latitudes, &longitudes, 0.01)
    }

    #[test]
    fn query_bounds_fully_below_grid_returns_empty() {
        let index = diagonal_grid();
        let hits = index.query(10.0, -10.0, 20.0, -5.0);
        assert!(
            hits.is_empty(),
            "Should return empty for bounds fully below grid"
        );
    }

    #[test]
    fn query_bounds_fully_above_grid_returns_empty() {
        let index = diagonal_grid();
        let hits = index.query(80.0, 50.0, 90.0, 60.0);
        assert!(
            hits.is_empty(),
            "Should return empty for bounds fully above grid"
        );
    }

    #[test]
    fn query_inverted_bounds_returns_empty() {
        let index = diagonal_grid();
        // The first latitude bound (52) is greater than the second (49).
        let hits = index.query(52.0, 0.0, 49.0, 1.0);
        assert!(hits.is_empty(), "Should return empty for inverted bounds");
    }

    #[test]
    fn for_each_bounds_fully_outside_yields_nothing() {
        let index = diagonal_grid();
        let mut visited = 0;
        index.for_each_in_bounds(10.0, -10.0, 20.0, -5.0, |_| visited += 1);
        assert_eq!(
            visited, 0,
            "for_each should yield nothing for out-of-bounds query"
        );
    }

    #[test]
    fn query_with_large_cells_outside_returns_empty() {
        // Regression guard: an out-of-bounds query against a coarse grid used
        // to scan cell (0,0), which may hold data. It must return nothing.
        let latitudes = vec![50.0];
        let longitudes = vec![0.0];
        let index = GridIndex::build(&latitudes, &longitudes, 1.0);
        let hits = index.query(0.0, -50.0, 10.0, -40.0);
        assert!(
            hits.is_empty(),
            "Should return empty even with large cell size"
        );
    }

    #[test]
    fn query_within_bounds_returns_correct_results() {
        let index = diagonal_grid();
        let hits = index.query(49.9, -0.1, 51.1, 1.1);
        assert_eq!(hits.len(), 3, "Should return all 3 points within bounds");
    }

    #[test]
    fn query_partial_bounds_returns_subset() {
        // Three points stacked on the same longitude, one degree apart.
        let latitudes = vec![50.0, 51.0, 52.0];
        let longitudes = vec![0.0, 0.0, 0.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.01);
        let hits = index.query(49.9, -0.1, 50.1, 0.1);
        assert_eq!(hits.len(), 1, "Should return only the point at lat=50");
    }
 }
 #[cfg(test)]
 mod filter_tests {
    //! Edge cases of `parse_filters` and `row_passes_filters`.
    use crate::data::EnumFeatureData;
    use crate::filter::{parse_filters, row_passes_filters};

    /// A single enum feature "rating" with values A/B and one row holding "A".
    fn rating_feature() -> Vec<EnumFeatureData> {
        vec![EnumFeatureData {
            name: "rating".to_string(),
            values: vec!["A".to_string(), "B".to_string()],
            data: vec![0],
        }]
    }

    #[test]
    fn nan_rows_fail_numeric_filter_even_with_infinite_range() {
        let numeric_names = vec!["price".to_string()];
        let row_values = vec![f64::NAN];
        let no_enums: Vec<EnumFeatureData> = vec![];
        let (numeric, enums) = parse_filters(Some("price:-inf:inf"), &numeric_names, &no_enums);
        assert_eq!(numeric.len(), 1, "Should parse -inf:inf as valid filter");
        let accepted = row_passes_filters(0, &numeric, &enums, &row_values, 1, &no_enums);
        assert!(!accepted, "NaN should fail filter even with infinite range");
    }

    #[test]
    fn empty_enum_filter_value_rejects_all() {
        let enum_features = rating_feature();
        let numeric_names: Vec<String> = vec![];
        let (numeric, enums) = parse_filters(Some("rating:"), &numeric_names, &enum_features);
        assert_eq!(enums.len(), 1);
        assert!(enums[0].allowed.is_empty());
        let accepted = row_passes_filters(0, &numeric, &enums, &[], 0, &enum_features);
        assert!(!accepted, "Empty allowed set should reject all rows");
    }

    #[test]
    fn enum_filter_with_nonexistent_values_produces_empty_allowed() {
        let enum_features = rating_feature();
        let numeric_names: Vec<String> = vec![];
        let (_, enums) = parse_filters(Some("rating:X|Y|Z"), &numeric_names, &enum_features);
        assert_eq!(enums.len(), 1);
        assert!(enums[0].allowed.is_empty());
    }

    #[test]
    fn malformed_numeric_min_is_silently_skipped() {
        let numeric_names = vec!["price".to_string()];
        let no_enums: Vec<EnumFeatureData> = vec![];
        let (numeric, enums) =
            parse_filters(Some("price:not_a_number:200"), &numeric_names, &no_enums);
        assert_eq!(numeric.len(), 0);
        assert_eq!(enums.len(), 0);
    }
 }
 #[cfg(test)]
 mod json_tests {
    //! Checks that hand-written JSON stays parseable: string escaping via
    //! `write_json_escaped`, and the non-finite floats that must never be
    //! written raw.
    use crate::routes::hexagons::write_json_escaped;

    /// Builds `{"postcode":"<escaped raw>"}` using `write_json_escaped`.
    fn postcode_document(raw: &str) -> String {
        let mut document = String::from("{\"postcode\":\"");
        write_json_escaped(&mut document, raw);
        document.push_str("\"}");
        document
    }

    #[test]
    fn json_escaped_postcode_with_quotes_is_valid() {
        let document = postcode_document("SW1A \"test");
        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&document);
        assert!(
            parsed.is_ok(),
            "Escaped quote should produce valid JSON: {}",
            document
        );
        let value = parsed.unwrap();
        assert_eq!(value["postcode"].as_str().unwrap(), "SW1A \"test");
    }

    #[test]
    fn json_escaped_postcode_with_backslash_is_valid() {
        let document = postcode_document("SW1A\\2AA");
        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&document);
        assert!(
            parsed.is_ok(),
            "Escaped backslash should produce valid JSON: {}",
            document
        );
        let value = parsed.unwrap();
        assert_eq!(value["postcode"].as_str().unwrap(), "SW1A\\2AA");
    }

    #[test]
    fn nan_is_not_valid_json() {
        // Documents the risk that the is_finite() guard in write_hexagons_json
        // protects against: a NaN formatted as-is is not valid JSON.
        let document = format!("{{\"min_price\":{}}}", f64::NAN);
        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&document);
        assert!(parsed.is_err(), "Raw NaN should produce invalid JSON");
    }

    #[test]
    fn infinity_is_not_valid_json() {
        let document = format!("{{\"min_price\":{}}}", f64::INFINITY);
        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&document);
        assert!(parsed.is_err(), "Raw Infinity should produce invalid JSON");
    }
 }
 #[cfg(test)]
 mod enum_encoding_tests {
    //! Documents why enum indices must not be narrowed to `u8` unchecked.
    use std::collections::{HashMap, HashSet};

    #[test]
    fn u8_cast_wraps_around_beyond_255() {
        // Shows the raw u8 wrap-around that the truncation guard in
        // property.rs now prevents.
        const VALUE_COUNT: usize = 300;
        let narrowed: Vec<u8> = (0..VALUE_COUNT)
            .map(|position| position as u8)
            .collect();
        assert_eq!(narrowed[0], narrowed[256], "u8 wraps: 0 == 256");
        assert_eq!(narrowed[1], narrowed[257], "u8 wraps: 1 == 257");

        // Mapping 300 distinct labels through `as u8` collapses some of them
        // onto the same code.
        let labels: Vec<String> = (0..VALUE_COUNT)
            .map(|number| format!("val_{}", number))
            .collect();
        let mut label_to_code: HashMap<&str, u8> = HashMap::new();
        for (position, label) in labels.iter().enumerate() {
            label_to_code.insert(label.as_str(), position as u8);
        }
        let distinct_codes: HashSet<u8> = label_to_code.values().copied().collect();
        assert!(
            distinct_codes.len() < VALUE_COUNT,
            "Without the truncation guard, {} values produce only {} unique u8 indices",
            VALUE_COUNT,
            distinct_codes.len()
        );
    }
 }
--- a/server/__init__.py
+++ b/server/__init__.py
--- a/server/config.py
+++ b/server/config.py
@ -1,30 +0,0 @@
 """Server configuration - imports shared values from pipeline config."""
 from pipeline.config import (
    AGGREGATES_DIR,
    H3_RESOLUTIONS as VALID_RESOLUTIONS,
    DEFAULT_H3_RESOLUTION as DEFAULT_RESOLUTION,
    MIN_YEAR,
    MAX_YEAR,
    DEFAULT_MIN_YEAR,
    DEFAULT_MAX_YEAR,
    DEFAULT_MIN_PRICE,
    DEFAULT_MAX_PRICE,
 )
 # Extra area to return beyond requested bounds (0.2 = 20%)
 # Makes panning smoother by preloading nearby hexagons
 BOUNDS_BUFFER_PERCENT = 0.2
 __all__ = [
    "AGGREGATES_DIR",
    "VALID_RESOLUTIONS",
    "DEFAULT_RESOLUTION",
    "MIN_YEAR",
    "MAX_YEAR",
    "DEFAULT_MIN_YEAR",
    "DEFAULT_MAX_YEAR",
    "DEFAULT_MIN_PRICE",
    "DEFAULT_MAX_PRICE",
    "BOUNDS_BUFFER_PERCENT",
 ]
--- a/server/main.py
+++ b/server/main.py
@ -1,35 +0,0 @@
 from contextlib import asynccontextmanager
 from pathlib import Path
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from server.routes import hexagons, pois
@asynccontextmanager
 async def lifespan(app: FastAPI):
    """Lifespan handler passed to the ``FastAPI`` constructor.

    Before the ``yield`` it calls ``hexagons.preload_dataframes()`` and
    ``pois.preload_pois()``; nothing runs after the ``yield``. The ``app``
    argument is not used by the body.
    """
    # Startup: preload all parquet files
    hexagons.preload_dataframes()
    pois.preload_pois()
    yield
    # Shutdown: nothing to clean up
 # Application object; `lifespan` runs the preload step around the app's lifetime.
 app = FastAPI(title="Property Map API", lifespan=lifespan)
 # CORS is fully open (any origin, method and header) with credentials disabled.
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,  # Cannot use True with wildcard origins
    allow_methods=["*"],
    allow_headers=["*"],
 )
 # Both routers are served under the /api prefix.
 app.include_router(hexagons.router, prefix="/api")
 app.include_router(pois.router, prefix="/api")
 # Mount static files for production (frontend build)
 # The mount is skipped when the build directory is absent, and it is
 # registered after the API routers above.
 frontend_dist = Path(__file__).parent.parent / "frontend" / "dist"
 if frontend_dist.exists():
    app.mount("/", StaticFiles(directory=frontend_dist, html=True), name="static")
--- a/Show more
+++ b/Show more
		`@ -0,0 +1 @@`
							`allowed-idents-below-min-chars = ["i", "j", "k", "_"]`