Merge branch 'main' of https://github.com/rubyhrzhang/property-map
This commit is contained in:
commit
897dae77ac
104 changed files with 16454 additions and 4622 deletions
13
.dockerignore
Normal file
13
.dockerignore
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
data/
|
||||||
|
data_sources/
|
||||||
|
.venv
|
||||||
|
**/node_modules
|
||||||
|
**/dist
|
||||||
|
server-rs/target
|
||||||
|
.git
|
||||||
|
.task
|
||||||
|
.claude
|
||||||
|
__pycache__
|
||||||
|
*.parquet
|
||||||
|
analyses/
|
||||||
|
*.log
|
||||||
49
.github/workflows/docker.yml
vendored
Normal file
49
.github/workflows/docker.yml
vendored
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
name: Docker
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: ghcr.io
|
||||||
|
IMAGE_NAME: ${{ github.repository }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-push:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Log in to GitHub Container Registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.REGISTRY }}
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Extract metadata
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||||
|
tags: |
|
||||||
|
type=raw,value=latest
|
||||||
|
type=sha,prefix=sha-,format=short
|
||||||
|
|
||||||
|
- name: Build and push
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -5,3 +5,6 @@ tfl_journey_client
|
||||||
**/node_modules
|
**/node_modules
|
||||||
**/__pycache__
|
**/__pycache__
|
||||||
**/dist
|
**/dist
|
||||||
|
server-rs/target
|
||||||
|
.task
|
||||||
|
data
|
||||||
|
|
|
||||||
31
.vscode/extensions.json
vendored
Normal file
31
.vscode/extensions.json
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"recommendations": [
|
||||||
|
"esbenp.prettier-vscode",
|
||||||
|
"dbaeumer.vscode-eslint",
|
||||||
|
"ms-toolsai.jupyter",
|
||||||
|
"ms-python.python",
|
||||||
|
"GitHub.copilot",
|
||||||
|
"ms-azuretools.vscode-docker",
|
||||||
|
"redhat.vscode-yaml",
|
||||||
|
"1yib.rust-bundle",
|
||||||
|
"alexcvzz.vscode-sqlite",
|
||||||
|
"esbenp.prettier-vscode",
|
||||||
|
"dbaeumer.vscode-eslint",
|
||||||
|
"ms-python.python",
|
||||||
|
"ms-toolsai.jupyter",
|
||||||
|
"ms-azuretools.vscode-docker",
|
||||||
|
"redhat.vscode-yaml",
|
||||||
|
"tomoki1207.pdf",
|
||||||
|
"qwtel.sqlite-viewer",
|
||||||
|
"alexcvzz.vscode-sqlite",
|
||||||
|
"rust-lang.rust-analyzer",
|
||||||
|
"pkief.material-icon-theme",
|
||||||
|
"detachhead.basedpyright",
|
||||||
|
"editorconfig.editorconfig",
|
||||||
|
"davidanson.vscode-markdownlint",
|
||||||
|
"charliermarsh.ruff",
|
||||||
|
"timonwong.shellcheck",
|
||||||
|
"tonybaloney.vscode-pets",
|
||||||
|
"vadimcn.vscode-lldb"
|
||||||
|
]
|
||||||
|
}
|
||||||
7
.vscode/settings.json
vendored
7
.vscode/settings.json
vendored
|
|
@ -2,6 +2,9 @@
|
||||||
"files.exclude": {
|
"files.exclude": {
|
||||||
"*.venv": true,
|
"*.venv": true,
|
||||||
"**/__pycache__": true,
|
"**/__pycache__": true,
|
||||||
"**/node_modules": true
|
"**/node_modules": true,
|
||||||
|
"**/.ruff_cache":true,
|
||||||
|
"**/.pytest_cache":true,
|
||||||
|
"**/target":true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
244
CLAUDE.md
244
CLAUDE.md
|
|
@ -2,68 +2,228 @@
|
||||||
|
|
||||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||||
|
|
||||||
|
NEVER EVER RUN GIT COMMANDS!!
|
||||||
|
|
||||||
## Project Overview
|
## Project Overview
|
||||||
|
|
||||||
Property Map is a full-stack geospatial web application that visualizes UK property price data aggregated by H3 hexagonal spatial indices. It combines Land Registry price data with postcode geolocation to create an interactive map for exploring property markets.
|
Property Map is a full-stack geospatial application for visualizing UK property data on an interactive map. It combines Land Registry price-paid data, EPC energy certificates, postcode geolocation, TFL journey times, Index of Deprivation scores, crime statistics, ethnicity data, broadband speeds, school ratings, road noise, and OpenStreetMap POIs into a single wide parquet file, then serves aggregated H3 hexagon statistics and POI data via a Rust backend.
|
||||||
|
|
||||||
## Commands
|
## Commands
|
||||||
|
|
||||||
All commands use [Task](https://taskfile.dev) runner. Install with: `curl -1sLf 'https://dl.cloudsmith.io/public/task/task/setup.deb.sh' | sudo -E bash`
|
All commands use the [Task](https://taskfile.dev) runner. Python uses `uv run`. Frontend uses `npm run` from `frontend/`.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Initial setup (downloads ~GB of data, runs pipeline)
|
# Development servers
|
||||||
task prepare
|
task dev:server # Rust backend on :8001 (cargo run, debug build; use `task dev:server:release` for --release)
|
||||||
|
task dev:frontend # Webpack dev server on :3030 (proxies /api to :8001)
|
||||||
|
|
||||||
# Development (run in separate terminals)
|
# Data pipeline
|
||||||
task server # FastAPI backend on :8001
|
task prepare # Build wide.parquet from all pre-downloaded sources
|
||||||
task frontend # Webpack dev server on :3030 (proxies /api to :8001)
|
|
||||||
|
|
||||||
# Code quality
|
# Quality
|
||||||
task lint # Lint Python (ruff) + TypeScript (ESLint + Prettier)
|
task lint # Lint all: Python (ruff) + TypeScript (ESLint+Prettier) + Rust (clippy+fmt)
|
||||||
task format # Auto-fix formatting
|
task format # Auto-fix formatting for all languages
|
||||||
task typecheck # TypeScript type checking
|
task test # Python tests (fuzzy join, haversine, POI counts)
|
||||||
task check # All checks (lint + typecheck + build)
|
task check # Full validation: lint + build + test
|
||||||
|
|
||||||
# Production
|
# Building
|
||||||
task build # Build frontend
|
task build:frontend # TypeScript typecheck + webpack production build
|
||||||
task prod # Serve built frontend via FastAPI
|
task build:server # cargo build --release (NOTE: dir is wrong in Taskfile, run from server-rs/)
|
||||||
|
|
||||||
|
# Granular lint/format
|
||||||
|
task lint:python # uv run ruff check .
|
||||||
|
task lint:frontend # eslint + prettier --check
|
||||||
|
task lint:rust # cargo clippy -- -D warnings && cargo fmt --check
|
||||||
|
task format:python # ruff check --fix && ruff format
|
||||||
|
task format:frontend # eslint --fix + prettier --write
|
||||||
|
task format:rust # cargo fmt --all
|
||||||
|
```
|
||||||
|
|
||||||
|
Running individual tests:
|
||||||
|
```bash
|
||||||
|
uv run pytest pipeline/utils/test_haversine.py # Single test file
|
||||||
|
uv run pytest pipeline/utils/test_haversine.py -k "test_name" # Single test
|
||||||
```
|
```
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
|
### Data Flow
|
||||||
|
|
||||||
```
|
```
|
||||||
frontend/ React + TypeScript SPA (deck.gl/MapLibre for visualization)
|
Raw sources → [Download scripts] → data/*.parquet
|
||||||
src/App.tsx Main component with filters and map state
|
→ [Fuzzy join EPC ↔ Price-Paid] → epc_pp.parquet
|
||||||
src/components/ Map.tsx (deck.gl H3HexagonLayer), Filters UI
|
→ [Merge all datasets] → wide.parquet
|
||||||
|
→ [Rust server loads into memory + precomputes H3 + spatial grid]
|
||||||
server/ FastAPI backend
|
→ [Frontend renders deck.gl H3HexagonLayer over MapLibre GL]
|
||||||
main.py App setup, CORS, static file mounting
|
|
||||||
routes/hexagons.py GET /api/hexagons - returns aggregated price data
|
|
||||||
|
|
||||||
pipeline/ Data processing (Polars + H3)
|
|
||||||
config.py Central config (H3 resolutions 6-11, year/price ranges)
|
|
||||||
sources/ Postcode loading, property price joins
|
|
||||||
processors/ H3 aggregation (count, avg/median/min/max by cell+year)
|
|
||||||
|
|
||||||
tfl_journey_client/ Generated TFL API client (local package)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Data Flow
|
### Data Pipeline (`pipeline/`)
|
||||||
|
|
||||||
1. **Download**: Land Registry prices + ArcGIS postcode→lat/lon mappings → `data_sources/`
|
Python + Polars. Two phases:
|
||||||
2. **Pipeline**: Join data, compute H3 indices, aggregate stats → `data_sources/processed/aggregates/*.parquet`
|
|
||||||
3. **Serve**: Load parquet files into memory, filter by bounds/year/price, return as GeoJSON-like response
|
|
||||||
4. **Visualize**: Frontend fetches on viewport change, renders hexagons colored by average price
|
|
||||||
|
|
||||||
## Tech Stack
|
1. **Download** (`pipeline/download/`) — Each script fetches one raw dataset into `data/`
|
||||||
|
2. **Transform** (`pipeline/transform/`) — Joins and derives features:
|
||||||
|
- `join_epc_pp.py` — Fuzzy-joins EPC ↔ price-paid by address within postcode buckets
|
||||||
|
- `merge.py` — **Main pipeline**: joins all datasets → `wide.parquet` with human-readable column names
|
||||||
|
- `transform_poi.py` — Filters POIs, maps to friendly names + emoji (exhaustive category validation)
|
||||||
|
- `poi_proximity.py` — Counts POIs within 2km per postcode using 0.05° spatial grid
|
||||||
|
- `crime.py` — Aggregates crime CSVs into yearly averages by LSOA
|
||||||
|
|
||||||
- **Frontend**: React 18, TypeScript, Webpack, TailwindCSS, deck.gl, MapLibre GL
|
**Critical: column renaming in `merge.py`** — The pipeline renames columns from snake_case to human-readable names before writing `wide.parquet`. The Rust server auto-discovers features from whatever column names exist in the parquet. Key renames:
|
||||||
- **Backend**: Python 3.12, FastAPI, Polars, H3
|
- `pp_address` → `Address per Property Register`
|
||||||
- **Package managers**: `uv` (Python), `npm` (frontend)
|
- `postcode` → `Postcode`
|
||||||
|
- `latest_price` → `Last known price`
|
||||||
|
- `duration` → `Leashold/Freehold`
|
||||||
|
- `total_floor_area` → `Total floor area (sqm)`
|
||||||
|
- `current_energy_rating` → `Current energy rating`
|
||||||
|
|
||||||
|
The server and frontend must handle these human-readable names. See the full rename map in `merge.py`.
|
||||||
|
|
||||||
|
### Backend (`server-rs/`)
|
||||||
|
|
||||||
|
Rust + Axum. Loads parquet into memory at startup.
|
||||||
|
|
||||||
|
**Structure:**
|
||||||
|
- `data/property.rs` — Loads `wide.parquet`, auto-discovers numeric + enum features, computes histograms, sorts rows by spatial locality, precomputes H3 cells (resolutions 4–12)
|
||||||
|
- `data/poi.rs` — Loads `filtered_uk_pois.parquet`
|
||||||
|
- `index.rs` — `GridIndex`: 0.01° spatial grid for O(1) cell lookup
|
||||||
|
- `filter.rs` — Parses filter strings and checks rows. Format: `name:min:max` (numeric), `name:val1|val2` (enum)
|
||||||
|
- `routes/` — One file per endpoint
|
||||||
|
- `consts.rs` — Key constants (histogram bins, H3 range, max enum cardinality, excluded columns)
|
||||||
|
|
||||||
|
**API endpoints:**
|
||||||
|
- `GET /api/features` — Feature metadata with histograms and 2nd/98th percentiles
|
||||||
|
- `GET /api/hexagons?resolution=&bounds=&filters=` — H3 aggregates (min/max per feature per hex)
|
||||||
|
- `GET /api/hexagon-properties?h3=&resolution=&filters=&limit=&offset=` — Paginated properties within a hexagon
|
||||||
|
- `GET /api/pois?bounds=&categories=` — POIs by bounds (max 5000)
|
||||||
|
- `GET /api/poi-categories` — Available POI category names
|
||||||
|
|
||||||
|
Serves `frontend/dist/` as static fallback in production.
|
||||||
|
|
||||||
|
**Data representation:**
|
||||||
|
- Numeric features: row-major flat `Vec<f64>`, NaN = null
|
||||||
|
- Enum features: `Vec<u8>` indices into value list, 255 = null
|
||||||
|
- String fields (address, postcode): `Vec<String>`, empty = null
|
||||||
|
- The server accepts the parquet path via the `--data` CLI argument (defaults to `data_sources/processed/wide.parquet`)
|
||||||
|
|
||||||
|
### Frontend (`frontend/`)
|
||||||
|
|
||||||
|
React 18 + TypeScript. deck.gl `H3HexagonLayer` over MapLibre GL. TailwindCSS. No state management library — pure React hooks.
|
||||||
|
|
||||||
|
**Key patterns:**
|
||||||
|
- `App.tsx` manages all state, API fetching (150ms debounce), and URL state sync (300ms debounce)
|
||||||
|
- URL encodes view/filters/POI categories/active tab as query params for shareable links
|
||||||
|
- AbortControllers cancel in-flight requests on new queries
|
||||||
|
- Zoom → H3 resolution: `<7→7, <9.5→8, <11→9, <13→10, ≥13→11`
|
||||||
|
- Bounds quantized to 0.01° to match backend caching
|
||||||
|
- Properties pane uses feature names from API response (human-readable), not hardcoded field names
|
||||||
|
- Proxy: dev server on :3030 proxies `/api` to :8001; also handles VS Code `/proxy/PORT` patterns
|
||||||
|
|
||||||
|
## Frontend Design Guide (STRICT — must be followed for all UI changes)
|
||||||
|
|
||||||
|
The frontend uses Tailwind's `darkMode: 'class'` strategy. The `dark` class is toggled on `<html>`. Every visible element must have both light and dark styles. **Never add a light-only color class without its `dark:` counterpart.** Run `task build:frontend` after any UI change to verify.
|
||||||
|
|
||||||
|
### Theme System
|
||||||
|
|
||||||
|
- **State**: `App.tsx` owns a `theme` state (`'light' | 'dark' | 'system'`), persisted in `localStorage` under the key `theme`, default `'system'`.
|
||||||
|
- **Effective theme**: When `'system'`, resolved via `window.matchMedia('(prefers-color-scheme: dark)')`. A `change` listener re-renders on OS preference flip.
|
||||||
|
- **Toggle cycle**: light → dark → system → light. Three-way, not binary.
|
||||||
|
- **Flash prevention**: `index.html` contains an inline `<script>` that applies the `dark` class before first paint. That script duplicates the localStorage/matchMedia logic in `App.tsx`; whenever either one changes, update the other so they stay in sync.
|
||||||
|
- **Prop plumbing**: `effectiveTheme` (`'light' | 'dark'`) is passed as a prop to `<Map>` and `<HomePage>`. Components that need the resolved theme must receive it as a prop — do not read localStorage or matchMedia inside child components.
|
||||||
|
|
||||||
|
### Color Token Reference
|
||||||
|
|
||||||
|
Every UI element must use the correct token from this table. Do not invent new pairings.
|
||||||
|
|
||||||
|
| Role | Light class | Dark class | Hex (dark) |
|
||||||
|
|------|------------|------------|------------|
|
||||||
|
| **Page / pane background** | `bg-warm-50` or `bg-white` | `dark:bg-warm-900` | #1c1917 |
|
||||||
|
| **Card / elevated surface** | `bg-white` | `dark:bg-warm-800` | #292524 |
|
||||||
|
| **Inset / recessed surface** | `bg-warm-100` or `bg-warm-50` | `dark:bg-warm-800` | #292524 |
|
||||||
|
| **Input / select background** | `bg-white` | `dark:bg-warm-800` or `dark:bg-warm-900` | |
|
||||||
|
| **Primary border** | `border-warm-200` | `dark:border-warm-700` | #44403c |
|
||||||
|
| **Subtle border (dividers)** | `border-warm-100` | `dark:border-warm-800` | #292524 |
|
||||||
|
| **Primary text (headings)** | `text-navy-950` or implicit dark | `dark:text-warm-100` | #f5f5f4 |
|
||||||
|
| **Body text** | `text-warm-700` | `dark:text-warm-300` | #d6d3d1 |
|
||||||
|
| **Secondary text (labels, hints)** | `text-warm-500` or `text-warm-600` | `dark:text-warm-400` | #a8a29e |
|
||||||
|
| **Disabled / placeholder text** | `text-warm-400` / `placeholder-warm-400` | `dark:text-warm-500` / `dark:placeholder-warm-500` | #78716c |
|
||||||
|
| **Accent text (links, actions)** | `text-teal-600` | `dark:text-teal-400` | #1de4c3 |
|
||||||
|
| **Accent hover text** | `hover:text-teal-800` | `dark:hover:text-teal-300` | #51f7d9 |
|
||||||
|
| **Accent background (highlights)** | `bg-teal-50` | `dark:bg-teal-900/30` | |
|
||||||
|
| **Active ring / focus ring** | `ring-teal-400` | same — works in both | |
|
||||||
|
| **Price / key metric text** | `text-teal-700` | `dark:text-teal-400` | |
|
||||||
|
| **Remove / close button** | `text-warm-400 hover:text-warm-700` | `dark:hover:text-warm-300` | |
|
||||||
|
| **Checkbox accent** | `accent-teal-600` | same — works in both | |
|
||||||
|
| **Header (unchanged both modes)** | `bg-navy-900 text-white` | same | |
|
||||||
|
|
||||||
|
### Mapping Rules for Specific Contexts
|
||||||
|
|
||||||
|
**Sidebars (Filters, POIPane, PropertiesPane, right-pane tabs):**
|
||||||
|
- Container: `bg-white dark:bg-warm-900`
|
||||||
|
- Inner cards / dropdown menus: `bg-white dark:bg-warm-800`
|
||||||
|
- Borders: `border-warm-200 dark:border-warm-700`
|
||||||
|
- Tab text (active): add `dark:text-warm-100`
|
||||||
|
- Tab text (inactive): `text-warm-600 dark:text-warm-400`
|
||||||
|
|
||||||
|
**Map overlays (PostcodeSearch, MapLegend, POI popup, loading indicator):**
|
||||||
|
- Background: `bg-white dark:bg-warm-800`
|
||||||
|
- Text: `dark:text-warm-200`
|
||||||
|
- Semi-transparent variants: use `/90` opacity suffix (e.g. `dark:bg-warm-800/90`)
|
||||||
|
- Deck.gl tooltip (inline styles, not Tailwind): use `#292524` bg / `#e7e5e4` text / `rgba(0,0,0,0.5)` shadow in dark.
|
||||||
|
- Deck.gl postcode labels (RGB arrays): `[220,220,220,220]` text / `[30,30,30,200]` outline in dark; inverse in light.
|
||||||
|
|
||||||
|
**Map basemaps:**
|
||||||
|
- Light: `https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json`
|
||||||
|
- Dark: `https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json`
|
||||||
|
- `handleMapLoad` must only apply label/water tweaks in light mode. Dark Matter has good defaults.
|
||||||
|
|
||||||
|
**HomePage (landing page):**
|
||||||
|
- Page bg: `bg-warm-50 dark:bg-warm-900`
|
||||||
|
- Cards: `bg-white dark:bg-warm-800` with `border-warm-200 dark:border-warm-700`
|
||||||
|
- Backdrop-blur panels: use `/60` or `/40` opacity on both `bg-warm-50` and `dark:bg-warm-900`
|
||||||
|
- HexCanvas: reads `isDark` ref; uses dimmer fill (`#058172`) and stroke (`#0a665b`) at 60% opacity multiplier.
|
||||||
|
- All headings: `dark:text-warm-100`. All body: `dark:text-warm-300` or `dark:text-warm-400`.
|
||||||
|
|
||||||
|
**DataSourcesPage:**
|
||||||
|
- Same card pattern as above. Footer is already dark (`bg-navy-900`) — no changes needed.
|
||||||
|
- License badges: `bg-warm-100 dark:bg-warm-700 text-warm-600 dark:text-warm-300`
|
||||||
|
- Links: `text-teal-600 dark:text-teal-400`
|
||||||
|
|
||||||
|
**DataSources floating button (on map):**
|
||||||
|
- `bg-white/90 dark:bg-warm-800/90` with `text-teal-600 dark:text-teal-400`
|
||||||
|
|
||||||
|
### Rules for New Components
|
||||||
|
|
||||||
|
1. **Every `bg-white` needs `dark:bg-warm-800` or `dark:bg-warm-900`.** Pane-level = warm-900, card-level = warm-800.
|
||||||
|
2. **Every `border-warm-200` needs `dark:border-warm-700`.**
|
||||||
|
3. **Every `text-warm-*` needs a `dark:text-warm-*` counterpart.** Follow the token table — don't guess.
|
||||||
|
4. **Every `text-teal-600` needs `dark:text-teal-400`.** Every `hover:text-teal-800` needs `dark:hover:text-teal-300`.
|
||||||
|
5. **Every `bg-teal-50` needs `dark:bg-teal-900/30`.**
|
||||||
|
6. **Every `hover:bg-warm-50` needs `dark:hover:bg-warm-700` or `dark:hover:bg-warm-800`.**
|
||||||
|
7. **Inputs and selects**: always add `dark:bg-warm-800 dark:text-warm-200 dark:border-warm-700`. Placeholders get `dark:placeholder-warm-500`.
|
||||||
|
8. **Checkboxes**: always include `accent-teal-600 rounded`.
|
||||||
|
9. **Do not use Tailwind `dark:` classes inside deck.gl layers or canvas code.** Use the `theme` prop / ref and conditional JS values.
|
||||||
|
10. **Do not add `transition-*` classes for theme switching.** The global CSS rule in `index.css` handles transitions for `background-color`, `border-color`, and `color` on all standard HTML elements. Adding per-element transition classes will conflict.
|
||||||
|
11. **Never hardcode hex colors in JSX `style=` props for themed elements** (except deck.gl tooltip and canvas, which can't use Tailwind). Use the Tailwind classes from the token table instead.
|
||||||
|
12. **The header (`bg-navy-900`) is identical in both themes.** Do not add dark variants to it.
|
||||||
|
|
||||||
|
### Verification Checklist (for any UI PR)
|
||||||
|
|
||||||
|
- [ ] `task build:frontend` passes with no errors
|
||||||
|
- [ ] Every new `bg-*`, `text-*`, `border-*` class has a `dark:` counterpart (search your diff)
|
||||||
|
- [ ] Toggle through all three modes (light → dark → system) with no flash
|
||||||
|
- [ ] Map basemap switches when theme changes
|
||||||
|
- [ ] Sidebars, dropdowns, and popups are readable in both modes
|
||||||
|
- [ ] HomePage and DataSourcesPage adapt correctly
|
||||||
|
|
||||||
## Key Implementation Details
|
## Key Implementation Details
|
||||||
|
|
||||||
- Backend caches dataframes in memory and uses LRU cache on queries
|
- **Spatial sort**: Rows sorted by 0.01° grid cell at load time for cache-friendly sequential access
|
||||||
- Bounds rounded to 0.01° precision to improve cache hits
|
- **Row-major layout**: `feature_data[row * num_features + feat_idx]` — all features for one property are contiguous
|
||||||
- Results capped at 50,000 hexagons per request (truncated flag in response)
|
- **H3 precomputation**: Resolutions 4–12 computed in parallel (rayon) at startup
|
||||||
- Frontend debounces API calls on map movement
|
- **Histogram percentiles without sorting**: O(n) two-pass algorithm — build histogram, interpolate percentiles
|
||||||
|
- **Direct JSON writing**: Hexagon endpoint writes JSON via string buffer, avoids serde_json::Value allocations
|
||||||
|
- **POI transform validation**: Fails if any OSM category is unmapped — guarantees exhaustive coverage
|
||||||
|
- **Fuzzy join**: Groups by postcode, uses `thefuzz.token_sort_ratio` with numeric token compatibility, greedy assignment from highest score
|
||||||
|
- **Filter bounds format**: `south,west,north,east` (not standard bbox order)
|
||||||
|
- **POI proximity**: Uses 0.05° grid (~5km cells) to reduce candidates before haversine distance check
|
||||||
|
|
|
||||||
25
Dockerfile
Normal file
25
Dockerfile
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Stage 1: Build frontend
|
||||||
|
FROM node:20-slim AS frontend
|
||||||
|
WORKDIR /app/frontend
|
||||||
|
COPY frontend/package.json frontend/package-lock.json ./
|
||||||
|
RUN npm ci
|
||||||
|
COPY frontend/ ./
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Stage 2: Build Rust server
|
||||||
|
FROM rust:1.83-bookworm AS server
|
||||||
|
WORKDIR /app
|
||||||
|
COPY server-rs/ server-rs/
|
||||||
|
WORKDIR /app/server-rs
|
||||||
|
RUN cargo build --release
|
||||||
|
|
||||||
|
# Stage 3: Runtime
|
||||||
|
FROM debian:bookworm-slim
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --from=server /app/server-rs/target/release/property-map-server ./
|
||||||
|
COPY --from=frontend /app/frontend/dist ./dist/
|
||||||
|
|
||||||
|
EXPOSE 8001
|
||||||
|
ENTRYPOINT ["./property-map-server"]
|
||||||
|
CMD ["--data", "/data/wide.parquet", "--pois", "/data/filtered_uk_pois.parquet"]
|
||||||
1170
Journey.yaml
1170
Journey.yaml
File diff suppressed because it is too large
Load diff
|
|
@ -69,3 +69,11 @@ Nice to haves?
|
||||||
- [Local Authority (Upper Tier)](https://communitiesopendata-communities.hub.arcgis.com/datasets/6e8edb2974da4834bbafa09644a5b02d_0/explore?location=52.684195%2C-2.489482%2C7.17)
|
- [Local Authority (Upper Tier)](https://communitiesopendata-communities.hub.arcgis.com/datasets/6e8edb2974da4834bbafa09644a5b02d_0/explore?location=52.684195%2C-2.489482%2C7.17)
|
||||||
- [Open Geography](https://geoportal.statistics.gov.uk/)
|
- [Open Geography](https://geoportal.statistics.gov.uk/)
|
||||||
- [CommunitiesOpenData](https://communitiesopendata-communities.hub.arcgis.com/)
|
- [CommunitiesOpenData](https://communitiesopendata-communities.hub.arcgis.com/)
|
||||||
|
- [PlanetOSM](https://planet.openstreetmap.org/) for OpenStreetMap POIs
|
||||||
|
- [TfL API](https://api-portal.tfl.gov.uk/signin)
|
||||||
|
- [EPC](https://epc.opendatacommunities.org/login) - <https://epc.opendatacommunities.org/downloads/domestic>
|
||||||
|
|
||||||
|
rightmove:
|
||||||
|
`curl 'https://www.rightmove.co.uk/api/property-search/listing/search?searchLocation=E14&useLocationIdentifier=true&locationIdentifier=OUTCODE%5E749&buy=For+sale&radius=20.0&_includeSSTC=on&index=0&sortType=2&channel=BUY&transactionType=BUY'`
|
||||||
|
|
||||||
|
`curl 'https://www.onthemarket.com/async/search/properties-v2/?search-type=for-sale&location-id=e13&view=map-list'`
|
||||||
|
|
|
||||||
191
Taskfile.data.yml
Normal file
191
Taskfile.data.yml
Normal file
|
|
@ -0,0 +1,191 @@
|
||||||
|
version: '3'
|
||||||
|
|
||||||
|
vars:
|
||||||
|
DATA_DIR: /bulk/property-data
|
||||||
|
ARCGIS_OUTPUT: "{{.DATA_DIR}}/arcgis_data.parquet"
|
||||||
|
PRICE_PAID_OUTPUT: "{{.DATA_DIR}}/price-paid-complete.parquet"
|
||||||
|
IOD_OUTPUT: "{{.DATA_DIR}}/IoD2025_Scores.parquet"
|
||||||
|
POIS_RAW_OUTPUT: "{{.DATA_DIR}}/uk_pois.parquet"
|
||||||
|
POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
|
||||||
|
POI_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/poi_proximity.parquet"
|
||||||
|
EPC_PP_OUTPUT: "{{.DATA_DIR}}/epc_pp.parquet"
|
||||||
|
WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
|
||||||
|
EPC: "{{.DATA_DIR}}/certificates.csv"
|
||||||
|
JOURNEY_TIMES: "./data_sources/processed/journey_times_bank_checkpoint.parquet"
|
||||||
|
ETHNICITY_OUTPUT: "{{.DATA_DIR}}/ethnicity_by_la.parquet"
|
||||||
|
CRIME_DIR: "{{.DATA_DIR}}/crime"
|
||||||
|
CRIME_OUTPUT: "{{.DATA_DIR}}/crime_by_lsoa.parquet"
|
||||||
|
NOISE_OUTPUT: "{{.DATA_DIR}}/road_noise.parquet"
|
||||||
|
OFSTED_OUTPUT: "{{.DATA_DIR}}/ofsted.parquet"
|
||||||
|
NAPTAN_OUTPUT: "{{.DATA_DIR}}/naptan.parquet"
|
||||||
|
BROADBAND_OUTPUT: "{{.DATA_DIR}}/broadband.parquet"
|
||||||
|
SCHOOL_PROXIMITY_OUTPUT: "{{.DATA_DIR}}/school_proximity.parquet"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
prompt:epc:
|
||||||
|
desc: Prompt user to download EPC dataset (requires registration)
|
||||||
|
status:
|
||||||
|
- test -f {{.EPC}}
|
||||||
|
cmds:
|
||||||
|
- |
|
||||||
|
echo ""
|
||||||
|
echo "=== EPC dataset not found ==="
|
||||||
|
echo "The EPC certificates file is required: {{.EPC}}"
|
||||||
|
echo ""
|
||||||
|
echo "To obtain it, register at https://epc.opendatacommunities.org/login"
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
prompt:journey-times:
|
||||||
|
desc: Prompt user to fetch TFL journey times if missing (requires API key registration)
|
||||||
|
status:
|
||||||
|
- test -f {{.JOURNEY_TIMES}}
|
||||||
|
deps:
|
||||||
|
- download:arcgis
|
||||||
|
cmds:
|
||||||
|
- |
|
||||||
|
echo ""
|
||||||
|
echo "=== TFL journey times not found ==="
|
||||||
|
echo "Register for a TFL API key at https://api-portal.tfl.gov.uk/signin"
|
||||||
|
echo "Then set the TFL_API_KEY environment variable and re-run this task."
|
||||||
|
echo ""
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
download:arcgis:
|
||||||
|
desc: Download and convert ArcGIS postcode data
|
||||||
|
status:
|
||||||
|
- test -f {{.ARCGIS_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.arcgis --output {{.ARCGIS_OUTPUT}}
|
||||||
|
|
||||||
|
download:price-paid:
|
||||||
|
desc: Download and convert Land Registry price-paid data
|
||||||
|
status:
|
||||||
|
- test -f {{.PRICE_PAID_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.price_paid --output {{.PRICE_PAID_OUTPUT}}
|
||||||
|
|
||||||
|
download:deprivation:
|
||||||
|
desc: Download and convert Index of Deprivation data
|
||||||
|
status:
|
||||||
|
- test -f {{.IOD_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.deprivation_data --output {{.IOD_OUTPUT}}
|
||||||
|
|
||||||
|
download:ethnicity:
|
||||||
|
desc: Download ethnicity by local authority data
|
||||||
|
status:
|
||||||
|
- test -f {{.ETHNICITY_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.ethnicity --output {{.ETHNICITY_OUTPUT}}
|
||||||
|
|
||||||
|
download:naptan:
|
||||||
|
desc: Download NaPTAN station data
|
||||||
|
status:
|
||||||
|
- test -f {{.NAPTAN_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.naptan --output {{.NAPTAN_OUTPUT}}
|
||||||
|
|
||||||
|
download:pois:
|
||||||
|
desc: Download and extract POIs from OpenStreetMap
|
||||||
|
status:
|
||||||
|
- test -f {{.POIS_RAW_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.pois --output {{.POIS_RAW_OUTPUT}}
|
||||||
|
|
||||||
|
download:ofsted:
|
||||||
|
desc: Download Ofsted school inspection outcomes
|
||||||
|
status:
|
||||||
|
- test -f {{.OFSTED_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.ofsted --output {{.OFSTED_OUTPUT}}
|
||||||
|
|
||||||
|
download:broadband:
|
||||||
|
desc: Download Ofcom broadband performance data
|
||||||
|
status:
|
||||||
|
- test -f {{.BROADBAND_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.broadband --output {{.BROADBAND_OUTPUT}}
|
||||||
|
|
||||||
|
download:noise:
|
||||||
|
desc: Download Defra noise data (road, rail, airport) sampled at postcode centroids
|
||||||
|
deps:
|
||||||
|
- download:arcgis
|
||||||
|
status:
|
||||||
|
- test -f {{.NOISE_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.download.noise --arcgis {{.ARCGIS_OUTPUT}} --output {{.NOISE_OUTPUT}}
|
||||||
|
|
||||||
|
transform:pois:
|
||||||
|
desc: Transform raw POIs to filtered version with friendly names
|
||||||
|
deps:
|
||||||
|
- download:pois
|
||||||
|
- download:naptan
|
||||||
|
status:
|
||||||
|
- test -f {{.POIS_FILTERED_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.transform.transform_poi --input {{.POIS_RAW_OUTPUT}} --naptan {{.NAPTAN_OUTPUT}} --output {{.POIS_FILTERED_OUTPUT}}
|
||||||
|
|
||||||
|
transform:epc-pp:
|
||||||
|
desc: Fuzzy join EPC and Price Paid data
|
||||||
|
deps:
|
||||||
|
- download:price-paid
|
||||||
|
- prompt:epc
|
||||||
|
status:
|
||||||
|
- test -f {{.EPC_PP_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.transform.join_epc_pp --epc {{.EPC}} --price-paid {{.PRICE_PAID_OUTPUT}} --output {{.EPC_PP_OUTPUT}}
|
||||||
|
|
||||||
|
transform:crime:
|
||||||
|
desc: Transform crime CSVs into yearly averages by LSOA
|
||||||
|
status:
|
||||||
|
- test -f {{.CRIME_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.transform.crime --input {{.CRIME_DIR}} --output {{.CRIME_OUTPUT}}
|
||||||
|
|
||||||
|
transform:poi-proximity:
|
||||||
|
desc: Compute POI proximity counts per postcode
|
||||||
|
deps:
|
||||||
|
- download:arcgis
|
||||||
|
- transform:pois
|
||||||
|
status:
|
||||||
|
- test -f {{.POI_PROXIMITY_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.transform.poi_proximity --arcgis {{.ARCGIS_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}} --output {{.POI_PROXIMITY_OUTPUT}}
|
||||||
|
|
||||||
|
transform:school-proximity:
|
||||||
|
desc: Compute good+ school proximity counts per postcode
|
||||||
|
deps:
|
||||||
|
- download:ofsted
|
||||||
|
- download:arcgis
|
||||||
|
status:
|
||||||
|
- test -f {{.SCHOOL_PROXIMITY_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.transform.school_proximity --ofsted {{.OFSTED_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --output {{.SCHOOL_PROXIMITY_OUTPUT}}
|
||||||
|
|
||||||
|
download:journey-times:
|
||||||
|
desc: Fetch TfL journey times for all postcodes
|
||||||
|
deps:
|
||||||
|
- download:arcgis
|
||||||
|
status:
|
||||||
|
- test -f {{.JOURNEY_TIMES}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.journey_times
|
||||||
|
|
||||||
|
prepare:
|
||||||
|
desc: Build wide property dataframe with all joins
|
||||||
|
deps:
|
||||||
|
- transform:epc-pp
|
||||||
|
- download:arcgis
|
||||||
|
- download:deprivation
|
||||||
|
- download:ethnicity
|
||||||
|
- download:broadband
|
||||||
|
- download:noise
|
||||||
|
- transform:crime
|
||||||
|
- transform:poi-proximity
|
||||||
|
- transform:school-proximity
|
||||||
|
- prompt:journey-times
|
||||||
|
status:
|
||||||
|
- test -f {{.WIDE_OUTPUT}}
|
||||||
|
cmds:
|
||||||
|
- uv run python -m pipeline.transform.merge --epc-pp {{.EPC_PP_OUTPUT}} --arcgis {{.ARCGIS_OUTPUT}} --iod {{.IOD_OUTPUT}} --poi-proximity {{.POI_PROXIMITY_OUTPUT}} --journey-times {{.JOURNEY_TIMES}} --ethnicity {{.ETHNICITY_OUTPUT}} --crime {{.CRIME_OUTPUT}} --noise {{.NOISE_OUTPUT}} --school-proximity {{.SCHOOL_PROXIMITY_OUTPUT}} --broadband {{.BROADBAND_OUTPUT}} --output {{.WIDE_OUTPUT}}
|
||||||
116
Taskfile.yml
116
Taskfile.yml
|
|
@ -1,66 +1,79 @@
|
||||||
version: '3'
|
version: '3'
|
||||||
|
|
||||||
|
includes:
|
||||||
|
data:
|
||||||
|
taskfile: ./Taskfile.data.yml
|
||||||
|
flatten: true
|
||||||
|
|
||||||
|
vars:
|
||||||
|
DATA_DIR: /bulk/property-data
|
||||||
|
WIDE_OUTPUT: "{{.DATA_DIR}}/wide.parquet"
|
||||||
|
POIS_FILTERED_OUTPUT: "{{.DATA_DIR}}/filtered_uk_pois.parquet"
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
install:
|
install:
|
||||||
desc: Install dependencies, generate client, and download data
|
desc: Install dependencies
|
||||||
cmds:
|
cmds:
|
||||||
- uv run generate_tfl_client.py
|
|
||||||
- uv sync
|
- uv sync
|
||||||
- cd frontend && npm install
|
- cd frontend && npm install
|
||||||
|
|
||||||
download:
|
test:
|
||||||
desc: Download data
|
cmds:
|
||||||
|
- uv run -m pipeline.utils.test_fuzzy_join
|
||||||
|
- uv run pytest pipeline/utils/test_haversine.py
|
||||||
|
- uv run pytest pipeline/utils/test_poi_counts.py
|
||||||
|
|
||||||
|
test:server:
|
||||||
|
desc: Run Rust backend tests
|
||||||
|
dir: server-rs
|
||||||
|
cmds:
|
||||||
|
- cargo test
|
||||||
|
|
||||||
|
dev:server:
|
||||||
|
desc: Run Rust backend on port 8001 (debug build, fast compile)
|
||||||
|
dir: server-rs
|
||||||
|
cmds:
|
||||||
|
- cargo run -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
|
||||||
|
|
||||||
|
dev:server:release:
|
||||||
|
desc: Run Rust backend on port 8001 (release build)
|
||||||
|
dir: server-rs
|
||||||
|
cmds:
|
||||||
|
- cargo run --release -- --data {{.WIDE_OUTPUT}} --pois {{.POIS_FILTERED_OUTPUT}}
|
||||||
|
|
||||||
|
dev:frontend:
|
||||||
|
desc: Run frontend dev server on port 3030 (proxies /api to :8001)
|
||||||
|
dir: frontend
|
||||||
deps:
|
deps:
|
||||||
- install
|
- install
|
||||||
cmds:
|
|
||||||
- uv run python download_land_registry.py
|
|
||||||
- uv run python download_arcgis_data.py
|
|
||||||
- uv run python download_pois.py
|
|
||||||
|
|
||||||
pipeline:
|
|
||||||
desc: Run data processing pipeline
|
|
||||||
deps:
|
|
||||||
- download
|
|
||||||
cmds:
|
|
||||||
- uv run python -m pipeline.run
|
|
||||||
|
|
||||||
prepare:
|
|
||||||
desc: Prepare the application (install, download data, run pipeline)
|
|
||||||
deps:
|
|
||||||
- pipeline
|
|
||||||
|
|
||||||
server:
|
|
||||||
desc: Run FastAPI backend on port 8001
|
|
||||||
cmds:
|
|
||||||
- uv run fastapi dev server/main.py --port 8001
|
|
||||||
|
|
||||||
frontend:
|
|
||||||
desc: Run frontend dev server on port 3030 (proxies /api to :8001)
|
|
||||||
dir: frontend
|
|
||||||
cmds:
|
cmds:
|
||||||
- npm run dev
|
- npm run dev
|
||||||
|
|
||||||
build:
|
build:server:
|
||||||
|
desc: Build server for production
|
||||||
|
dir: server-rs
|
||||||
|
cmds:
|
||||||
|
- cargo build --release
|
||||||
|
|
||||||
|
build:frontend:
|
||||||
desc: Build frontend for production
|
desc: Build frontend for production
|
||||||
dir: frontend
|
dir: frontend
|
||||||
cmds:
|
cmds:
|
||||||
|
- npm run typecheck
|
||||||
- npm run build
|
- npm run build
|
||||||
|
|
||||||
prod:
|
|
||||||
desc: Run production server (serves built frontend)
|
|
||||||
cmds:
|
|
||||||
- uv run fastapi run server/main.py --port 8001
|
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
desc: Lint all code (Python and TypeScript)
|
desc: Lint all code (Python, TypeScript, and Rust)
|
||||||
cmds:
|
cmds:
|
||||||
- task: lint:python
|
- task: lint:python
|
||||||
- task: lint:frontend
|
- task: lint:frontend
|
||||||
|
- task: lint:rust
|
||||||
|
|
||||||
lint:python:
|
lint:python:
|
||||||
desc: Lint Python code with ruff
|
desc: Lint Python code with ruff and check for unused dependencies
|
||||||
cmds:
|
cmds:
|
||||||
- uv run ruff check .
|
- uv run ruff check .
|
||||||
|
- uv run deptry .
|
||||||
|
|
||||||
lint:frontend:
|
lint:frontend:
|
||||||
desc: Lint frontend TypeScript code
|
desc: Lint frontend TypeScript code
|
||||||
|
|
@ -69,11 +82,20 @@ tasks:
|
||||||
- npm run lint
|
- npm run lint
|
||||||
- npm run format:check
|
- npm run format:check
|
||||||
|
|
||||||
|
lint:rust:
|
||||||
|
desc: Lint Rust code with clippy, check formatting, and detect unused dependencies
|
||||||
|
dir: server-rs
|
||||||
|
cmds:
|
||||||
|
- cargo clippy -- -D warnings
|
||||||
|
- cargo fmt --check
|
||||||
|
- cargo machete
|
||||||
|
|
||||||
format:
|
format:
|
||||||
desc: Format all code (Python and TypeScript)
|
desc: Format all code (Python, TypeScript, and Rust)
|
||||||
cmds:
|
cmds:
|
||||||
- task: format:python
|
- task: format:python
|
||||||
- task: format:frontend
|
- task: format:frontend
|
||||||
|
- task: format:rust
|
||||||
|
|
||||||
format:python:
|
format:python:
|
||||||
desc: Format Python code with ruff
|
desc: Format Python code with ruff
|
||||||
|
|
@ -88,15 +110,17 @@ tasks:
|
||||||
- npm run lint:fix
|
- npm run lint:fix
|
||||||
- npm run format
|
- npm run format
|
||||||
|
|
||||||
|
format:rust:
|
||||||
|
desc: Format Rust code with cargo fmt
|
||||||
|
dir: server-rs
|
||||||
|
cmds:
|
||||||
|
- cargo fmt --all
|
||||||
|
|
||||||
check:
|
check:
|
||||||
desc: Run all checks (lint, typecheck, build)
|
desc: Run all checks (lint, typecheck, build)
|
||||||
cmds:
|
cmds:
|
||||||
- task: lint
|
- task: lint
|
||||||
- task: typecheck
|
- task: build:server
|
||||||
- task: build
|
- task: build:frontend
|
||||||
|
- task: test
|
||||||
typecheck:
|
- task: test:server
|
||||||
desc: Type check frontend TypeScript code
|
|
||||||
dir: frontend
|
|
||||||
cmds:
|
|
||||||
- npm run typecheck
|
|
||||||
|
|
|
||||||
723
analyses/epc_analysis.ipynb
Normal file
723
analyses/epc_analysis.ipynb
Normal file
File diff suppressed because one or more lines are too long
620
analyses/journey_times_analysis.ipynb
Normal file
620
analyses/journey_times_analysis.ipynb
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load diff
113
analyses/wide.ipynb
Normal file
113
analyses/wide.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,129 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""Download ArcGIS data and convert to Parquet."""
|
|
||||||
|
|
||||||
# Run it with:
|
|
||||||
# uv run download_arcgis_data.py
|
|
||||||
|
|
||||||
import time
|
|
||||||
import zipfile
|
|
||||||
import httpx
|
|
||||||
import polars as pl
|
|
||||||
from pathlib import Path
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
URL = "https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data"
|
|
||||||
|
|
||||||
BASE_DATA_PATH = Path("./data_sources")
|
|
||||||
BASE_DATA_PATH.mkdir(exist_ok=True)
|
|
||||||
DOWNLOAD_PATH = BASE_DATA_PATH / "arcgis_data.zip"
|
|
||||||
EXTRACT_PATH = BASE_DATA_PATH / "arcgis_extracted"
|
|
||||||
PARQUET_PATH = BASE_DATA_PATH / "arcgis_data.parquet"
|
|
||||||
|
|
||||||
MAX_RETRIES = 3
|
|
||||||
|
|
||||||
|
|
||||||
def download_with_progress(url: str, output_path: Path) -> None:
    """Download *url* to *output_path* with a progress bar and retries.

    The body is streamed into a sibling ``<name>.part`` file which is renamed
    onto *output_path* only after it has been written completely. An
    interrupted transfer therefore never leaves a truncated file at
    *output_path* that a later run would mistake for a finished download.

    Args:
        url: Address to fetch; redirects are followed.
        output_path: Final location of the downloaded file.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
        httpx.TransportError: If the transfer still fails on the last of
            ``MAX_RETRIES`` attempts.
    """
    partial_path = output_path.with_name(output_path.name + ".part")

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            with httpx.stream(
                "GET",
                url,
                follow_redirects=True,
                timeout=httpx.Timeout(30.0, read=None),
            ) as response:
                response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]
                # Without a Content-Length header, give tqdm an unknown total
                # (None) instead of a misleading total of zero bytes.
                total = int(response.headers.get("content-length", 0)) or None

                with (
                    open(partial_path, "wb") as f,
                    tqdm(
                        total=total,
                        unit="B",
                        unit_scale=True,
                        unit_divisor=1024,
                        desc="Downloading",
                    ) as pbar,
                ):
                    for chunk in response.iter_bytes(chunk_size=8192):
                        f.write(chunk)
                        pbar.update(len(chunk))

            # Publish the file only once every byte has been written.
            partial_path.replace(output_path)
            return  # Success
        except httpx.TransportError as e:
            # TransportError covers connect failures and timeouts as well as
            # connections dropped mid-stream, which are just as transient.
            partial_path.unlink(missing_ok=True)
            if attempt < MAX_RETRIES:
                wait = 2**attempt
                print(f"Attempt {attempt} failed: {e}. Retrying in {wait}s...")
                time.sleep(wait)
            else:
                raise
|
|
||||||
|
|
||||||
|
|
||||||
def extract_zip(zip_path: Path, extract_path: Path) -> list[Path]:
    """Extract *zip_path* into *extract_path* and return the extracted files.

    Args:
        zip_path: Archive to unpack.
        extract_path: Destination directory; created (including missing
            parents) if it does not exist.

    Returns:
        Paths of the regular files contained in the archive, in archive
        order. Directory entries are extracted but not listed, so the length
        of the result is a true file count.
    """
    print("Extracting ZIP file...")
    extract_path.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(extract_path)
        return [
            extract_path / info.filename
            for info in zf.infolist()
            if not info.is_dir()
        ]
|
|
||||||
|
|
||||||
|
|
||||||
def find_data_file(extract_path: Path) -> Path:
    """Return the main data file found anywhere below *extract_path*.

    Formats are tried in priority order (CSV, XLSX, XLS, JSON, GeoJSON); the
    first format with at least one match wins, and the largest file of that
    format is returned. Suffixes are compared case-insensitively so that
    names such as ``DATA.CSV`` are found too.

    Raises:
        FileNotFoundError: If no file with a supported suffix exists.
    """
    # Walk the tree once and ignore directories whose names look like data files.
    candidates = [path for path in extract_path.rglob("*") if path.is_file()]

    for suffix in (".csv", ".xlsx", ".xls", ".json", ".geojson"):
        matches = [path for path in candidates if path.suffix.lower() == suffix]
        if matches:
            # Return the largest file if multiple found
            return max(matches, key=lambda path: path.stat().st_size)

    raise FileNotFoundError(f"No data file found in {extract_path}")
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
    """Load *data_path* with Polars and save it as zstd-compressed Parquet.

    The reader is selected from the lower-cased file suffix: CSV (with date
    parsing), Excel (``.xlsx``/``.xls``) or JSON (``.json``/``.geojson``).
    A short summary of the result is printed afterwards.

    Raises:
        ValueError: If the suffix is not one of the supported formats.
    """
    print(f"Converting {data_path.name} to Parquet...")

    extension = data_path.suffix.lower()
    if extension in (".xlsx", ".xls"):
        frame = pl.read_excel(data_path)
    elif extension in (".json", ".geojson"):
        frame = pl.read_json(data_path)
    elif extension == ".csv":
        frame = pl.read_csv(data_path, try_parse_dates=True)
    else:
        raise ValueError(f"Unsupported file format: {extension}")

    frame.write_parquet(parquet_path, compression="zstd")

    print(f"Saved to {parquet_path}")
    print(f"Rows: {frame.height:,}")
    print(f"Columns: {frame.columns}")
    # Sizes are reported in MiB.
    source_mb = data_path.stat().st_size / 1024**2
    print(f"Original size: {source_mb:.1f} MB")
    parquet_mb = parquet_path.stat().st_size / 1024**2
    print(f"Parquet size: {parquet_mb:.1f} MB")
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Produce the ArcGIS Parquet file, skipping steps already done.

    Nothing happens if the Parquet file exists. Otherwise the archive is
    downloaded unless already present, unpacked when it is a ZIP, and the
    resulting data file is converted.
    """
    if PARQUET_PATH.exists():
        print(f"Parquet already exists at {PARQUET_PATH}, skipping")
        return

    if DOWNLOAD_PATH.exists():
        print(f"File already exists at {DOWNLOAD_PATH}, skipping download")
    else:
        download_with_progress(URL, DOWNLOAD_PATH)

    if not zipfile.is_zipfile(DOWNLOAD_PATH):
        # Not a ZIP, treat as direct data file
        source_file = DOWNLOAD_PATH
    else:
        unpacked = extract_zip(DOWNLOAD_PATH, EXTRACT_PATH)
        print(f"Extracted {len(unpacked)} files")
        source_file = find_data_file(EXTRACT_PATH)

    convert_to_parquet(source_file, PARQUET_PATH)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
@ -1,61 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""Download IoD2025 Deprivation Scores and convert to Parquet."""
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
import polars as pl
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
URL = "https://assets.publishing.service.gov.uk/media/691ded34513046b952c500bd/File_5_IoD2025_Scores_for_the_Indices_of_Deprivation.xlsx"
|
|
||||||
|
|
||||||
BASE_DATA_PATH = Path("./data_sources")
|
|
||||||
BASE_DATA_PATH.mkdir(exist_ok=True)
|
|
||||||
XLSX_PATH = BASE_DATA_PATH / "IoD2025_Scores.xlsx"
|
|
||||||
PARQUET_PATH = BASE_DATA_PATH / "IoD2025_Scores.parquet"
|
|
||||||
|
|
||||||
|
|
||||||
def download_file(url: str, output_path: Path) -> None:
    """Stream *url* to *output_path*, printing progress as it goes.

    The body is written to a sibling ``<name>.part`` file and renamed onto
    *output_path* only when the transfer has finished. A failed or
    interrupted download therefore cannot leave a truncated file at
    *output_path* that a later run would treat as already downloaded.

    Args:
        url: Address to fetch; redirects are followed.
        output_path: Final location of the downloaded file.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    print(f"Downloading from {url}...")
    partial_path = output_path.with_name(output_path.name + ".part")

    with httpx.stream("GET", url, follow_redirects=True, timeout=60) as response:
        response.raise_for_status()
        total = int(response.headers.get("content-length", 0))
        downloaded = 0
        with open(partial_path, "wb") as f:
            for chunk in response.iter_bytes(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                # Progress can only be shown when the server reported a size.
                if total:
                    print(f"\rDownloaded {downloaded / 1024 / 1024:.1f} MB / {total / 1024 / 1024:.1f} MB", end="")

    # Publish the file only once every byte has been written.
    partial_path.replace(output_path)
    print(f"\nSaved to {output_path}")
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_parquet(xlsx_path: Path, parquet_path: Path) -> None:
    """Write the second worksheet of *xlsx_path* to zstd-compressed Parquet.

    Prints the frame's shape and columns, then the sizes of both files.
    """
    print("Reading Excel file (sheet 2)...")

    # sheet_id is 1-indexed, so 2 selects the second sheet (IoD2025 Scores).
    scores = pl.read_excel(xlsx_path, sheet_id=2)

    print(f"Shape: {scores.shape}")
    print(f"Columns: {scores.columns}")

    scores.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")

    # Sizes are reported in MiB.
    excel_mb = xlsx_path.stat().st_size / 1024 / 1024
    print(f"Excel size: {excel_mb:.1f} MB")
    parquet_mb = parquet_path.stat().st_size / 1024 / 1024
    print(f"Parquet size: {parquet_mb:.1f} MB")
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Download the IoD2025 workbook if it is missing, then convert it.

    The conversion runs on every invocation, whether or not the Parquet file
    already exists.
    """
    if XLSX_PATH.exists():
        print(f"Excel file already exists at {XLSX_PATH}, skipping download")
    else:
        download_file(URL, XLSX_PATH)

    convert_to_parquet(XLSX_PATH, PARQUET_PATH)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
@ -1,114 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""Download Land Registry price paid data and convert to Parquet."""
|
|
||||||
|
|
||||||
# Run it with:
|
|
||||||
# uv run download_land_registry.py
|
|
||||||
|
|
||||||
# The download failed in this environment due to network restrictions, but the script will work on your local machine. The ~5GB CSV should compress to roughly ~1GB in Parquet format with ZSTD compression.
|
|
||||||
|
|
||||||
import time
|
|
||||||
import httpx
|
|
||||||
import polars as pl
|
|
||||||
from pathlib import Path
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
URL = "http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv"
|
|
||||||
|
|
||||||
BASE_DATA_PATH = Path("./data_sources")
|
|
||||||
BASE_DATA_PATH.mkdir(exist_ok=True)
|
|
||||||
CSV_PATH = BASE_DATA_PATH / "pp-complete.csv"
|
|
||||||
PARQUET_PATH = BASE_DATA_PATH / "pp-complete.parquet"
|
|
||||||
|
|
||||||
MAX_RETRIES = 3
|
|
||||||
|
|
||||||
|
|
||||||
def download_with_progress(url: str, output_path: Path) -> None:
    """Download *url* to *output_path* with a progress bar and retries.

    The body is streamed into a sibling ``<name>.part`` file which is renamed
    onto *output_path* only after it has been written completely. An
    interrupted transfer therefore never leaves a truncated file at
    *output_path* that a later run would mistake for a finished download.

    Args:
        url: Address to fetch; redirects are followed.
        output_path: Final location of the downloaded file.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
        httpx.TransportError: If the transfer still fails on the last of
            ``MAX_RETRIES`` attempts.
    """
    partial_path = output_path.with_name(output_path.name + ".part")

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            with httpx.stream(
                "GET",
                url,
                follow_redirects=True,
                timeout=httpx.Timeout(30.0, read=None),
            ) as response:
                response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]
                # Without a Content-Length header, give tqdm an unknown total
                # (None) instead of a misleading total of zero bytes.
                total = int(response.headers.get("content-length", 0)) or None

                with (
                    open(partial_path, "wb") as f,
                    tqdm(
                        total=total,
                        unit="B",
                        unit_scale=True,
                        unit_divisor=1024,
                        desc="Downloading",
                    ) as pbar,
                ):
                    for chunk in response.iter_bytes(chunk_size=8192):
                        f.write(chunk)
                        pbar.update(len(chunk))

            # Publish the file only once every byte has been written.
            partial_path.replace(output_path)
            return  # Success
        except httpx.TransportError as e:
            # TransportError covers connect failures and timeouts as well as
            # connections dropped mid-stream, which are just as transient.
            partial_path.unlink(missing_ok=True)
            if attempt < MAX_RETRIES:
                wait = 2**attempt
                print(f"Attempt {attempt} failed: {e}. Retrying in {wait}s...")
                time.sleep(wait)
            else:
                raise
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
    """Read the header-less price paid CSV and save it as zstd Parquet.

    The CSV has no header row, so the column names are supplied here, in
    file order. Prints the row count and both file sizes when done.
    """
    print("Converting to Parquet...")

    # Column layout is documented at
    # https://www.gov.uk/guidance/about-the-price-paid-data
    price_paid_columns = [
        "transaction_id",
        "price",
        "date_of_transfer",
        "postcode",
        "property_type",
        "old_new",
        "duration",
        "paon",
        "saon",
        "street",
        "locality",
        "town_city",
        "district",
        "county",
        "ppd_category",
        "record_status",
    ]

    transactions = pl.read_csv(
        csv_path,
        has_header=False,
        new_columns=price_paid_columns,
        try_parse_dates=True,
    )
    transactions.write_parquet(parquet_path, compression="zstd")

    print(f"Saved to {parquet_path}")
    print(f"Rows: {transactions.height:,}")
    # Sizes are reported in MiB.
    csv_mb = csv_path.stat().st_size / 1024**2
    print(f"CSV size: {csv_mb:.1f} MB")
    parquet_mb = parquet_path.stat().st_size / 1024**2
    print(f"Parquet size: {parquet_mb:.1f} MB")
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Produce the price paid Parquet file, skipping steps already done.

    Nothing happens if the Parquet file exists; otherwise the CSV is
    downloaded unless it is already on disk, then converted.
    """
    if PARQUET_PATH.exists():
        print(f"Parquet already exists at {PARQUET_PATH}, skipping")
        return

    if CSV_PATH.exists():
        print(f"CSV already exists at {CSV_PATH}, skipping download")
    else:
        download_with_progress(URL, CSV_PATH)

    convert_to_parquet(CSV_PATH, PARQUET_PATH)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
||||||
"""Download POI data for the UK from Overture Maps."""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import overturemaps
|
|
||||||
import pyarrow as pa
|
|
||||||
import pyarrow.parquet as pq
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
# UK bounding box (west, south, east, north)
|
|
||||||
UK_BBOX = (-8.65, 49.86, 1.77, 60.86)
|
|
||||||
|
|
||||||
OUTPUT_DIR = Path("data_sources")
|
|
||||||
OUTPUT_FILE = OUTPUT_DIR / "uk_pois.parquet"
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Download Overture Maps places inside ``UK_BBOX`` to ``OUTPUT_FILE``.

    Does nothing if the output file already exists. Record batches are read
    one at a time with a progress bar, combined into a single table and
    written as Parquet. If the reader yields no batches, no file is written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if OUTPUT_FILE.exists():
        print(f"POI file already exists: {OUTPUT_FILE}")
        print("Delete it manually to re-download.")
        return

    print("Downloading UK POI data from Overture Maps...")
    print(f"Bounding box: {UK_BBOX}")
    print("This may take several minutes...")

    reader = overturemaps.record_batch_reader("place", bbox=UK_BBOX)

    # Read all batches
    batches = []
    # Keep a running row count instead of re-summing every batch per
    # iteration, which made the progress display quadratic in batch count.
    total_rows = 0
    with tqdm(desc="Downloading batches", unit=" batches") as pbar:
        for batch in reader:
            batches.append(batch)
            total_rows += batch.num_rows
            pbar.update(1)
            pbar.set_postfix(rows=total_rows)

    if not batches:
        print("No data found in bounding box!")
        return

    # Combine batches into a table and write
    table = pa.Table.from_batches(batches, schema=reader.schema)

    print(f"\nWriting {table.num_rows:,} POIs to {OUTPUT_FILE}...")
    pq.write_table(table, OUTPUT_FILE)

    print(f"Download complete: {OUTPUT_FILE}")
    print(f"File size: {OUTPUT_FILE.stat().st_size / 1024 / 1024:.1f} MB")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
17
frontend/package-lock.json
generated
17
frontend/package-lock.json
generated
|
|
@ -11,6 +11,7 @@
|
||||||
"@deck.gl/core": "^9.0.0",
|
"@deck.gl/core": "^9.0.0",
|
||||||
"@deck.gl/geo-layers": "^9.0.0",
|
"@deck.gl/geo-layers": "^9.0.0",
|
||||||
"@deck.gl/layers": "^9.0.0",
|
"@deck.gl/layers": "^9.0.0",
|
||||||
|
"@deck.gl/mapbox": "^9.2.6",
|
||||||
"@deck.gl/react": "^9.0.0",
|
"@deck.gl/react": "^9.0.0",
|
||||||
"@radix-ui/react-select": "^2.0.0",
|
"@radix-ui/react-select": "^2.0.0",
|
||||||
"@radix-ui/react-slider": "^1.1.0",
|
"@radix-ui/react-slider": "^1.1.0",
|
||||||
|
|
@ -181,6 +182,22 @@
|
||||||
"@luma.gl/engine": "~9.2.6"
|
"@luma.gl/engine": "~9.2.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@deck.gl/mapbox": {
|
||||||
|
"version": "9.2.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/@deck.gl/mapbox/-/mapbox-9.2.6.tgz",
|
||||||
|
"integrity": "sha512-gyqCHZwiZS8LOYY6LILQQp5YCCf++VFk/wRoGskZvhb/kdEPX2Onv8iV8pXe0h9UyMLO6Mj0wl3HlJWg2ILkrg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@luma.gl/constants": "^9.2.6",
|
||||||
|
"@math.gl/web-mercator": "^4.1.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@deck.gl/core": "~9.2.0",
|
||||||
|
"@luma.gl/constants": "~9.2.6",
|
||||||
|
"@luma.gl/core": "~9.2.6",
|
||||||
|
"@math.gl/web-mercator": "^4.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@deck.gl/mesh-layers": {
|
"node_modules/@deck.gl/mesh-layers": {
|
||||||
"version": "9.2.6",
|
"version": "9.2.6",
|
||||||
"resolved": "https://registry.npmjs.org/@deck.gl/mesh-layers/-/mesh-layers-9.2.6.tgz",
|
"resolved": "https://registry.npmjs.org/@deck.gl/mesh-layers/-/mesh-layers-9.2.6.tgz",
|
||||||
|
|
|
||||||
|
|
@ -11,41 +11,42 @@
|
||||||
"format:check": "prettier --check \"src/**/*.{ts,tsx,css}\""
|
"format:check": "prettier --check \"src/**/*.{ts,tsx,css}\""
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"react": "^18.2.0",
|
|
||||||
"react-dom": "^18.2.0",
|
|
||||||
"@deck.gl/core": "^9.0.0",
|
"@deck.gl/core": "^9.0.0",
|
||||||
"@deck.gl/layers": "^9.0.0",
|
|
||||||
"@deck.gl/geo-layers": "^9.0.0",
|
"@deck.gl/geo-layers": "^9.0.0",
|
||||||
|
"@deck.gl/layers": "^9.0.0",
|
||||||
|
"@deck.gl/mapbox": "^9.2.6",
|
||||||
"@deck.gl/react": "^9.0.0",
|
"@deck.gl/react": "^9.0.0",
|
||||||
"maplibre-gl": "^4.0.0",
|
|
||||||
"react-map-gl": "^7.1.0",
|
|
||||||
"@radix-ui/react-slider": "^1.1.0",
|
|
||||||
"@radix-ui/react-select": "^2.0.0",
|
"@radix-ui/react-select": "^2.0.0",
|
||||||
|
"@radix-ui/react-slider": "^1.1.0",
|
||||||
"class-variance-authority": "^0.7.0",
|
"class-variance-authority": "^0.7.0",
|
||||||
"clsx": "^2.1.0",
|
"clsx": "^2.1.0",
|
||||||
|
"maplibre-gl": "^4.0.0",
|
||||||
|
"react": "^18.2.0",
|
||||||
|
"react-dom": "^18.2.0",
|
||||||
|
"react-map-gl": "^7.1.0",
|
||||||
"tailwind-merge": "^2.2.0",
|
"tailwind-merge": "^2.2.0",
|
||||||
"tailwindcss-animate": "^1.0.7"
|
"tailwindcss-animate": "^1.0.7"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"webpack": "^5.90.0",
|
|
||||||
"webpack-cli": "^5.1.0",
|
|
||||||
"webpack-dev-server": "^5.0.0",
|
|
||||||
"html-webpack-plugin": "^5.6.0",
|
|
||||||
"css-loader": "^7.0.0",
|
|
||||||
"style-loader": "^4.0.0",
|
|
||||||
"postcss-loader": "^8.0.0",
|
|
||||||
"ts-loader": "^9.5.0",
|
|
||||||
"typescript": "^5.4.0",
|
|
||||||
"@types/react": "^18.2.0",
|
"@types/react": "^18.2.0",
|
||||||
"@types/react-dom": "^18.2.0",
|
"@types/react-dom": "^18.2.0",
|
||||||
"tailwindcss": "^3.4.0",
|
|
||||||
"autoprefixer": "^10.4.0",
|
|
||||||
"postcss": "^8.4.0",
|
|
||||||
"eslint": "^8.57.0",
|
|
||||||
"@typescript-eslint/eslint-plugin": "^7.0.0",
|
"@typescript-eslint/eslint-plugin": "^7.0.0",
|
||||||
"@typescript-eslint/parser": "^7.0.0",
|
"@typescript-eslint/parser": "^7.0.0",
|
||||||
|
"autoprefixer": "^10.4.0",
|
||||||
|
"css-loader": "^7.0.0",
|
||||||
|
"eslint": "^8.57.0",
|
||||||
"eslint-plugin-react": "^7.34.0",
|
"eslint-plugin-react": "^7.34.0",
|
||||||
"eslint-plugin-react-hooks": "^4.6.0",
|
"eslint-plugin-react-hooks": "^4.6.0",
|
||||||
"prettier": "^3.2.0"
|
"html-webpack-plugin": "^5.6.0",
|
||||||
|
"postcss": "^8.4.0",
|
||||||
|
"postcss-loader": "^8.0.0",
|
||||||
|
"prettier": "^3.2.0",
|
||||||
|
"style-loader": "^4.0.0",
|
||||||
|
"tailwindcss": "^3.4.0",
|
||||||
|
"ts-loader": "^9.5.0",
|
||||||
|
"typescript": "^5.4.0",
|
||||||
|
"webpack": "^5.90.0",
|
||||||
|
"webpack-cli": "^5.1.0",
|
||||||
|
"webpack-dev-server": "^5.0.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
1027
frontend/src/App.tsx
1027
frontend/src/App.tsx
File diff suppressed because it is too large
Load diff
243
frontend/src/components/AreaPane.tsx
Normal file
243
frontend/src/components/AreaPane.tsx
Normal file
|
|
@ -0,0 +1,243 @@
|
||||||
|
import { useMemo } from 'react';
|
||||||
|
import type { FeatureMeta, HexagonStatsResponse } from '../types';
|
||||||
|
|
||||||
|
/** Props for the AreaPane side panel. */
interface AreaPaneProps {
  /** Statistics to display; while null no count or feature cards are shown. */
  stats: HexagonStatsResponse | null;
  /** Feature metadata; its `group` field determines the section headings. */
  globalFeatures: FeatureMeta[];
  /** When true and `stats` is null, a "Loading..." placeholder is rendered. */
  loading: boolean;
  /** When null, the pane only shows a prompt to click a hexagon. */
  hexagonId: string | null;
  /** Shows a "Preview" badge next to the title when true. */
  isHoveredPreview: boolean;
  /** Current state of the live-preview toggle button. */
  hoverMode: boolean;
  /** Called with the negated `hoverMode` when the toggle button is clicked. */
  onHoverModeChange: (enabled: boolean) => void;
  /** Called when the "View N Properties" button is clicked. */
  onViewProperties: () => void;
  /** Called when the close button is clicked. */
  onClose: () => void;
}
|
||||||
|
|
||||||
|
/**
 * Format a number compactly for display.
 *
 * Magnitudes of a million or more become e.g. "1.5M", a thousand or more
 * become e.g. "2k", other integers use the locale's formatting, and other
 * fractional values are shown with one decimal place.
 */
function formatValue(value: number): string {
  const magnitude = Math.abs(value);
  if (magnitude >= 1_000_000) {
    return `${(value / 1_000_000).toFixed(1)}M`;
  }
  if (magnitude >= 1_000) {
    return `${(value / 1_000).toFixed(0)}k`;
  }
  return Number.isInteger(value) ? value.toLocaleString() : value.toFixed(1);
}
|
||||||
|
|
||||||
|
/**
 * Group features by their `group` field, preserving first-seen order.
 *
 * Features with an empty or missing group are collected under "Other".
 * Groups appear in the order their first feature occurs in `globalFeatures`,
 * and features keep their relative order within each group.
 */
function groupFeatures(
  globalFeatures: FeatureMeta[]
): { name: string; features: FeatureMeta[] }[] {
  // A Map iterates in insertion order, so it provides both the constant-time
  // lookup and the first-seen ordering without a linear search per feature.
  const membersByGroup = new Map<string, FeatureMeta[]>();
  for (const feature of globalFeatures) {
    const groupName = feature.group || 'Other';
    const members = membersByGroup.get(groupName);
    if (members) {
      members.push(feature);
    } else {
      membersByGroup.set(groupName, [feature]);
    }
  }
  return Array.from(membersByGroup, ([name, features]) => ({ name, features }));
}
|
||||||
|
|
||||||
|
/**
 * Compact bar histogram.
 *
 * Adjacent bins are summed in runs of `floor(counts.length / 20)` (at least
 * one) to reduce the number of bars, and each bar's height is scaled
 * relative to the tallest merged bar. Bars with a zero count are drawn
 * almost transparent.
 *
 * Renders nothing when there is no data: either `counts` is empty or
 * `maxCount` is not positive. The latter covers callers that derive
 * `maxCount` with `Math.max(...counts)`, which yields -Infinity for an
 * empty array.
 */
function MiniHistogram({ counts, maxCount }: { counts: number[]; maxCount: number }) {
  if (counts.length === 0 || !(maxCount > 0)) return null;

  // Downsample to ~20 bars for display
  const targetBars = 20;
  const step = Math.max(1, Math.floor(counts.length / targetBars));
  const bars: number[] = [];
  for (let index = 0; index < counts.length; index += step) {
    let sum = 0;
    for (let offset = 0; offset < step && index + offset < counts.length; offset++) {
      sum += counts[index + offset];
    }
    bars.push(sum);
  }
  // The floor of 1 keeps the height computation free of division by zero.
  const barMax = Math.max(...bars, 1);

  return (
    <div className="flex items-end gap-px h-8 mt-1">
      {bars.map((count, index) => (
        <div
          key={index}
          className="flex-1 bg-teal-500 dark:bg-teal-400 rounded-t-sm min-w-[2px]"
          style={{ height: `${(count / barMax) * 100}%`, opacity: count > 0 ? 1 : 0.1 }}
        />
      ))}
    </div>
  );
}
|
||||||
|
|
||||||
|
/**
 * Horizontal bar chart for a categorical feature.
 *
 * Rows are ordered by descending count; each bar's width is relative to the
 * largest count (with a floor of 1 to avoid dividing by zero).
 */
function EnumBarChart({ counts }: { counts: Record<string, number> }) {
  const rows = Object.entries(counts);
  rows.sort((left, right) => right[1] - left[1]);
  const largest = Math.max(...rows.map((row) => row[1]), 1);

  return (
    <div className="space-y-1 mt-1">
      {rows.map(([label, count]) => {
        const widthPercent = (count / largest) * 100;
        return (
          <div key={label} className="flex items-center gap-2 text-xs">
            <span className="w-16 truncate text-warm-500 dark:text-warm-400 text-right shrink-0">
              {label}
            </span>
            <div className="flex-1 h-3 bg-warm-100 dark:bg-navy-700 rounded overflow-hidden">
              <div
                className="h-full bg-teal-500 dark:bg-teal-400 rounded"
                style={{ width: `${widthPercent}%` }}
              />
            </div>
            <span className="w-8 text-warm-500 dark:text-warm-400 text-right shrink-0">{count}</span>
          </div>
        );
      })}
    </div>
  );
}
|
||||||
|
|
||||||
|
export default function AreaPane({
|
||||||
|
stats,
|
||||||
|
globalFeatures,
|
||||||
|
loading,
|
||||||
|
hexagonId,
|
||||||
|
isHoveredPreview,
|
||||||
|
hoverMode,
|
||||||
|
onHoverModeChange,
|
||||||
|
onViewProperties,
|
||||||
|
onClose,
|
||||||
|
}: AreaPaneProps) {
|
||||||
|
const featureGroups = useMemo(() => groupFeatures(globalFeatures), [globalFeatures]);
|
||||||
|
|
||||||
|
// Build lookup maps from stats
|
||||||
|
const numericByName = useMemo(() => {
|
||||||
|
if (!stats) return new Map();
|
||||||
|
return new Map(stats.numeric_features.map((feature) => [feature.name, feature]));
|
||||||
|
}, [stats]);
|
||||||
|
|
||||||
|
const enumByName = useMemo(() => {
|
||||||
|
if (!stats) return new Map();
|
||||||
|
return new Map(stats.enum_features.map((feature) => [feature.name, feature]));
|
||||||
|
}, [stats]);
|
||||||
|
|
||||||
|
if (!hexagonId) {
|
||||||
|
return (
|
||||||
|
<div className="flex items-center justify-center h-full text-warm-500 dark:text-warm-400 px-4 text-center text-sm">
|
||||||
|
Click a hexagon to view area statistics
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col h-full">
|
||||||
|
{/* Header */}
|
||||||
|
<div className="p-3 border-b border-warm-200 dark:border-navy-700">
|
||||||
|
<div className="flex justify-between items-center">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<h2 className="text-sm font-semibold dark:text-warm-100">Area Statistics</h2>
|
||||||
|
{isHoveredPreview && (
|
||||||
|
<span className="text-xs px-1.5 py-0.5 rounded bg-teal-50 dark:bg-teal-900/30 text-teal-600 dark:text-teal-400">
|
||||||
|
Preview
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-1">
|
||||||
|
<button
|
||||||
|
onClick={() => onHoverModeChange(!hoverMode)}
|
||||||
|
className={`p-1 rounded ${
|
||||||
|
hoverMode
|
||||||
|
? 'text-teal-600 dark:text-teal-400 bg-teal-50 dark:bg-teal-900/30'
|
||||||
|
: 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'
|
||||||
|
}`}
|
||||||
|
title={hoverMode ? 'Live preview on (click to lock)' : 'Live preview off (click to enable)'}
|
||||||
|
>
|
||||||
|
<svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z" />
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" d="M2.458 12C3.732 7.943 7.523 5 12 5c4.478 0 8.268 2.943 9.542 7-1.274 4.057-5.064 7-9.542 7-4.477 0-8.268-2.943-9.542-7z" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={onClose}
|
||||||
|
className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-1"
|
||||||
|
>
|
||||||
|
<svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{stats && (
|
||||||
|
<p className="text-sm text-warm-600 dark:text-warm-400 mt-1">
|
||||||
|
{stats.count.toLocaleString()} properties
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
{stats && (
|
||||||
|
<button
|
||||||
|
onClick={onViewProperties}
|
||||||
|
className="mt-2 w-full text-sm py-1.5 rounded bg-teal-600 hover:bg-teal-700 text-white font-medium"
|
||||||
|
>
|
||||||
|
View {stats.count.toLocaleString()} Properties
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Stats content */}
|
||||||
|
<div className="flex-1 overflow-y-auto">
|
||||||
|
{loading && !stats ? (
|
||||||
|
<div className="p-4 text-warm-500 dark:text-warm-400 text-sm">Loading...</div>
|
||||||
|
) : stats ? (
|
||||||
|
<div className="p-3 space-y-4">
|
||||||
|
{featureGroups.map((group) => {
|
||||||
|
// Check if any feature in this group has data
|
||||||
|
const hasData = group.features.some(
|
||||||
|
(feature) => numericByName.has(feature.name) || enumByName.has(feature.name)
|
||||||
|
);
|
||||||
|
if (!hasData) return null;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div key={group.name}>
|
||||||
|
<h3 className="text-xs font-semibold text-warm-500 dark:text-warm-400 uppercase tracking-wider mb-2">
|
||||||
|
{group.name}
|
||||||
|
</h3>
|
||||||
|
<div className="space-y-3">
|
||||||
|
{group.features.map((feature) => {
|
||||||
|
const numericStats = numericByName.get(feature.name);
|
||||||
|
const enumStats = enumByName.get(feature.name);
|
||||||
|
|
||||||
|
if (numericStats) {
|
||||||
|
const maxCount = Math.max(...numericStats.histogram.counts);
|
||||||
|
return (
|
||||||
|
<div key={feature.name} className="bg-warm-50 dark:bg-navy-800 rounded p-2">
|
||||||
|
<div className="flex justify-between items-baseline">
|
||||||
|
<span className="text-xs text-warm-700 dark:text-warm-300 truncate mr-2">
|
||||||
|
{feature.name}
|
||||||
|
</span>
|
||||||
|
<span className="text-xs font-semibold text-teal-700 dark:text-teal-400 whitespace-nowrap">
|
||||||
|
{formatValue(numericStats.mean)}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div className="flex justify-between text-[10px] text-warm-400 dark:text-warm-500 mt-0.5">
|
||||||
|
<span>{formatValue(numericStats.min)}</span>
|
||||||
|
<span>{formatValue(numericStats.max)}</span>
|
||||||
|
</div>
|
||||||
|
<MiniHistogram counts={numericStats.histogram.counts} maxCount={maxCount} />
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enumStats) {
|
||||||
|
return (
|
||||||
|
<div key={feature.name} className="bg-warm-50 dark:bg-navy-800 rounded p-2">
|
||||||
|
<span className="text-xs text-warm-700 dark:text-warm-300">
|
||||||
|
{feature.name}
|
||||||
|
</span>
|
||||||
|
<EnumBarChart counts={enumStats.counts} />
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
) : null}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
10
frontend/src/components/DataSources.tsx
Normal file
10
frontend/src/components/DataSources.tsx
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
/** Props for the floating "Data Sources" shortcut button. */
interface DataSourcesProps {
  /** Called with no arguments when the button is clicked. */
  onNavigate: () => void;
}

/**
 * Small text button that hands control to `onNavigate` when clicked.
 *
 * The utility classes position it absolutely at the bottom-right of the
 * nearest positioned ancestor, so the parent decides where it appears.
 */
export default function DataSources(props: DataSourcesProps) {
  const { onNavigate } = props;

  // Kept as a single string so the class list is unchanged from the markup.
  const buttonClasses =
    'absolute bottom-2 right-2 bg-white/90 dark:bg-navy-800/90 backdrop-blur-sm px-3 py-2 rounded shadow-lg text-xs text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline font-semibold transition-colors';

  return (
    <button onClick={onNavigate} className={buttonClasses}>
      Data Sources
    </button>
  );
}
|
||||||
214
frontend/src/components/DataSourcesPage.tsx
Normal file
214
frontend/src/components/DataSourcesPage.tsx
Normal file
|
|
@ -0,0 +1,214 @@
|
||||||
|
import { useEffect, useState, useRef } from 'react';
|
||||||
|
|
||||||
|
/**
 * One entry in the data-source catalogue rendered by `DataSourcesPage`.
 *
 * Declaring the shape explicitly (as the FAQ table in this commit does for
 * its items) makes a missing or misspelt field a compile-time error.
 */
interface DataSource {
  /** URL-hash slug; also used as the card's DOM id and React key. */
  id: string;
  /** Card heading. */
  name: string;
  /** Publishing organisation, shown after "Source:". */
  origin: string;
  /** What the dataset contains and how the application uses it. */
  use: string;
  /** External link, shown both as the href and as the link text. */
  url: string;
  /** Licence label shown as a badge next to the heading. */
  license: string;
}

/** Static catalogue of every dataset listed on the Data Sources page. */
const DATA_SOURCES: DataSource[] = [
  {
    id: 'price-paid',
    name: 'Price Paid Data',
    origin: 'HM Land Registry',
    use: 'Complete historical property sale prices for England and Wales. Used for the last known sale price of each property.',
    url: 'https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'epc',
    name: 'Energy Performance Certificates (EPC)',
    origin: 'Ministry of Housing, Communities & Local Government',
    use: 'Domestic Energy Performance Certificates providing floor area, number of rooms, construction age, energy ratings, property type, and built form. Fuzzy-joined with Price Paid records by address within postcode buckets.',
    url: 'https://epc.opendatacommunities.org/downloads/domestic',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'nspl',
    name: 'National Statistics Postcode Lookup (NSPL)',
    origin: 'ONS / ArcGIS',
    use: 'Maps postcodes to latitude/longitude, LSOA, and Output Area codes for geolocation and joining area-level datasets.',
    url: 'https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'iod',
    name: 'English Indices of Deprivation 2025',
    origin: 'Ministry of Housing, Communities & Local Government',
    use: 'Relative deprivation scores for 33,755 LSOAs across domains: Income, Employment, Education, Health, Crime, Living Environment, and sub-domains. Joined to properties via LSOA code.',
    url: 'https://www.gov.uk/government/statistics/english-indices-of-deprivation-2025',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'ethnicity',
    name: 'Population by Ethnicity (2021 Census)',
    origin: 'ONS',
    use: 'Population percentages by ethnic group (Asian, Black, Mixed, White, Other) per Local Authority. Joined via Local Authority District code.',
    url: 'https://www.ethnicity-facts-figures.service.gov.uk/uk-population-by-ethnicity/national-and-regional-populations/regional-ethnic-diversity/latest/#download-the-data',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'crime',
    name: 'Street-level Crime Data',
    origin: 'data.police.uk',
    use: 'Street-level crime data from 2023 to 2025, aggregated into yearly averages by LSOA and crime type (violence, burglary, anti-social behaviour, drugs, vehicle crime, etc.).',
    url: 'https://data.police.uk/data/',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'tfl-journey-times',
    name: 'TfL Journey Times',
    origin: 'Transport for London',
    use: "Journey time calculations from postcodes to central London destinations (Bank, Waterloo, King's Cross, etc.) via public transport and cycling.",
    url: 'https://api-portal.tfl.gov.uk/',
    license: 'Powered by TfL Open Data',
  },
  {
    id: 'osm-pois',
    name: 'OpenStreetMap POIs',
    origin: 'OpenStreetMap contributors / Geofabrik',
    use: 'Points of interest extracted from the Great Britain PBF extract. Covers amenities, shops, healthcare, leisure, tourism, and more. Filtered and remapped to friendly category names.',
    url: 'https://download.geofabrik.de/europe/great-britain-latest.osm.pbf',
    license: 'Open Data Commons Open Database License (ODbL)',
  },
  {
    id: 'naptan',
    name: 'NaPTAN (Public Transport Stops)',
    origin: 'Department for Transport',
    use: 'National Public Transport Access Nodes providing station and stop locations (rail, bus, metro/tram, ferry, airport), merged into the POI dataset.',
    url: 'https://naptan.dft.gov.uk/naptan/schema/2.4/doc/NaPTANSchemaGuide-2.4-v0.57.pdf',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'noise',
    name: 'Defra Noise Mapping',
    origin: 'Defra / Environment Agency',
    use: 'Strategic noise mapping Round 4 (2022) for road, rail, and airport sources. Lden (day-evening-night 24h weighted average) at 10m grid resolution, modelled at 4m above ground. Sampled at postcode centroids via WCS GeoTIFF tiles.',
    url: 'https://environment.data.gov.uk/spatialdata/road-noise-all-metrics-england-round-4/wcs',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'ofsted',
    name: 'Ofsted School Inspections',
    origin: 'Ofsted',
    use: 'Latest inspection outcomes for state-funded schools (as at April 2025). Averaged per postcode to give a local school quality score (1=Outstanding to 4=Inadequate).',
    url: 'https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes',
    license: 'Open Government Licence v3.0',
  },
  {
    id: 'broadband',
    name: 'Ofcom Broadband Performance',
    origin: 'Ofcom',
    use: 'Fixed broadband coverage and speeds by Output Area from Connected Nations 2025. Includes max download/upload speeds across different speed tiers.',
    url: 'https://www.ofcom.org.uk/phones-and-broadband/coverage-and-speeds/connected-nations-20252/data-downloads-2025',
    license: 'Open Government Licence v3.0',
  },
];
|
||||||
|
|
||||||
|
/**
 * Full-page catalogue of the datasets in `DATA_SOURCES`, plus an attribution
 * footer.
 *
 * If the URL hash matches a source id, that card is given a highlight ring
 * and scrolled into view. The hash is re-read on every `hashchange` event.
 */
export default function DataSourcesPage() {
  const [highlightedId, setHighlightedId] = useState<string | null>(null);
  // Card DOM nodes keyed by source id, filled in by each card's ref callback.
  const cardRefs = useRef<Record<string, HTMLDivElement | null>>({});

  useEffect(() => {
    // Handle of the pending delayed scroll, if any. Tracking it lets a newer
    // hash change (or unmount) cancel a scroll that has not fired yet, so
    // rapid navigation never queues several competing smooth-scrolls.
    let scrollTimer: ReturnType<typeof setTimeout> | undefined;

    function cancelPendingScroll() {
      if (scrollTimer !== undefined) {
        clearTimeout(scrollTimer);
        scrollTimer = undefined;
      }
    }

    function handleHash() {
      cancelPendingScroll();

      const hash = window.location.hash.replace('#', '');
      if (hash && DATA_SOURCES.some((s) => s.id === hash)) {
        setHighlightedId(hash);
        // Scroll after a brief delay to allow render
        scrollTimer = setTimeout(() => {
          scrollTimer = undefined;
          cardRefs.current[hash]?.scrollIntoView({ behavior: 'smooth', block: 'center' });
        }, 100);
      } else {
        setHighlightedId(null);
      }
    }

    handleHash();
    window.addEventListener('hashchange', handleHash);
    return () => {
      window.removeEventListener('hashchange', handleHash);
      cancelPendingScroll();
    };
  }, []);

  return (
    <div className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950 flex flex-col">
      <div className="flex-1">
        <div className="max-w-5xl mx-auto px-6 py-8">
          <h1 className="text-2xl font-bold text-warm-900 dark:text-warm-100 mb-2">Data Sources</h1>
          <p className="text-warm-600 dark:text-warm-400 mb-6">
            This application combines {DATA_SOURCES.length} open datasets covering property prices,
            energy performance, transport, demographics, crime, environment, and more.
          </p>
          <div className="grid grid-cols-1 lg:grid-cols-2 gap-4">
            {DATA_SOURCES.map((source) => (
              <div
                key={source.id}
                id={source.id}
                ref={(el) => { cardRefs.current[source.id] = el; }}
                className={`bg-white dark:bg-navy-800 rounded-lg border p-5 ${
                  highlightedId === source.id
                    ? 'border-teal-400 ring-2 ring-teal-400'
                    : 'border-warm-200 dark:border-navy-700'
                }`}
              >
                <div className="flex items-start justify-between gap-4 mb-2">
                  <h2 className="text-lg font-semibold text-warm-900 dark:text-warm-100">{source.name}</h2>
                  <span className="shrink-0 text-xs bg-warm-100 dark:bg-navy-700 text-warm-600 dark:text-warm-300 px-2 py-1 rounded">
                    {source.license}
                  </span>
                </div>
                <p className="text-sm text-warm-500 dark:text-warm-400 mb-2">Source: {source.origin}</p>
                <p className="text-sm text-warm-700 dark:text-warm-300 mb-3">{source.use}</p>
                <a
                  href={source.url}
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline break-all"
                >
                  {source.url}
                </a>
              </div>
            ))}
          </div>
        </div>
      </div>

      <footer className="bg-navy-900 text-warm-400 px-6 py-6">
        <div className="max-w-5xl mx-auto">
          <h2 className="text-sm font-semibold text-warm-300 uppercase tracking-wide mb-3">
            Attribution
          </h2>
          <ul className="space-y-1.5 text-sm">
            <li>Contains HM Land Registry data © Crown copyright and database right 2025.</li>
            <li>
              Contains public sector information licensed under the{' '}
              <a
                href="https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/"
                target="_blank"
                rel="noopener noreferrer"
                className="text-teal-400 hover:text-teal-300 hover:underline"
              >
                Open Government Licence v3.0
              </a>
              .
            </li>
            <li>Contains OS data © Crown copyright and database rights 2025.</li>
            <li>Powered by TfL Open Data.</li>
            <li>
              Contains data from{' '}
              <a
                href="https://www.openstreetmap.org/copyright"
                target="_blank"
                rel="noopener noreferrer"
                className="text-teal-400 hover:text-teal-300 hover:underline"
              >
                © OpenStreetMap contributors
              </a>
              , available under the{' '}
              <a
                href="https://opendatacommons.org/licenses/odbl/"
                target="_blank"
                rel="noopener noreferrer"
                className="text-teal-400 hover:text-teal-300 hover:underline"
              >
                Open Data Commons Open Database License (ODbL)
              </a>
              .
            </li>
          </ul>
        </div>
      </footer>
    </div>
  );
}
|
||||||
119
frontend/src/components/FAQPage.tsx
Normal file
119
frontend/src/components/FAQPage.tsx
Normal file
|
|
@ -0,0 +1,119 @@
|
||||||
|
import { useState } from 'react';
|
||||||
|
|
||||||
|
/** A single question/answer pair shown on the FAQ page. */
type FAQItem = {
  /** Text shown on the always-visible toggle row. */
  question: string;
  /** Plain-text answer shown once the item is expanded. */
  answer: string;
};
|
||||||
|
|
||||||
|
/**
 * Static FAQ content, rendered in this order by `FAQPage`.
 * Answers are plain text; no markup inside them is interpreted.
 */
const FAQ_ITEMS: FAQItem[] = [
  // --- About the application and its data ---
  {
    question: 'What is this application?',
    answer: 'Narrowit is an interactive map that visualises property-level data across England and Wales. It combines Land Registry sale prices, EPC energy certificates, TfL journey times, deprivation indices, crime statistics, broadband speeds, school ratings, road noise levels, ethnicity demographics, and OpenStreetMap points of interest into a single explorable view.',
  },
  {
    question: 'Where does the data come from?',
    answer: 'All data comes from open government and community sources. Property prices are from HM Land Registry, energy certificates from MHCLG, transport times from TfL, deprivation scores from the English Indices of Deprivation 2025, crime data from data.police.uk, school ratings from Ofsted, broadband from Ofcom, noise from Defra, ethnicity from the 2021 Census, and points of interest from OpenStreetMap. See the Data Sources page for full details and links.',
  },

  // --- Reading and filtering the map ---
  {
    question: 'What are the coloured hexagons on the map?',
    answer: 'The map uses H3 hexagons to aggregate property data at different zoom levels. Each hexagon summarises the properties within it. The colour represents the value of whichever feature you have pinned or are actively filtering — for example, average price or energy rating. Zoom in to see smaller, more detailed hexagons; zoom out for a broader overview.',
  },
  {
    question: 'How do filters work?',
    answer: 'Use the Filters panel on the left to narrow down properties. Add a filter by clicking a feature name, then drag the range slider to set minimum and maximum values. For categorical features like property type, select or deselect individual values. Only hexagons containing properties that match all active filters are shown. Filters are combined with AND logic — every property must satisfy every filter.',
  },
  {
    question: 'What does the eye icon do on a filter?',
    answer: "The eye icon pins a feature as the colour source for the hexagon layer. When pinned, hexagons are coloured by that feature's value range even when you are not actively dragging its slider. This lets you visualise one feature while filtering on others. Click the eye icon again to unpin.",
  },

  // --- Data freshness and matching ---
  {
    question: 'How fresh is the data?',
    answer: 'Property prices cover all Land Registry transactions up to the most recent quarterly release. EPC data includes certificates issued up to the latest available download. Crime data spans 2023–2025 as yearly averages. TfL journey times are computed from current timetables. Deprivation indices are from the 2025 release. School ratings reflect the latest Ofsted inspections as at April 2025. Broadband data is from Ofcom Connected Nations 2025.',
  },
  {
    question: 'How are EPC records matched to Land Registry sales?',
    answer: "EPC and Land Registry records don't share a common identifier, so they are fuzzy-joined by address within each postcode bucket. The pipeline uses token-sorted string similarity with special handling for numeric tokens (house numbers, flat numbers). Matches are assigned greedily from highest similarity score downward so each record is used at most once.",
  },

  // --- Using the interface ---
  {
    question: 'What are Points of Interest (POIs)?',
    answer: 'POIs are places like cafes, schools, supermarkets, GP surgeries, parks, and train stations extracted from OpenStreetMap and the NaPTAN public transport dataset. Use the POI panel on the right to toggle categories on and off. POIs appear as markers on the map when you are zoomed in far enough.',
  },
  {
    question: 'Can I share a specific view with someone?',
    answer: 'Yes. The URL updates automatically as you pan, zoom, and change filters. Click the Share button in the header to copy the current URL to your clipboard. Anyone who opens that link will see the same view, filters, and active POI categories.',
  },
  {
    question: 'How do I see individual properties?',
    answer: 'Click on a hexagon to open the Properties panel on the right. It lists all matching properties within that hexagon, showing address, price, and key features. Use "Load more" at the bottom to paginate through large hexagons.',
  },
  {
    question: 'Why are some hexagons grey?',
    answer: 'Grey hexagons contain properties that have data but fall outside the range of your currently pinned or active feature. This gives you a sense of where properties exist even when their values are outside your selected range.',
  },
  {
    question: 'Does this work on mobile?',
    answer: 'The app is designed for desktop browsers where you have enough screen space for the map, filter panel, and POI/properties panel side by side. It will load on mobile but the experience is best on a larger screen.',
  },
];
|
||||||
|
|
||||||
|
/**
 * Collapsible card for one FAQ entry.
 *
 * The question row is a button that toggles the answer. Each card keeps its
 * own open/closed state and starts collapsed.
 *
 * @param item The question/answer pair to render.
 */
function FAQItemCard({ item }: { item: FAQItem }) {
  const [open, setOpen] = useState(false);

  return (
    <div className="bg-white dark:bg-navy-800 rounded-lg border border-warm-200 dark:border-navy-700">
      <button
        // Explicit type so the toggle can never act as a form submit button.
        type="button"
        // Expose the disclosure state to assistive technology.
        aria-expanded={open}
        className="w-full text-left px-5 py-4 flex items-center justify-between gap-4"
        // Functional update: toggles from the latest state, not a captured one.
        onClick={() => setOpen((wasOpen) => !wasOpen)}
      >
        <span className="font-medium text-warm-900 dark:text-warm-100">{item.question}</span>
        <svg
          // Chevron flips upward while the answer is visible. It is purely
          // decorative, so it is hidden from the accessibility tree.
          aria-hidden="true"
          className={`w-5 h-5 shrink-0 text-warm-400 dark:text-warm-500 transform ${open ? 'rotate-180' : ''}`}
          fill="none"
          stroke="currentColor"
          viewBox="0 0 24 24"
          strokeWidth={2}
        >
          <path strokeLinecap="round" strokeLinejoin="round" d="M19 9l-7 7-7-7" />
        </svg>
      </button>
      {open && (
        <div className="px-5 pb-4">
          <p className="text-sm text-warm-700 dark:text-warm-300 leading-relaxed">{item.answer}</p>
        </div>
      )}
    </div>
  );
}
|
||||||
|
|
||||||
|
/**
 * Scrollable FAQ page: a heading, a short introduction, and one collapsible
 * `FAQItemCard` per entry in `FAQ_ITEMS`, in array order.
 */
export default function FAQPage() {
  return (
    <div className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950">
      <div className="max-w-3xl mx-auto px-6 py-8">
        <h1 className="text-2xl font-bold text-warm-900 dark:text-warm-100 mb-2">
          Frequently Asked Questions
        </h1>
        <p className="text-warm-600 dark:text-warm-400 mb-6">
          Common questions about how Narrowit works, where the data comes from, and how to use the
          map.
        </p>
        <div className="space-y-3">
          {FAQ_ITEMS.map((item) => (
            // Key on the question text (unique within FAQ_ITEMS) rather than
            // the array index, so each card's open/closed state stays with
            // its question if entries are ever inserted or reordered.
            <FAQItemCard key={item.question} item={item} />
          ))}
        </div>
      </div>
    </div>
  );
}
|
||||||
|
|
@ -1,119 +1,466 @@
|
||||||
|
import { memo, useState, useRef, useCallback, useMemo, useEffect } from 'react';
|
||||||
import { Slider } from './ui/slider';
|
import { Slider } from './ui/slider';
|
||||||
import { Label } from './ui/label';
|
import { Label } from './ui/label';
|
||||||
import { YEAR_MIN, YEAR_MAX, YEAR_STEP, PRICE_MIN, PRICE_MAX, PRICE_STEP } from '../lib/constants';
|
import type { FeatureMeta, FeatureFilters } from '../types';
|
||||||
import type { Filters as FiltersType, POICategoryGroup } from '../types';
|
|
||||||
import { POI_CATEGORY_GROUPS } from '../types';
|
|
||||||
|
|
||||||
interface FiltersProps {
|
interface FiltersProps {
|
||||||
filters: FiltersType;
|
features: FeatureMeta[];
|
||||||
onChange: (filters: FiltersType) => void;
|
filters: FeatureFilters;
|
||||||
|
activeFeature: string | null;
|
||||||
|
dragValue: [number, number] | null;
|
||||||
|
enabledFeatures: Set<string>;
|
||||||
|
onAddFilter: (name: string) => void;
|
||||||
|
onRemoveFilter: (name: string) => void;
|
||||||
|
onFilterChange: (name: string, value: [number, number] | string[]) => void;
|
||||||
|
onDragStart: (name: string) => void;
|
||||||
|
onDragChange: (value: [number, number]) => void;
|
||||||
|
onDragEnd: () => void;
|
||||||
zoom: number;
|
zoom: number;
|
||||||
selectedPOICategories: Set<POICategoryGroup>;
|
pinnedFeature: string | null;
|
||||||
onPOICategoriesChange: (categories: Set<POICategoryGroup>) => void;
|
onTogglePin: (name: string) => void;
|
||||||
|
onCancelPin: () => void;
|
||||||
|
onNavigateToSource?: (slug: string, featureName: string) => void;
|
||||||
|
openInfoFeature?: string | null;
|
||||||
|
onClearOpenInfoFeature?: () => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
const POI_LABELS: Record<POICategoryGroup, string> = {
|
function EyeIcon({ filled, className }: { filled: boolean; className?: string }) {
|
||||||
schools: '🏫 Schools',
|
return (
|
||||||
healthcare: '🏥 Healthcare',
|
<svg
|
||||||
transport: '🚉 Transport',
|
className={className || 'w-3.5 h-3.5'}
|
||||||
parks: '🌳 Parks',
|
viewBox="0 0 24 24"
|
||||||
emergency: '🚨 Emergency',
|
fill={filled ? 'currentColor' : 'none'}
|
||||||
supermarkets: '🛒 Supermarkets',
|
stroke="currentColor"
|
||||||
};
|
strokeWidth={2}
|
||||||
|
>
|
||||||
|
<path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z" />
|
||||||
|
<circle cx="12" cy="12" r="3" />
|
||||||
|
</svg>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
export default function Filters({
|
function InfoPopup({
|
||||||
filters,
|
feature,
|
||||||
onChange,
|
onClose,
|
||||||
zoom,
|
onNavigateToSource,
|
||||||
selectedPOICategories,
|
}: {
|
||||||
onPOICategoriesChange,
|
feature: FeatureMeta;
|
||||||
}: FiltersProps) {
|
onClose: () => void;
|
||||||
const update = (key: keyof FiltersType, value: number) => onChange({ ...filters, [key]: value });
|
onNavigateToSource?: (slug: string, featureName: string) => void;
|
||||||
|
}) {
|
||||||
|
const popupRef = useRef<HTMLDivElement>(null);
|
||||||
|
|
||||||
const togglePOICategory = (category: POICategoryGroup) => {
|
useEffect(() => {
|
||||||
const newSet = new Set(selectedPOICategories);
|
function handleClickOutside(e: MouseEvent) {
|
||||||
if (newSet.has(category)) {
|
if (popupRef.current && !popupRef.current.contains(e.target as Node)) {
|
||||||
newSet.delete(category);
|
onClose();
|
||||||
} else {
|
}
|
||||||
newSet.add(category);
|
|
||||||
}
|
}
|
||||||
onPOICategoriesChange(newSet);
|
document.addEventListener('mousedown', handleClickOutside);
|
||||||
};
|
return () => document.removeEventListener('mousedown', handleClickOutside);
|
||||||
|
}, [onClose]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="w-72 p-4 bg-white shadow-lg space-y-6 overflow-y-auto max-h-screen">
|
<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30">
|
||||||
<h1 className="text-xl font-bold">UK Property Prices</h1>
|
<div
|
||||||
|
ref={popupRef}
|
||||||
<div className="text-sm text-slate-500">Zoom: {zoom.toFixed(1)}</div>
|
className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full mx-4 p-5"
|
||||||
|
>
|
||||||
<div className="space-y-2">
|
<div className="flex items-start justify-between mb-3">
|
||||||
<Label>
|
<h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">
|
||||||
Year Range: {filters.minYear} - {filters.maxYear}
|
{feature.name}
|
||||||
</Label>
|
</h3>
|
||||||
<Slider
|
<button
|
||||||
min={YEAR_MIN}
|
onClick={onClose}
|
||||||
max={YEAR_MAX}
|
className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shrink-0"
|
||||||
step={YEAR_STEP}
|
>
|
||||||
value={[filters.minYear, filters.maxYear]}
|
<svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
|
||||||
onValueChange={([min, max]) => onChange({ ...filters, minYear: min, maxYear: max })}
|
<path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
|
||||||
/>
|
</svg>
|
||||||
</div>
|
</button>
|
||||||
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label>Min Price: £{filters.minPrice.toLocaleString()}</Label>
|
|
||||||
<Slider
|
|
||||||
min={PRICE_MIN}
|
|
||||||
max={PRICE_MAX}
|
|
||||||
step={PRICE_STEP}
|
|
||||||
value={[filters.minPrice]}
|
|
||||||
onValueChange={([v]) => update('minPrice', v)}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label>Max Price: £{filters.maxPrice.toLocaleString()}</Label>
|
|
||||||
<Slider
|
|
||||||
min={PRICE_MIN}
|
|
||||||
max={PRICE_MAX}
|
|
||||||
step={PRICE_STEP}
|
|
||||||
value={[filters.maxPrice]}
|
|
||||||
onValueChange={([v]) => update('maxPrice', v)}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="mt-6 p-3 bg-slate-100 rounded text-xs">
|
|
||||||
<div className="mb-2 font-medium">Average Price</div>
|
|
||||||
<div
|
|
||||||
className="h-4 rounded"
|
|
||||||
style={{
|
|
||||||
background:
|
|
||||||
'linear-gradient(to right, rgb(46, 204, 113), rgb(241, 196, 15), rgb(231, 76, 60), rgb(142, 68, 173))',
|
|
||||||
}}
|
|
||||||
></div>
|
|
||||||
<div className="flex justify-between mt-1">
|
|
||||||
<span>£0</span>
|
|
||||||
<span>£200k</span>
|
|
||||||
<span>£400k</span>
|
|
||||||
<span>£800k+</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="space-y-2">
|
|
||||||
<Label>Points of Interest</Label>
|
|
||||||
<div className="space-y-1">
|
|
||||||
{POI_CATEGORY_GROUPS.map((category) => (
|
|
||||||
<label key={category} className="flex items-center gap-2 cursor-pointer">
|
|
||||||
<input
|
|
||||||
type="checkbox"
|
|
||||||
checked={selectedPOICategories.has(category)}
|
|
||||||
onChange={() => togglePOICategory(category)}
|
|
||||||
className="rounded"
|
|
||||||
/>
|
|
||||||
<span className="text-sm">{POI_LABELS[category]}</span>
|
|
||||||
</label>
|
|
||||||
))}
|
|
||||||
</div>
|
</div>
|
||||||
|
{feature.description && (
|
||||||
|
<p className="text-xs text-warm-500 dark:text-warm-400 mb-2">{feature.description}</p>
|
||||||
|
)}
|
||||||
|
{feature.detail && (
|
||||||
|
<p className="text-sm text-warm-700 dark:text-warm-300 mb-4 leading-relaxed">{feature.detail}</p>
|
||||||
|
)}
|
||||||
|
{feature.source && onNavigateToSource && (
|
||||||
|
<button
|
||||||
|
onClick={() => {
|
||||||
|
onNavigateToSource(feature.source!, feature.name);
|
||||||
|
onClose();
|
||||||
|
}}
|
||||||
|
className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline"
|
||||||
|
>
|
||||||
|
View data source
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function FeatureBrowser({
|
||||||
|
availableFeatures,
|
||||||
|
allFeatures,
|
||||||
|
pinnedFeature,
|
||||||
|
onAddFilter,
|
||||||
|
onTogglePin,
|
||||||
|
onNavigateToSource,
|
||||||
|
openInfoFeature,
|
||||||
|
onClearOpenInfoFeature,
|
||||||
|
}: {
|
||||||
|
availableFeatures: FeatureMeta[];
|
||||||
|
allFeatures: FeatureMeta[];
|
||||||
|
pinnedFeature: string | null;
|
||||||
|
onAddFilter: (name: string) => void;
|
||||||
|
onTogglePin: (name: string) => void;
|
||||||
|
onNavigateToSource?: (slug: string, featureName: string) => void;
|
||||||
|
openInfoFeature?: string | null;
|
||||||
|
onClearOpenInfoFeature?: () => void;
|
||||||
|
}) {
|
||||||
|
const [search, setSearch] = useState('');
|
||||||
|
const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);
|
||||||
|
|
||||||
|
// Auto-open info popup when navigating back
|
||||||
|
useEffect(() => {
|
||||||
|
if (openInfoFeature) {
|
||||||
|
const feat = allFeatures.find((f) => f.name === openInfoFeature);
|
||||||
|
if (feat) setInfoFeature(feat);
|
||||||
|
onClearOpenInfoFeature?.();
|
||||||
|
}
|
||||||
|
}, [openInfoFeature, allFeatures, onClearOpenInfoFeature]);
|
||||||
|
|
||||||
|
const filtered = useMemo(() => {
|
||||||
|
if (!search) return availableFeatures;
|
||||||
|
const lower = search.toLowerCase();
|
||||||
|
return availableFeatures.filter((f) => f.name.toLowerCase().includes(lower));
|
||||||
|
}, [availableFeatures, search]);
|
||||||
|
|
||||||
|
const grouped = useMemo(() => {
|
||||||
|
const groups: { name: string; features: FeatureMeta[] }[] = [];
|
||||||
|
const seen = new Map<string, FeatureMeta[]>();
|
||||||
|
for (const f of filtered) {
|
||||||
|
const g = f.group || 'Other';
|
||||||
|
let arr = seen.get(g);
|
||||||
|
if (!arr) {
|
||||||
|
arr = [];
|
||||||
|
seen.set(g, arr);
|
||||||
|
groups.push({ name: g, features: arr });
|
||||||
|
}
|
||||||
|
arr.push(f);
|
||||||
|
}
|
||||||
|
return groups;
|
||||||
|
}, [filtered]);
|
||||||
|
|
||||||
|
return (
|
||||||
|
<>
|
||||||
|
<div className="p-2 border-b border-warm-200 dark:border-navy-700">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
placeholder="Search features..."
|
||||||
|
value={search}
|
||||||
|
onChange={(e) => setSearch(e.target.value)}
|
||||||
|
className="w-full px-2 py-1 text-sm border rounded bg-white dark:bg-navy-800 dark:text-warm-200 border-warm-200 dark:border-navy-700 placeholder-warm-400 dark:placeholder-warm-500 focus:outline-none focus:ring-1 focus:ring-teal-400"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="flex-1 overflow-y-auto">
|
||||||
|
{grouped.map((group) => (
|
||||||
|
<div key={group.name}>
|
||||||
|
<div className="px-3 py-1.5 text-xs font-bold text-warm-500 bg-warm-50 dark:bg-navy-950 dark:text-warm-400 sticky top-0">
|
||||||
|
{group.name}
|
||||||
|
</div>
|
||||||
|
{group.features.map((f) => {
|
||||||
|
const isPinned = pinnedFeature === f.name;
|
||||||
|
return (
|
||||||
|
<div
|
||||||
|
key={f.name}
|
||||||
|
className="flex items-start justify-between px-3 py-1.5 hover:bg-teal-50 dark:hover:bg-teal-900/30 dark:text-warm-300"
|
||||||
|
>
|
||||||
|
<div className="min-w-0 mr-2">
|
||||||
|
<span className="text-sm truncate block">{f.name}</span>
|
||||||
|
{f.description && (
|
||||||
|
<span className="text-xs text-warm-400 dark:text-warm-500 truncate block">{f.description}</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-1 shrink-0 mt-0.5">
|
||||||
|
{f.detail && (
|
||||||
|
<button
|
||||||
|
onClick={() => setInfoFeature(f)}
|
||||||
|
className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
|
||||||
|
title="Feature info"
|
||||||
|
>
|
||||||
|
<svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
|
||||||
|
<circle cx="12" cy="12" r="10" />
|
||||||
|
<path strokeLinecap="round" d="M12 16v-4m0-4h.01" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
<button
|
||||||
|
onClick={() => onTogglePin(f.name)}
|
||||||
|
className={`p-0.5 rounded ${isPinned ? 'text-teal-600 dark:text-teal-400' : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'}`}
|
||||||
|
title={isPinned ? 'Unpin color view' : 'Color map by this feature'}
|
||||||
|
>
|
||||||
|
<EyeIcon filled={isPinned} />
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => onAddFilter(f.name)}
|
||||||
|
className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
|
||||||
|
title="Add filter"
|
||||||
|
>
|
||||||
|
<svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" d="M12 5v14m-7-7h14" />
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
{grouped.length === 0 && (
|
||||||
|
<div className="px-3 py-4 text-sm text-warm-400 dark:text-warm-500 text-center">
|
||||||
|
{search ? 'No matching features' : 'All features are active'}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{infoFeature && (
|
||||||
|
<InfoPopup
|
||||||
|
feature={infoFeature}
|
||||||
|
onClose={() => setInfoFeature(null)}
|
||||||
|
onNavigateToSource={onNavigateToSource}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Format a numeric filter value as a compact label.
 *
 * - |value| >= 1,000,000 -> one decimal with an "M" suffix (e.g. "2.5M")
 * - |value| >= 1,000     -> one decimal with a "k" suffix (e.g. "1.5k")
 * - integers             -> printed as-is
 * - other values         -> two decimals
 *
 * `toFixed` rounds, so a value just under a unit boundary (e.g. 999_999 or
 * 999.999) would otherwise print as "1000.0k" / "1000.00". Such values are
 * promoted to the next unit so the label never shows four integer digits
 * with a suffix.
 *
 * @param value The number to format.
 * @returns The compact string representation.
 */
function formatValue(value: number): string {
  const asMillions = () => `${(value / 1_000_000).toFixed(1)}M`;
  const asThousands = () => `${(value / 1_000).toFixed(1)}k`;

  const magnitude = Math.abs(value);
  if (magnitude >= 1_000_000) return asMillions();

  if (magnitude >= 1_000) {
    const thousands = asThousands();
    // parseFloat ignores the trailing "k"; a rounded result of 1000 belongs in "M".
    return Math.abs(parseFloat(thousands)) >= 1_000 ? asMillions() : thousands;
  }

  if (Number.isInteger(value)) return value.toString();

  const fixed = value.toFixed(2);
  // Same rounding edge one unit down: 999.996 would print as "1000.00".
  return Math.abs(parseFloat(fixed)) >= 1_000 ? asThousands() : fixed;
}
|
||||||
|
|
||||||
|
/**
 * Sidebar panel with two vertically stacked, user-resizable sections:
 * the active filters (enum checkbox lists and numeric range sliders) on top,
 * and the feature browser for adding new filters below.
 *
 * Features whose name is in `enabledFeatures` are rendered as active filters;
 * the rest are passed to `FeatureBrowser` as available features.
 * The split between the two sections is kept in local state as a fraction of
 * the container height, clamped to [0.15, 0.8], and changed by dragging the
 * separator bar.
 */
export default memo(function Filters({
  features,
  filters,
  activeFeature,
  dragValue,
  enabledFeatures,
  onAddFilter,
  onRemoveFilter,
  onFilterChange,
  onDragStart,
  onDragChange,
  onDragEnd,
  zoom,
  pinnedFeature,
  onTogglePin,
  onCancelPin,
  onNavigateToSource,
  openInfoFeature,
  onClearOpenInfoFeature,
}: FiltersProps) {
  const availableFeatures = features.filter((f) => !enabledFeatures.has(f.name));
  const enabledFeatureList = features.filter((f) => enabledFeatures.has(f.name));

  const containerRef = useRef<HTMLDivElement>(null);
  const [splitFraction, setSplitFraction] = useState(0.65);
  // True only while the separator is being dragged; a ref so moves don't re-render.
  const draggingRef = useRef(false);

  const handleSeparatorPointerDown = useCallback(
    (e: React.PointerEvent) => {
      e.preventDefault();
      // Capture the pointer so move/up events keep arriving when the cursor leaves the thin bar.
      (e.target as HTMLElement).setPointerCapture(e.pointerId);
      draggingRef.current = true;
    },
    []
  );

  const handleSeparatorPointerMove = useCallback(
    (e: React.PointerEvent) => {
      if (!draggingRef.current || !containerRef.current) return;
      const rect = containerRef.current.getBoundingClientRect();
      // A collapsed container would produce NaN/Infinity and an invalid CSS height.
      if (rect.height <= 0) return;
      const y = e.clientY - rect.top;
      const fraction = Math.min(0.8, Math.max(0.15, y / rect.height));
      setSplitFraction(fraction);
    },
    []
  );

  // Ends the drag. Also wired to pointercancel / lostpointercapture: without that,
  // a cancelled gesture leaves the flag set and later hover moves over the bar
  // would keep resizing the panel.
  const handleSeparatorPointerUp = useCallback(() => {
    draggingRef.current = false;
  }, []);

  return (
    <div ref={containerRef} className="w-80 flex flex-col bg-white dark:bg-navy-950 shadow-lg overflow-hidden">
      {/* Top: Active filters — user-resizable, scrollable */}
      <div className="min-h-0 flex flex-col" style={{ height: `${splitFraction * 100}%` }}>
        {/* Active Filters header */}
        <div className="shrink-0 flex items-center justify-between px-3 py-2 border-b border-warm-200 dark:border-navy-700">
          <div className="flex items-center gap-2">
            <span className="text-sm font-semibold text-navy-950 dark:text-warm-100">Active Filters</span>
            {enabledFeatureList.length > 0 && (
              <span className="text-xs font-medium px-1.5 py-0.5 rounded-full bg-teal-50 dark:bg-teal-900/30 text-teal-600 dark:text-teal-400">
                {enabledFeatureList.length}
              </span>
            )}
          </div>
          <span className="text-xs text-warm-500 dark:text-warm-400">Zoom {zoom.toFixed(1)}</span>
        </div>

        <div className="flex-1 overflow-y-auto p-3 space-y-3">
          {enabledFeatureList.length === 0 && (
            <div className="flex flex-col items-center justify-center py-8 text-center">
              <svg className="w-8 h-8 text-warm-300 dark:text-warm-600 mb-2" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={1.5}>
                <path strokeLinecap="round" strokeLinejoin="round" d="M12 3c2.755 0 5.455.232 8.083.678.533.09.917.556.917 1.096v1.044a2.25 2.25 0 01-.659 1.591l-5.432 5.432a2.25 2.25 0 00-.659 1.591v2.927a2.25 2.25 0 01-1.244 2.013L9.75 21v-6.568a2.25 2.25 0 00-.659-1.591L3.659 7.409A2.25 2.25 0 013 5.818V4.774c0-.54.384-1.006.917-1.096A48.32 48.32 0 0112 3z" />
              </svg>
              <span className="text-sm font-medium text-warm-400 dark:text-warm-500">No active filters</span>
              <span className="text-xs text-warm-400 dark:text-warm-500 mt-1">Browse features below and click + to add a filter</span>
            </div>
          )}

          {enabledFeatureList.map((feature) => {
            if (feature.type === 'enum') {
              // Enum feature: the filter value is the list of checked option strings.
              const selectedValues = (filters[feature.name] as string[]) || [];
              const allValues = feature.values || [];
              return (
                <div key={feature.name} className={`space-y-1 p-3 rounded ${pinnedFeature === feature.name ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}>
                  <div className="flex items-center justify-between">
                    <Label>{feature.name}</Label>
                    <div className="flex items-center gap-0.5">
                      <button
                        onClick={() => onTogglePin(feature.name)}
                        className={`p-0.5 rounded ${pinnedFeature === feature.name ? 'text-teal-600 dark:text-teal-400' : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'}`}
                        title={pinnedFeature === feature.name ? 'Unpin color view' : 'Color map by this feature'}
                      >
                        <EyeIcon filled={pinnedFeature === feature.name} />
                      </button>
                      <button
                        onClick={() => onRemoveFilter(feature.name)}
                        className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 text-sm px-1"
                        title="Remove filter"
                      >
                        x
                      </button>
                    </div>
                  </div>
                  <div className="flex gap-2 text-sm mb-1">
                    <button
                      className="text-teal-600 dark:text-teal-400 hover:underline"
                      onClick={() => onFilterChange(feature.name, [...allValues])}
                    >
                      All
                    </button>
                    <button
                      className="text-teal-600 dark:text-teal-400 hover:underline"
                      onClick={() => onFilterChange(feature.name, [])}
                    >
                      None
                    </button>
                  </div>
                  <div className="space-y-0.5 max-h-40 overflow-y-auto">
                    {allValues.map((val) => (
                      <label key={val} className="flex items-center gap-1.5 text-sm cursor-pointer dark:text-warm-300">
                        <input
                          type="checkbox"
                          checked={selectedValues.includes(val)}
                          onChange={() => {
                            // Toggle membership of this option in the selection.
                            const next = selectedValues.includes(val)
                              ? selectedValues.filter((v) => v !== val)
                              : [...selectedValues, val];
                            onFilterChange(feature.name, next);
                          }}
                          className="rounded accent-teal-600"
                        />
                        {val}
                      </label>
                    ))}
                  </div>
                </div>
              );
            }

            // Numeric feature
            const isActive = activeFeature === feature.name;
            const isPinned = pinnedFeature === feature.name;
            // While this slider is being dragged, show the in-flight drag value
            // instead of the committed filter value.
            const displayValue =
              isActive && dragValue
                ? dragValue
                : (filters[feature.name] as [number, number]) || [feature.min!, feature.max!];
            const span = feature.max! - feature.min!;
            // Default to 1/100 of the range. A degenerate range (min === max, or
            // missing bounds) would give a step of 0/NaN, so fall back to 1 —
            // assumes Slider needs a positive step; TODO confirm against the Slider component.
            const step = feature.step ?? (span > 0 ? span / 100 : 1);

            return (
              <div
                key={feature.name}
                className={`space-y-1 p-3 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
              >
                <div className="flex items-center justify-between">
                  <Label>
                    {feature.name}: {formatValue(displayValue[0])} - {formatValue(displayValue[1])}
                  </Label>
                  <div className="flex items-center gap-0.5">
                    <button
                      onClick={() => onTogglePin(feature.name)}
                      className={`p-0.5 rounded ${isPinned ? 'text-teal-600 dark:text-teal-400' : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'}`}
                      title={isPinned ? 'Unpin color view' : 'Color map by this feature'}
                    >
                      <EyeIcon filled={isPinned} />
                    </button>
                    <button
                      onClick={() => onRemoveFilter(feature.name)}
                      className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 text-sm px-1"
                      title="Remove filter"
                    >
                      x
                    </button>
                  </div>
                </div>
                <Slider
                  min={feature.min!}
                  max={feature.max!}
                  step={step}
                  value={[displayValue[0], displayValue[1]]}
                  onValueChange={([min, max]) => onDragChange([min, max])}
                  onPointerDown={() => onDragStart(feature.name)}
                  onPointerUp={() => onDragEnd()}
                />
              </div>
            );
          })}
        </div>
      </div>

      {/* Draggable separator */}
      <div
        className="shrink-0 h-1.5 cursor-row-resize flex items-center justify-center bg-warm-100 dark:bg-navy-800 hover:bg-warm-200 dark:hover:bg-navy-700 border-y border-warm-200 dark:border-navy-700"
        onPointerDown={handleSeparatorPointerDown}
        onPointerMove={handleSeparatorPointerMove}
        onPointerUp={handleSeparatorPointerUp}
        onPointerCancel={handleSeparatorPointerUp}
        onLostPointerCapture={handleSeparatorPointerUp}
      >
        <div className="w-8 h-0.5 rounded bg-warm-300 dark:bg-navy-600" />
      </div>

      {/* Bottom: Feature browser — fills remaining space */}
      <div className="min-h-0 flex-1 flex flex-col">
        <div className="shrink-0 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
          <span className="text-sm font-semibold text-navy-950 dark:text-warm-100">Add Filter</span>
        </div>
        <div className="min-h-0 flex-1 flex flex-col">
          <FeatureBrowser
            availableFeatures={availableFeatures}
            allFeatures={features}
            pinnedFeature={pinnedFeature}
            onAddFilter={onAddFilter}
            onTogglePin={onTogglePin}
            onNavigateToSource={onNavigateToSource}
            openInfoFeature={openInfoFeature}
            onClearOpenInfoFeature={onClearOpenInfoFeature}
          />
        </div>
      </div>
    </div>
  );
});
|
||||||
|
|
|
||||||
367
frontend/src/components/HomePage.tsx
Normal file
367
frontend/src/components/HomePage.tsx
Normal file
|
|
@ -0,0 +1,367 @@
|
||||||
|
import { useRef, useState, useEffect, useCallback } from 'react';
|
||||||
|
|
||||||
|
// --- Floating hex particle canvas that reacts to scroll ---
|
||||||
|
|
||||||
|
/** Number of hexagon particles created for the background canvas. */
const HEX_COUNT = 60;

/** One full turn in radians. */
const TAU = 2 * Math.PI;
|
||||||
|
|
||||||
|
/** State of one drifting hexagon particle on the background canvas. */
interface Hex {
  /** Current horizontal position, advanced every frame by `speed`. */
  x: number;
  /** Current vertical position, recomputed every frame from `baseY`. */
  y: number;
  /** Vertical rest position that bobbing and scroll parallax are applied to. */
  baseY: number;
  /** Radius passed to the hexagon path. */
  size: number;
  /** Per-particle base alpha, scaled by the global scroll fade when drawn. */
  opacity: number;
  /** Horizontal drift in px/s; also scales the scroll parallax. */
  speed: number;
  /** Phase offset (radians) for the gentle vertical bob. */
  phase: number;
}
|
||||||
|
|
||||||
|
/**
 * Create HEX_COUNT hexagon particles with random positions, sizes,
 * opacities, drift speeds and bob phases inside a `w` x `h` area.
 *
 * @param w Width of the area the particles are scattered over.
 * @param h Height of the area the particles are scattered over.
 * @returns A freshly generated particle array.
 */
function initHexes(w: number, h: number): Hex[] {
  return Array.from({ length: HEX_COUNT }, (): Hex => {
    // The start height doubles as the rest position the bob oscillates around.
    const startY = Math.random() * h;
    return {
      x: Math.random() * w,
      y: startY,
      baseY: startY,
      size: 8 + Math.random() * 20,
      opacity: 0.06 + Math.random() * 0.12,
      speed: 6 + Math.random() * 14,
      phase: Math.random() * TAU,
    };
  });
}
|
||||||
|
|
||||||
|
/**
 * Trace a closed regular hexagon path centred on (`cx`, `cy`) with
 * circumradius `r`. Only builds the path; the caller fills or strokes it.
 */
function drawHex(ctx: CanvasRenderingContext2D, cx: number, cy: number, r: number) {
  // Corner k sits at k sixths of a turn, rotated back by 30 degrees.
  const corner = (k: number): [number, number] => {
    const theta = (TAU / 6) * k - Math.PI / 6;
    return [cx + r * Math.cos(theta), cy + r * Math.sin(theta)];
  };

  ctx.beginPath();
  ctx.moveTo(...corner(0));
  for (let k = 1; k < 6; k++) {
    ctx.lineTo(...corner(k));
  }
  ctx.closePath();
}
|
||||||
|
|
||||||
|
/**
 * Decorative full-size canvas of drifting teal hexagons.
 *
 * The particles drift right, bob vertically, shift upward with scroll
 * (parallax) and fade out as `scrollProgress` grows, reaching zero alpha at
 * 0.5. `scrollProgress` and `isDark` are mirrored into refs so the animation
 * loop, which is set up once on mount, always reads the latest values without
 * being restarted.
 *
 * @param scrollProgress Scroll position of the page as a fraction.
 * @param isDark Whether to use the darker palette and reduced alpha.
 */
function HexCanvas({ scrollProgress, isDark = false }: { scrollProgress: number; isDark?: boolean }) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const hexesRef = useRef<Hex[]>([]);
  const animRef = useRef(0);
  const scrollRef = useRef(scrollProgress);
  scrollRef.current = scrollProgress;
  const isDarkRef = useRef(isDark);
  isDarkRef.current = isDark;

  useEffect(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;
    const ctx = canvas.getContext('2d');
    if (!ctx) return;

    // Logical (CSS pixel) size of the drawing area, updated by resize().
    let w = 0;
    let h = 0;

    // Size the backing store to the parent at device resolution and re-seed the particles.
    function resize() {
      const dpr = window.devicePixelRatio || 1;
      const rect = canvas!.parentElement!.getBoundingClientRect();
      w = rect.width;
      h = rect.height;
      canvas!.width = w * dpr;
      canvas!.height = h * dpr;
      canvas!.style.width = `${w}px`;
      canvas!.style.height = `${h}px`;
      // Draw in CSS pixels regardless of the device pixel ratio.
      ctx!.setTransform(dpr, 0, 0, dpr, 0, 0);
      hexesRef.current = initHexes(w, h);
    }

    resize();
    const ro = new ResizeObserver(resize);
    ro.observe(canvas.parentElement!);

    let prev = performance.now();

    function frame(now: number) {
      const dt = (now - prev) / 1000;
      prev = now;
      const scroll = scrollRef.current;
      ctx!.clearRect(0, 0, w, h);

      // Teal accent color, fade to 0 as user scrolls down
      const fade = Math.max(0, 1 - scroll * 2);
      const dark = isDarkRef.current;
      const themeAlpha = dark ? 0.6 : 1;

      for (const hex of hexesRef.current) {
        // drift right, wrap
        hex.x = (hex.x + hex.speed * dt) % (w + hex.size * 2);
        // gentle vertical bob + parallax push from scroll
        const bob = Math.sin(now / 1000 + hex.phase) * 8;
        const parallax = scroll * h * 0.3 * (hex.speed / 20);
        hex.y = hex.baseY + bob - parallax;

        // wrap vertically
        if (hex.y < -hex.size * 2) hex.y += h + hex.size * 4;
        if (hex.y > h + hex.size * 2) hex.y -= h + hex.size * 4;

        // Fully faded out: positions keep advancing, but there is nothing to paint.
        if (fade <= 0) continue;

        // hex.x cycles through [0, w + 2*size). Shift by one radius so the centre
        // travels from -size to w + size: the hexagon enters and leaves fully
        // off-screen instead of popping in half-visible at the left edge.
        const drawX = hex.x - hex.size;

        ctx!.globalAlpha = hex.opacity * fade * themeAlpha;
        ctx!.fillStyle = dark ? '#058172' : '#00a28c';
        drawHex(ctx!, drawX, hex.y, hex.size);
        ctx!.fill();

        ctx!.globalAlpha = hex.opacity * 0.5 * fade * themeAlpha;
        ctx!.strokeStyle = dark ? '#0a665b' : '#05c9aa';
        ctx!.lineWidth = 1;
        drawHex(ctx!, drawX, hex.y, hex.size);
        ctx!.stroke();
      }

      animRef.current = requestAnimationFrame(frame);
    }

    animRef.current = requestAnimationFrame(frame);
    return () => {
      cancelAnimationFrame(animRef.current);
      ro.disconnect();
    };
  }, []);

  return (
    <canvas
      ref={canvasRef}
      className="absolute inset-0 pointer-events-none"
      style={{ zIndex: 0 }}
    />
  );
}
|
||||||
|
|
||||||
|
// --- Fade-in hook ---
|
||||||
|
|
||||||
|
/**
 * Hook returning a ref for a `div` that gets the `fade-in-visible` class the
 * first time at least 15% of it is intersecting. The element is unobserved
 * after that, so the class is added once and never removed by this hook.
 */
function useFadeInRef() {
  const targetRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const node = targetRef.current;
    if (!node) return;

    const io = new IntersectionObserver(
      (entries) => {
        const first = entries[0];
        if (!first.isIntersecting) return;
        node.classList.add('fade-in-visible');
        // One-shot: stop watching once the element has been revealed.
        io.unobserve(node);
      },
      { threshold: 0.15 }
    );
    io.observe(node);

    return () => io.disconnect();
  }, []);

  return targetRef;
}
|
||||||
|
|
||||||
|
// --- Page ---
|
||||||
|
|
||||||
|
/**
 * Landing page: a scrollable column of marketing sections rendered above an
 * animated hexagon background.
 *
 * The scroll position of the page container is tracked as a 0..1 fraction and
 * passed to `HexCanvas`. Each section fades in through `useFadeInRef`.
 *
 * @param onOpenDashboard Called when either call-to-action button is clicked.
 * @param theme Colour theme; `'dark'` switches the canvas to its dark palette.
 */
export default function HomePage({ onOpenDashboard, theme = 'light' }: { onOpenDashboard: () => void; theme?: 'light' | 'dark' }) {
  const scrollRef = useRef<HTMLDivElement>(null);
  const [scrollProgress, setScrollProgress] = useState(0);

  const handleScroll = useCallback(() => {
    const container = scrollRef.current;
    if (!container) return;
    const scrollable = container.scrollHeight - container.clientHeight;
    // Nothing to scroll: keep the previous progress rather than dividing by zero.
    if (scrollable > 0) {
      setScrollProgress(container.scrollTop / scrollable);
    }
  }, []);

  useEffect(() => {
    const container = scrollRef.current;
    if (!container) return;
    container.addEventListener('scroll', handleScroll, { passive: true });
    return () => container.removeEventListener('scroll', handleScroll);
  }, [handleScroll]);

  // One fade-in ref per section, in page order.
  const heroRef = useFadeInRef();
  const problemRef = useFadeInRef();
  const filtersRef = useFadeInRef();
  const howRef = useFadeInRef();
  const numbersRef = useFadeInRef();
  const ctaRef = useFadeInRef();

  const isDark = theme === 'dark';

  return (
    <div ref={scrollRef} className="flex-1 overflow-y-auto bg-warm-50 dark:bg-navy-950 relative">
      <HexCanvas scrollProgress={scrollProgress} isDark={isDark} />

      <div className="relative" style={{ zIndex: 1 }}>
        {/* Hero */}
        <div className="max-w-3xl mx-auto px-6 pt-20 pb-24">
          <div
            ref={heroRef}
            className="fade-in-section backdrop-blur-sm bg-warm-50/60 dark:bg-navy-950/60 rounded-2xl p-8 -mx-2"
          >
            <p className="text-teal-600 font-semibold tracking-wide uppercase text-sm mb-4">
              Find where to live, not just what's for sale
            </p>
            <h1 className="text-5xl font-extrabold text-navy-950 dark:text-warm-100 mb-6 leading-[1.1] tracking-tight">
              Every neighbourhood
              <br />
              in England & Wales.
              <br />
              <span className="text-teal-600">One map. Your rules.</span>
            </h1>
            <p className="text-xl text-warm-600 dark:text-warm-400 mb-8 leading-relaxed max-w-xl">
              Set the commute, budget, school rating, noise level, and crime threshold you'll
              accept. Narrowit shows you every area that qualifies — instantly.
            </p>
            <div className="flex items-center gap-4">
              <button
                onClick={onOpenDashboard}
                className="px-7 py-3.5 bg-coral-500 text-white rounded-lg font-semibold hover:bg-coral-600 transition-colors text-base shadow-lg shadow-coral-500/25"
              >
                Explore the map
              </button>
              <span className="text-warm-400 text-sm">
                No signup · Free · Open data
              </span>
            </div>
          </div>
        </div>

        {/* The flip */}
        <div className="max-w-3xl mx-auto px-6 pb-20">
          <div ref={problemRef} className="fade-in-section">
            <div className="rounded-2xl backdrop-blur-sm bg-warm-50/40 dark:bg-navy-800/40 border border-warm-200/50 dark:border-navy-700/50 p-8">
              <div className="grid md:grid-cols-2 gap-8">
                <div>
                  <h3 className="text-sm font-semibold text-warm-400 uppercase tracking-wide mb-2">
                    The old way
                  </h3>
                  <p className="text-warm-700 dark:text-warm-300 leading-relaxed">
                    Pick a postcode. Google the schools. Check crime stats on another site. Look up
                    commute times. Realise it's too expensive. Start over. Repeat 40 times.
                  </p>
                </div>
                <div>
                  <h3 className="text-sm font-semibold text-teal-600 uppercase tracking-wide mb-2">
                    With Narrowit
                  </h3>
                  <p className="text-warm-700 dark:text-warm-300 leading-relaxed">
                    Tell the map what you need. Every hexagon that lights up is a place worth
                    looking at. Drill into any one to see individual properties, prices, and energy
                    ratings.
                  </p>
                </div>
              </div>
            </div>
          </div>
        </div>

        {/* Filter showcase */}
        <div className="max-w-4xl mx-auto px-6 pb-20">
          <div ref={filtersRef} className="fade-in-section">
            <h2 className="text-3xl font-bold text-navy-950 dark:text-warm-100 mb-2 text-center">
              12 datasets. One slider each.
            </h2>
            <p className="text-warm-500 dark:text-warm-400 text-center mb-10 max-w-lg mx-auto">
              Every filter narrows the map in real time. Combine as many as you like.
            </p>
            <div className="grid grid-cols-2 md:grid-cols-4 gap-3">
              {FILTERS.map((f) => (
                <div
                  key={f.label}
                  className="rounded-xl bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 p-4 shadow-sm hover:shadow-md hover:border-teal-300 dark:hover:border-teal-600 transition-all"
                >
                  <div className="text-2xl mb-2">{f.icon}</div>
                  <div className="font-semibold text-navy-950 dark:text-warm-100 text-sm">{f.label}</div>
                  <div className="text-xs text-warm-500 dark:text-warm-400 mt-0.5">{f.example}</div>
                </div>
              ))}
            </div>
          </div>
        </div>

        {/* How it works */}
        <div className="max-w-3xl mx-auto px-6 pb-20">
          <div ref={howRef} className="fade-in-section">
            <h2 className="text-3xl font-bold text-navy-950 dark:text-warm-100 mb-10 text-center">
              Three clicks to clarity
            </h2>
            <div className="space-y-6">
              {STEPS.map((step, i) => (
                <div key={i} className="flex gap-5 items-start">
                  <span className="shrink-0 w-10 h-10 rounded-full bg-teal-600 text-white flex items-center justify-center text-lg font-bold">
                    {i + 1}
                  </span>
                  <div>
                    <h3 className="font-semibold text-navy-950 dark:text-warm-100 text-lg">{step.title}</h3>
                    <p className="text-warm-600 dark:text-warm-400 mt-0.5">{step.body}</p>
                  </div>
                </div>
              ))}
            </div>
          </div>
        </div>

        {/* Numbers */}
        <div className="max-w-3xl mx-auto px-6 pb-20">
          <div ref={numbersRef} className="fade-in-section">
            <div className="grid grid-cols-3 gap-6 text-center">
              {STATS.map((s) => (
                <div key={s.label}>
                  <div className="text-3xl font-extrabold text-teal-600">{s.value}</div>
                  <div className="text-sm text-warm-500 dark:text-warm-400 mt-1">{s.label}</div>
                </div>
              ))}
            </div>
          </div>
        </div>

        {/* Final CTA */}
        <div className="max-w-3xl mx-auto px-6 pb-24">
          <div ref={ctaRef} className="fade-in-section text-center">
            <h2 className="text-3xl font-bold text-navy-950 dark:text-warm-100 mb-3">Ready to narrow it down?</h2>
            <p className="text-warm-500 dark:text-warm-400 mb-8 max-w-md mx-auto">
              100% open data. No account required. Just set your filters and go.
            </p>
            <button
              onClick={onOpenDashboard}
              className="px-8 py-4 bg-coral-500 text-white rounded-lg font-semibold hover:bg-coral-600 transition-colors text-lg shadow-lg shadow-coral-500/25"
            >
              Open the map
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}
|
||||||
|
|
||||||
|
// --- Data ---
|
||||||
|
|
||||||
|
/** Cards shown in the "Filter showcase" grid: an icon glyph, a title and an example. */
const FILTERS: { icon: string; label: string; example: string }[] = [
  {
    icon: '\u00A3',
    label: 'Sale price',
    example: 'e.g. under \u00A3400k',
  },
  {
    icon: '\uD83D\uDE86',
    label: 'Commute time',
    example: 'e.g. < 45 min to Bank',
  },
  {
    icon: '\uD83C\uDFEB',
    label: 'School quality',
    example: 'Ofsted Outstanding',
  },
  {
    icon: '\uD83D\uDEA8',
    label: 'Crime rate',
    example: 'Low burglary areas',
  },
  {
    icon: '\u26A1',
    label: 'Energy rating',
    example: 'EPC band A\u2013C',
  },
  {
    icon: '\uD83D\uDCCF',
    label: 'Floor area',
    example: 'e.g. 80+ sqm',
  },
  {
    icon: '\uD83D\uDD07',
    label: 'Road noise',
    example: 'Below 55 dB Lden',
  },
  {
    icon: '\uD83C\uDF10',
    label: 'Broadband speed',
    example: '100+ Mbps available',
  },
];
|
||||||
|
|
||||||
|
/** Numbered "how it works" steps, rendered in array order. */
const STEPS: { title: string; body: string }[] = [
  {
    title: 'Add your deal-breakers',
    body:
      'Slide the filters for everything you care about \u2014 price cap, max commute, school quality, noise. The map updates as you drag.',
  },
  {
    title: 'Spot the clusters',
    body:
      'Hexagons light up where properties match. Zoom in and they split into finer cells. At street level you see individual postcode boundaries.',
  },
  {
    title: 'Dive into a neighbourhood',
    body:
      'Click any hexagon to see every property inside it \u2014 sale prices, floor plans, energy ratings, tenure. Layer on cafes, GP surgeries, and parks from OpenStreetMap.',
  },
];
|
||||||
|
|
||||||
|
/** Headline figures for the "Numbers" row: a display value and its caption. */
const STATS: { value: string; label: string }[] = [
  {
    value: '26M+',
    label: 'property records',
  },
  {
    value: '12',
    label: 'open datasets',
  },
  {
    value: '1.7M',
    label: 'postcodes mapped',
  },
];
|
||||||
|
|
@ -1,88 +1,41 @@
|
||||||
import { useCallback, useRef, useEffect, useState, useMemo } from 'react';
|
import { useCallback, useRef, useEffect, useState, useMemo, memo } from 'react';
|
||||||
import { Map as MapGL } from 'react-map-gl/maplibre';
|
import { Map as MapGL, useControl } from 'react-map-gl/maplibre';
|
||||||
import DeckGL from '@deck.gl/react';
|
import type { MapRef } from 'react-map-gl/maplibre';
|
||||||
|
import { MapboxOverlay } from '@deck.gl/mapbox';
|
||||||
import { H3HexagonLayer } from '@deck.gl/geo-layers';
|
import { H3HexagonLayer } from '@deck.gl/geo-layers';
|
||||||
import { IconLayer } from '@deck.gl/layers';
|
import { IconLayer, TextLayer } from '@deck.gl/layers';
|
||||||
import type { PickingInfo } from '@deck.gl/core';
|
import type { PickingInfo } from '@deck.gl/core';
|
||||||
import 'maplibre-gl/dist/maplibre-gl.css';
|
import 'maplibre-gl/dist/maplibre-gl.css';
|
||||||
import type { HexagonData, ViewState, ViewChangeParams, Bounds, POI } from '../types';
|
import type { HexagonData, ViewState, ViewChangeParams, Bounds, POI, FeatureMeta } from '../types';
|
||||||
|
|
||||||
interface MapProps {
|
interface MapProps {
|
||||||
data: HexagonData[];
|
data: HexagonData[];
|
||||||
pois: POI[];
|
pois: POI[];
|
||||||
onViewChange: (params: ViewChangeParams) => void;
|
onViewChange: (params: ViewChangeParams) => void;
|
||||||
|
viewFeature: string | null;
|
||||||
|
colorRange: [number, number] | null;
|
||||||
|
filterRange: [number, number] | null;
|
||||||
|
viewSource: 'drag' | 'eye' | null;
|
||||||
|
onCancelPin: () => void;
|
||||||
|
features: FeatureMeta[];
|
||||||
|
selectedHexagonId: string | null;
|
||||||
|
hoveredHexagonId: string | null;
|
||||||
|
onHexagonClick: (h3: string) => void;
|
||||||
|
onHexagonHover: (h3: string | null) => void;
|
||||||
|
initialViewState?: ViewState;
|
||||||
|
theme?: 'light' | 'dark';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Twemoji CDN base URL
|
// Twemoji CDN base URL
|
||||||
const TWEMOJI_BASE = 'https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72/';
|
const TWEMOJI_BASE = 'https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/72x72/';
|
||||||
|
|
||||||
// Map category to Twemoji codepoint (emoji unicode -> hex)
|
// Convert emoji to Twemoji URL
|
||||||
const POI_EMOJI_CODES: Record<string, string> = {
|
function emojiToTwemojiUrl(emoji: string): string {
|
||||||
// Schools
|
// Convert emoji to Unicode codepoint hex
|
||||||
elementary_school: '1f3eb', // 🏫
|
const codePoint = emoji.codePointAt(0);
|
||||||
school: '1f3eb',
|
if (!codePoint) return `${TWEMOJI_BASE}1f4cd.png`; // Default pin
|
||||||
high_school: '1f393', // 🎓
|
const hex = codePoint.toString(16);
|
||||||
preschool: '1f476', // 👶
|
return `${TWEMOJI_BASE}${hex}.png`;
|
||||||
college_university: '1f393',
|
|
||||||
private_school: '1f3eb',
|
|
||||||
// Healthcare
|
|
||||||
doctor: '1f3e5', // 🏥
|
|
||||||
dentist: '1f9b7', // 🦷
|
|
||||||
pharmacy: '1f48a', // 💊
|
|
||||||
hospital: '1f3e5',
|
|
||||||
public_health_clinic: '1f3e5',
|
|
||||||
// Transport
|
|
||||||
train_station: '1f689', // 🚉
|
|
||||||
bus_station: '1f68c', // 🚌
|
|
||||||
metro_station: '1f687', // 🚇
|
|
||||||
light_rail_and_subway_stations: '1f687',
|
|
||||||
// Parks
|
|
||||||
park: '1f333', // 🌳
|
|
||||||
national_park: '1f3de', // 🏞
|
|
||||||
dog_park: '1f415', // 🐕
|
|
||||||
// Emergency
|
|
||||||
police_department: '1f694', // 🚔
|
|
||||||
fire_department: '1f692', // 🚒
|
|
||||||
// Supermarkets
|
|
||||||
supermarket: '1f6d2', // 🛒
|
|
||||||
grocery_store: '1f6d2',
|
|
||||||
convenience_store: '1f3ea', // 🏪
|
|
||||||
};
|
|
||||||
|
|
||||||
function getPOIIconUrl(category: string): string {
|
|
||||||
const code = POI_EMOJI_CODES[category] || '1f4cd'; // 📍 default
|
|
||||||
return `${TWEMOJI_BASE}${code}.png`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tooltip emojis (these render fine in HTML)
|
|
||||||
const TOOLTIP_EMOJIS: Record<string, string> = {
|
|
||||||
elementary_school: '🏫',
|
|
||||||
school: '🏫',
|
|
||||||
high_school: '🎓',
|
|
||||||
preschool: '👶',
|
|
||||||
college_university: '🎓',
|
|
||||||
private_school: '🏫',
|
|
||||||
doctor: '👨⚕️',
|
|
||||||
dentist: '🦷',
|
|
||||||
pharmacy: '💊',
|
|
||||||
hospital: '🏥',
|
|
||||||
public_health_clinic: '🏥',
|
|
||||||
train_station: '🚉',
|
|
||||||
bus_station: '🚌',
|
|
||||||
metro_station: '🚇',
|
|
||||||
light_rail_and_subway_stations: '🚇',
|
|
||||||
park: '🌳',
|
|
||||||
national_park: '🏞️',
|
|
||||||
dog_park: '🐕',
|
|
||||||
police_department: '🚔',
|
|
||||||
fire_department: '🚒',
|
|
||||||
supermarket: '🛒',
|
|
||||||
grocery_store: '🛒',
|
|
||||||
convenience_store: '🏪',
|
|
||||||
};
|
|
||||||
|
|
||||||
function getTooltipEmoji(category: string): string {
|
|
||||||
return TOOLTIP_EMOJIS[category] || '📍';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const INITIAL_VIEW: ViewState = {
|
const INITIAL_VIEW: ViewState = {
|
||||||
|
|
@ -92,61 +45,44 @@ const INITIAL_VIEW: ViewState = {
|
||||||
pitch: 0,
|
pitch: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
const MAP_STYLE = 'https://basemaps.cartocdn.com/gl/positron-gl-style/style.json';
|
const MAP_STYLE_LIGHT = 'https://basemaps.cartocdn.com/gl/voyager-gl-style/style.json';
|
||||||
|
const MAP_STYLE_DARK = 'https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json';
|
||||||
|
|
||||||
interface ColorStop {
|
// Gradient stops for normalized [0,1] values
|
||||||
price: number;
|
const GRADIENT: { t: number; color: [number, number, number] }[] = [
|
||||||
color: [number, number, number];
|
{ t: 0, color: [46, 204, 113] }, // Green
|
||||||
}
|
{ t: 0.33, color: [241, 196, 15] }, // Yellow
|
||||||
|
{ t: 0.66, color: [231, 76, 60] }, // Red
|
||||||
// Continuous color scale from green (low) -> yellow -> red -> purple (high)
|
{ t: 1, color: [142, 68, 173] }, // Purple
|
||||||
const COLOR_SCALE: ColorStop[] = [
|
|
||||||
{ price: 0, color: [46, 204, 113] }, // Green
|
|
||||||
{ price: 200000, color: [241, 196, 15] }, // Yellow
|
|
||||||
{ price: 400000, color: [231, 76, 60] }, // Red
|
|
||||||
{ price: 800000, color: [142, 68, 173] }, // Purple
|
|
||||||
];
|
];
|
||||||
|
|
||||||
function interpolateColor(
|
function normalizedToColor(t: number): [number, number, number] {
|
||||||
c1: [number, number, number],
|
if (t <= 0) return GRADIENT[0].color;
|
||||||
c2: [number, number, number],
|
if (t >= 1) return GRADIENT[GRADIENT.length - 1].color;
|
||||||
t: number
|
|
||||||
): [number, number, number] {
|
|
||||||
return [
|
|
||||||
Math.round(c1[0] + (c2[0] - c1[0]) * t),
|
|
||||||
Math.round(c1[1] + (c2[1] - c1[1]) * t),
|
|
||||||
Math.round(c1[2] + (c2[2] - c1[2]) * t),
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
function priceToColor(price: number | null | undefined): [number, number, number] {
|
for (let i = 0; i < GRADIENT.length - 1; i++) {
|
||||||
if (price == null || isNaN(price)) return [128, 128, 128]; // Gray for missing data
|
const lo = GRADIENT[i];
|
||||||
|
const hi = GRADIENT[i + 1];
|
||||||
// Clamp to scale range
|
if (t >= lo.t && t <= hi.t) {
|
||||||
if (price <= COLOR_SCALE[0].price) return COLOR_SCALE[0].color;
|
const frac = (t - lo.t) / (hi.t - lo.t);
|
||||||
if (price >= COLOR_SCALE[COLOR_SCALE.length - 1].price) {
|
return [
|
||||||
return COLOR_SCALE[COLOR_SCALE.length - 1].color;
|
Math.round(lo.color[0] + (hi.color[0] - lo.color[0]) * frac),
|
||||||
}
|
Math.round(lo.color[1] + (hi.color[1] - lo.color[1]) * frac),
|
||||||
|
Math.round(lo.color[2] + (hi.color[2] - lo.color[2]) * frac),
|
||||||
// Find the two colors to interpolate between
|
];
|
||||||
for (let i = 0; i < COLOR_SCALE.length - 1; i++) {
|
|
||||||
const lower = COLOR_SCALE[i];
|
|
||||||
const upper = COLOR_SCALE[i + 1];
|
|
||||||
if (price >= lower.price && price <= upper.price) {
|
|
||||||
const t = (price - lower.price) / (upper.price - lower.price);
|
|
||||||
return interpolateColor(lower.color, upper.color, t);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return GRADIENT[GRADIENT.length - 1].color;
|
||||||
return COLOR_SCALE[COLOR_SCALE.length - 1].color;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function zoomToResolution(zoom: number): number {
|
function zoomToResolution(zoom: number): number {
|
||||||
if (zoom < 8.5) return 7;
|
if (zoom < 6) return 5;
|
||||||
|
if (zoom < 7) return 6;
|
||||||
if (zoom < 9.5) return 8;
|
if (zoom < 9.5) return 8;
|
||||||
if (zoom < 11) return 9;
|
if (zoom < 11) return 9;
|
||||||
if (zoom < 13) return 10;
|
if (zoom < 13) return 10;
|
||||||
return 11;
|
if (zoom < 15) return 11;
|
||||||
|
return 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
function getBoundsFromViewState(viewState: ViewState, width: number, height: number): Bounds {
|
function getBoundsFromViewState(viewState: ViewState, width: number, height: number): Bounds {
|
||||||
|
|
@ -165,7 +101,6 @@ function getBoundsFromViewState(viewState: ViewState, width: number, height: num
|
||||||
const halfWidthDeg = (width / 2) * degreesPerPixelLng;
|
const halfWidthDeg = (width / 2) * degreesPerPixelLng;
|
||||||
|
|
||||||
// Latitude uses Mercator projection (non-linear)
|
// Latitude uses Mercator projection (non-linear)
|
||||||
// Convert center lat to pixel y, offset by half height, convert back to lat
|
|
||||||
const latRad = (clampedLat * Math.PI) / 180;
|
const latRad = (clampedLat * Math.PI) / 180;
|
||||||
const mercatorY = (1 - Math.log(Math.tan(latRad) + 1 / Math.cos(latRad)) / Math.PI) / 2;
|
const mercatorY = (1 - Math.log(Math.tan(latRad) + 1 / Math.cos(latRad)) / Math.PI) / 2;
|
||||||
const centerPixelY = mercatorY * worldSize;
|
const centerPixelY = mercatorY * worldSize;
|
||||||
|
|
@ -175,7 +110,7 @@ function getBoundsFromViewState(viewState: ViewState, width: number, height: num
|
||||||
|
|
||||||
// Convert pixel Y back to latitude
|
// Convert pixel Y back to latitude
|
||||||
const pixelYToLat = (pixelY: number): number => {
|
const pixelYToLat = (pixelY: number): number => {
|
||||||
const mercY = Math.max(0.001, Math.min(0.999, pixelY / worldSize)); // Clamp to avoid edge cases
|
const mercY = Math.max(0.001, Math.min(0.999, pixelY / worldSize));
|
||||||
const latRadians = Math.atan(Math.sinh(Math.PI * (1 - 2 * mercY)));
|
const latRadians = Math.atan(Math.sinh(Math.PI * (1 - 2 * mercY)));
|
||||||
return (latRadians * 180) / Math.PI;
|
return (latRadians * 180) / Math.PI;
|
||||||
};
|
};
|
||||||
|
|
@ -193,9 +128,215 @@ interface Dimensions {
|
||||||
height: number;
|
height: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function Map({ data, pois, onViewChange }: MapProps) {
|
/**
 * Mounts deck.gl on the map as an interleaved overlay control.
 *
 * Renders nothing itself; it only forwards `layers` and `getTooltip` to the
 * underlying `MapboxOverlay` instance created through `useControl`.
 */
function DeckOverlay({
  layers,
  getTooltip,
}: {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  layers: any[];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  getTooltip: any;
}) {
  // Seed the overlay with the props of the first render. The change check
  // below compares against refs initialised from those same props, so without
  // this the initial layers would never reach deck.gl until one of them
  // changed identity.
  const overlay = useControl(
    () => new MapboxOverlay({ interleaved: true, layers, getTooltip })
  );
  const prevLayersRef = useRef(layers);
  const prevTooltipRef = useRef(getTooltip);

  // Push props only when an identity actually changed, so re-renders that
  // leave both props untouched do not trigger a deck.gl update.
  if (layers !== prevLayersRef.current || getTooltip !== prevTooltipRef.current) {
    prevLayersRef.current = layers;
    prevTooltipRef.current = getTooltip;
    overlay.setProps({ layers, getTooltip });
  }

  return null;
}
|
||||||
|
|
||||||
|
// Colour ramp for the density view: light cyan -> ocean blue -> deep indigo.
// `t` is the normalized position of each stop within [0, 1].
const DENSITY_GRADIENT: { t: number; color: [number, number, number] }[] = [
  { t: 0, color: [130, 234, 220] }, // Light cyan (few)
  { t: 0.5, color: [20, 140, 180] }, // Ocean blue (moderate)
  { t: 1, color: [88, 28, 140] }, // Deep indigo (many)
];

/**
 * Convert a normalized density value into an RGB triple.
 *
 * Values at or below 0 map to the first stop and values at or above 1 map to
 * the last stop. Anything in between is linearly interpolated, channel by
 * channel, between the two neighbouring stops and rounded to integers. A value
 * that matches no segment (e.g. NaN) falls back to the last stop.
 */
function countToColor(t: number): [number, number, number] {
  const lastStop = DENSITY_GRADIENT[DENSITY_GRADIENT.length - 1];
  if (t <= 0) return DENSITY_GRADIENT[0].color;
  if (t >= 1) return lastStop.color;

  // Index of the upper stop of the first segment that contains `t`.
  const upperIdx = DENSITY_GRADIENT.findIndex(
    (stop, idx) => idx > 0 && t >= DENSITY_GRADIENT[idx - 1].t && t <= stop.t
  );
  if (upperIdx === -1) return lastStop.color;

  const from = DENSITY_GRADIENT[upperIdx - 1];
  const to = DENSITY_GRADIENT[upperIdx];
  const ratio = (t - from.t) / (to.t - from.t);
  const mix = (channel: 0 | 1 | 2): number =>
    Math.round(from.color[channel] + (to.color[channel] - from.color[channel]) * ratio);

  return [mix(0), mix(1), mix(2)];
}
|
||||||
|
|
||||||
|
/**
 * Postcode search box overlaid on the top-left corner of the map.
 *
 * Looks the entered postcode up through the postcodes.io API. On success it
 * calls `onFlyTo(latitude, longitude, 14)` and clears the input; on failure
 * it shows a short inline message below the input.
 *
 * @param onFlyTo Callback that moves the map to the given latitude, longitude and zoom.
 */
function PostcodeSearch({
  onFlyTo,
}: {
  onFlyTo: (lat: number, lng: number, zoom: number) => void;
}) {
  const [query, setQuery] = useState('');
  const [error, setError] = useState<string | null>(null);
  const [loading, setLoading] = useState(false);

  const handleSubmit = useCallback(
    async (e: React.FormEvent) => {
      e.preventDefault();
      const trimmed = query.trim();
      if (!trimmed) return;

      setError(null);
      setLoading(true);
      try {
        const res = await fetch(
          `https://api.postcodes.io/postcodes/${encodeURIComponent(trimmed)}`
        );
        if (!res.ok) {
          setError('Postcode not found');
          return;
        }

        const json = await res.json();
        const result = json.status === 200 ? json.result : null;
        if (!result) {
          setError('Postcode not found');
          return;
        }

        // postcodes.io can return a valid postcode whose latitude/longitude
        // are null. Passing those on would put null coordinates into the map
        // view state, so report it to the user instead.
        const { latitude, longitude } = result;
        if (!Number.isFinite(latitude) || !Number.isFinite(longitude)) {
          setError('No map location for this postcode');
          return;
        }

        onFlyTo(latitude, longitude, 14);
        setQuery('');
      } catch {
        // Network failure or a response body that is not valid JSON.
        setError('Lookup failed');
      } finally {
        setLoading(false);
      }
    },
    [query, onFlyTo]
  );

  return (
    <form onSubmit={handleSubmit} className="absolute top-3 left-3 z-10 flex flex-col gap-1">
      <div className="flex shadow-lg rounded overflow-hidden">
        <input
          type="text"
          value={query}
          onChange={(e) => {
            setQuery(e.target.value);
            // Editing the query invalidates any previous error message.
            setError(null);
          }}
          placeholder="Search postcode..."
          className="px-3 py-2 text-sm w-40 border-none outline-none bg-white dark:bg-navy-800 dark:text-warm-100 dark:placeholder-warm-500"
        />
        <button
          type="submit"
          disabled={loading}
          className="px-3 py-2 bg-teal-600 text-white text-sm hover:bg-teal-700 disabled:opacity-50"
        >
          {loading ? '...' : 'Go'}
        </button>
      </div>
      {error && (
        <span className="text-xs text-red-600 dark:text-red-400 bg-white/90 dark:bg-navy-800/90 rounded px-2 py-0.5 shadow">{error}</span>
      )}
    </form>
  );
}
|
||||||
|
|
||||||
|
/**
 * Colour legend shown in the top-right corner of the map.
 *
 * @param featureLabel Title displayed above the gradient bar.
 * @param range        [low, high] values shown under the bar in feature mode
 *                     when no enum values are supplied.
 * @param showCancel   Whether to render the "clear colour view" button.
 * @param onCancel     Invoked when that button is clicked.
 * @param mode         'density' shows the cyan-to-indigo ramp with Few/Many
 *                     labels; 'feature' shows the green-to-purple ramp.
 * @param enumValues   Optional labels; in feature mode the first and last
 *                     entries replace the numeric range labels.
 */
function MapLegend({
  featureLabel,
  range,
  showCancel,
  onCancel,
  mode,
  enumValues,
}: {
  featureLabel: string;
  range: [number, number];
  showCancel: boolean;
  onCancel: () => void;
  mode: 'feature' | 'density';
  enumValues?: string[];
}) {
  // Compact number formatting: 1.2M, 3.4k, integers as-is, otherwise one decimal.
  // The unit thresholds sit at the smallest value that rounds up to the next
  // unit at one decimal place, so 999,960 is shown as "1.0M" instead of
  // "1000.0k" and 999.97 as "1.0k" instead of "1000.0".
  const formatVal = (v: number) => {
    const magnitude = Math.abs(v);
    if (magnitude >= 999_950) return `${(v / 1_000_000).toFixed(1)}M`;
    if (magnitude >= 999.95) return `${(v / 1_000).toFixed(1)}k`;
    if (Number.isInteger(v)) return v.toString();
    return v.toFixed(1);
  };

  const gradientStyle =
    mode === 'density'
      ? 'linear-gradient(to right, rgb(130, 234, 220), rgb(20, 140, 180), rgb(88, 28, 140))'
      : 'linear-gradient(to right, rgb(46, 204, 113), rgb(241, 196, 15), rgb(231, 76, 60), rgb(142, 68, 173))';

  return (
    <div className="absolute top-3 right-3 z-10 bg-white dark:bg-navy-800 dark:text-warm-200 rounded shadow-lg p-3 text-xs min-w-[160px]">
      <div className="flex items-center justify-between mb-2">
        <span className="font-semibold text-sm">{featureLabel}</span>
        {showCancel && (
          <button
            onClick={onCancel}
            className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 ml-2"
            title="Clear color view"
          >
            <svg
              className="w-4 h-4"
              fill="none"
              stroke="currentColor"
              viewBox="0 0 24 24"
              strokeWidth={2}
            >
              <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
            </svg>
          </button>
        )}
      </div>
      <div
        className="h-3 rounded"
        style={{ background: gradientStyle }}
      />
      <div className="flex justify-between mt-1 text-warm-600 dark:text-warm-400">
        {mode === 'density' ? (
          <>
            <span>Few</span>
            <span>Many</span>
          </>
        ) : enumValues && enumValues.length > 0 ? (
          <>
            <span>{enumValues[0]}</span>
            <span>{enumValues[enumValues.length - 1]}</span>
          </>
        ) : (
          <>
            <span>{formatVal(range[0])}</span>
            <span>{formatVal(range[1])}</span>
          </>
        )}
      </div>
    </div>
  );
}
|
||||||
|
|
||||||
|
export default memo(function Map({
|
||||||
|
data,
|
||||||
|
pois,
|
||||||
|
onViewChange,
|
||||||
|
viewFeature,
|
||||||
|
colorRange,
|
||||||
|
filterRange,
|
||||||
|
viewSource,
|
||||||
|
onCancelPin,
|
||||||
|
features,
|
||||||
|
selectedHexagonId,
|
||||||
|
hoveredHexagonId,
|
||||||
|
onHexagonClick,
|
||||||
|
onHexagonHover,
|
||||||
|
initialViewState,
|
||||||
|
theme = 'light',
|
||||||
|
}: MapProps) {
|
||||||
const containerRef = useRef<HTMLDivElement>(null);
|
const containerRef = useRef<HTMLDivElement>(null);
|
||||||
const [viewState, setViewState] = useState<ViewState>(INITIAL_VIEW);
|
const [viewState, setViewState] = useState<ViewState>(initialViewState || INITIAL_VIEW);
|
||||||
const [dimensions, setDimensions] = useState<Dimensions>({ width: 0, height: 0 });
|
const [dimensions, setDimensions] = useState<Dimensions>({ width: 0, height: 0 });
|
||||||
|
|
||||||
// Track container dimensions with ResizeObserver
|
// Track container dimensions with ResizeObserver
|
||||||
|
|
@ -218,18 +359,69 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (dimensions.width === 0 || dimensions.height === 0) return;
|
if (dimensions.width === 0 || dimensions.height === 0) return;
|
||||||
|
|
||||||
const bounds = getBoundsFromViewState(viewState, dimensions.width, dimensions.height);
|
const raw = getBoundsFromViewState(viewState, dimensions.width, dimensions.height);
|
||||||
const resolution = zoomToResolution(viewState.zoom);
|
const resolution = zoomToResolution(viewState.zoom);
|
||||||
|
|
||||||
onViewChange({ resolution, bounds, zoom: viewState.zoom });
|
// Quantize bounds to 0.01° to reduce state churn and improve backend cache hits
|
||||||
|
const QUANT = 0.01;
|
||||||
|
const bounds: Bounds = {
|
||||||
|
south: Math.floor(raw.south / QUANT) * QUANT,
|
||||||
|
west: Math.floor(raw.west / QUANT) * QUANT,
|
||||||
|
north: Math.ceil(raw.north / QUANT) * QUANT,
|
||||||
|
east: Math.ceil(raw.east / QUANT) * QUANT,
|
||||||
|
};
|
||||||
|
|
||||||
|
onViewChange({
|
||||||
|
resolution,
|
||||||
|
bounds,
|
||||||
|
zoom: viewState.zoom,
|
||||||
|
latitude: viewState.latitude,
|
||||||
|
longitude: viewState.longitude,
|
||||||
|
});
|
||||||
}, [viewState, dimensions, onViewChange]);
|
}, [viewState, dimensions, onViewChange]);
|
||||||
|
|
||||||
const handleViewStateChange = useCallback((params: { viewState: unknown }) => {
|
const handleMove = useCallback((evt: { viewState: ViewState }) => {
|
||||||
const newViewState = params.viewState as ViewState;
|
setViewState(evt.viewState);
|
||||||
setViewState(newViewState);
|
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// Popup state for POI hover (using screen coordinates)
|
const handleFlyTo = useCallback((lat: number, lng: number, zoom: number) => {
|
||||||
|
setViewState((prev) => ({ ...prev, latitude: lat, longitude: lng, zoom }));
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const themeRef = useRef(theme);
|
||||||
|
themeRef.current = theme;
|
||||||
|
|
||||||
|
// Make place labels more legible over the colored hexagons
|
||||||
|
const handleMapLoad = useCallback(
|
||||||
|
(evt: { target: MapRef['getMap'] extends () => infer M ? M : never }) => {
|
||||||
|
const map = evt.target;
|
||||||
|
if (themeRef.current === 'light') {
|
||||||
|
for (const layer of map.getStyle().layers || []) {
|
||||||
|
if (layer.type !== 'symbol') continue;
|
||||||
|
map.setPaintProperty(layer.id, 'text-halo-color', 'rgba(255,255,255,1)');
|
||||||
|
map.setPaintProperty(layer.id, 'text-halo-width', 2);
|
||||||
|
map.setPaintProperty(layer.id, 'text-color', '#222');
|
||||||
|
}
|
||||||
|
// Make water more prominent
|
||||||
|
for (const layer of map.getStyle().layers || []) {
|
||||||
|
if (layer.id === 'water' || layer.id.startsWith('water')) {
|
||||||
|
map.setPaintProperty(layer.id, 'fill-color', '#6baed6');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
map.setLayoutProperty('building', 'visibility', 'none');
|
||||||
|
map.setLayoutProperty('building-top', 'visibility', 'none');
|
||||||
|
} catch {
|
||||||
|
// layers may not exist in dark style
|
||||||
|
}
|
||||||
|
},
|
||||||
|
[]
|
||||||
|
);
|
||||||
|
|
||||||
|
const mapStyle = theme === 'dark' ? MAP_STYLE_DARK : MAP_STYLE_LIGHT;
|
||||||
|
|
||||||
|
// Popup state for POI hover
|
||||||
const [popupInfo, setPopupInfo] = useState<{
|
const [popupInfo, setPopupInfo] = useState<{
|
||||||
x: number;
|
x: number;
|
||||||
y: number;
|
y: number;
|
||||||
|
|
@ -250,24 +442,149 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
|
||||||
}
|
}
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const layers = useMemo(
|
// Compute count range for count-based coloring
|
||||||
() => [
|
const countRange = useMemo(() => {
|
||||||
|
if (data.length === 0) return { min: 0, max: 1 };
|
||||||
|
let min = Infinity;
|
||||||
|
let max = -Infinity;
|
||||||
|
for (const d of data) {
|
||||||
|
const c = d.count as number;
|
||||||
|
if (c < min) min = c;
|
||||||
|
if (c > max) max = c;
|
||||||
|
}
|
||||||
|
if (min === max) return { min, max: min + 1 };
|
||||||
|
return { min, max };
|
||||||
|
}, [data]);
|
||||||
|
|
||||||
|
// Memoize feature lookup to avoid new reference each render
|
||||||
|
const colorFeatureMeta = useMemo(
|
||||||
|
() => (viewFeature ? features.find((f) => f.name === viewFeature) || null : null),
|
||||||
|
[viewFeature, features]
|
||||||
|
);
|
||||||
|
|
||||||
|
// Use refs for values that change during drag so layers aren't recreated
|
||||||
|
const viewFeatureRef = useRef(viewFeature);
|
||||||
|
viewFeatureRef.current = viewFeature;
|
||||||
|
const colorRangeRef = useRef(colorRange);
|
||||||
|
colorRangeRef.current = colorRange;
|
||||||
|
const filterRangeRef = useRef(filterRange);
|
||||||
|
filterRangeRef.current = filterRange;
|
||||||
|
const colorFeatureMetaRef = useRef(colorFeatureMeta);
|
||||||
|
colorFeatureMetaRef.current = colorFeatureMeta;
|
||||||
|
const countRangeRef = useRef(countRange);
|
||||||
|
countRangeRef.current = countRange;
|
||||||
|
const selectedHexagonIdRef = useRef(selectedHexagonId);
|
||||||
|
selectedHexagonIdRef.current = selectedHexagonId;
|
||||||
|
const hoveredHexagonIdRef = useRef(hoveredHexagonId);
|
||||||
|
hoveredHexagonIdRef.current = hoveredHexagonId;
|
||||||
|
|
||||||
|
// Stable click handler using ref
|
||||||
|
const onHexagonClickRef = useRef(onHexagonClick);
|
||||||
|
onHexagonClickRef.current = onHexagonClick;
|
||||||
|
const handleHexagonClick = useCallback((info: PickingInfo<HexagonData>) => {
|
||||||
|
if (info.object && 'h3' in info.object) {
|
||||||
|
onHexagonClickRef.current(info.object.h3);
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
// Stable hover handler using ref
|
||||||
|
const onHexagonHoverRef = useRef(onHexagonHover);
|
||||||
|
onHexagonHoverRef.current = onHexagonHover;
|
||||||
|
const handleHexagonHover = useCallback((info: PickingInfo<HexagonData>) => {
|
||||||
|
if (info.object && 'h3' in info.object) {
|
||||||
|
onHexagonHoverRef.current(info.object.h3);
|
||||||
|
} else {
|
||||||
|
onHexagonHoverRef.current(null);
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
// Stable hover handler using ref
|
||||||
|
const handlePoiHoverRef = useRef(handlePoiHover);
|
||||||
|
handlePoiHoverRef.current = handlePoiHover;
|
||||||
|
const stablePoiHover = useCallback((info: PickingInfo<POI>) => {
|
||||||
|
handlePoiHoverRef.current(info);
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
// Derive a trigger value from color-affecting state — avoids useEffect+setState double-render
|
||||||
|
const colorTrigger = `${viewFeature}|${colorRange?.[0]}|${colorRange?.[1]}|${filterRange?.[0]}|${filterRange?.[1]}|${countRange.min}|${countRange.max}|${selectedHexagonId}|${hoveredHexagonId}`;
|
||||||
|
|
||||||
|
// Hexagon layer — only recreated when data or color trigger changes
|
||||||
|
const hexLayer = useMemo(
|
||||||
|
() =>
|
||||||
new H3HexagonLayer<HexagonData>({
|
new H3HexagonLayer<HexagonData>({
|
||||||
id: 'h3-hexagons',
|
id: 'h3-hexagons',
|
||||||
data,
|
data,
|
||||||
getHexagon: (d) => d.h3,
|
getHexagon: (d) => d.h3,
|
||||||
getFillColor: (d) => priceToColor(d.avg_price),
|
getFillColor: (d) => {
|
||||||
|
const vf = viewFeatureRef.current;
|
||||||
|
const clr = colorRangeRef.current;
|
||||||
|
const fr = filterRangeRef.current;
|
||||||
|
const cfm = colorFeatureMetaRef.current;
|
||||||
|
if (vf && clr && cfm) {
|
||||||
|
const val = d[`min_${vf}`];
|
||||||
|
if (val == null) return [128, 128, 128, 80] as [number, number, number, number];
|
||||||
|
// Gray out hexagons outside filter range
|
||||||
|
if (fr) {
|
||||||
|
const minVal = d[`min_${vf}`] as number;
|
||||||
|
const maxVal = d[`max_${vf}`] as number;
|
||||||
|
if (maxVal < fr[0] || minVal > fr[1]) {
|
||||||
|
return [180, 180, 180, 60] as [number, number, number, number];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Color using full slider range
|
||||||
|
const range = clr[1] - clr[0];
|
||||||
|
if (range === 0) return [...GRADIENT[0].color, 200] as [number, number, number, number];
|
||||||
|
const t = ((val as number) - clr[0]) / range;
|
||||||
|
const rgb = normalizedToColor(Math.max(0, Math.min(1, t)));
|
||||||
|
return [...rgb, 200] as [number, number, number, number];
|
||||||
|
}
|
||||||
|
const cr = countRangeRef.current;
|
||||||
|
const c = d.count as number;
|
||||||
|
const t = (c - cr.min) / (cr.max - cr.min);
|
||||||
|
return [...countToColor(Math.max(0, Math.min(1, t))), 200] as [
|
||||||
|
number,
|
||||||
|
number,
|
||||||
|
number,
|
||||||
|
number,
|
||||||
|
];
|
||||||
|
},
|
||||||
|
getLineColor: (d) => {
|
||||||
|
if (d.h3 === selectedHexagonIdRef.current) return [255, 255, 255, 255] as [number, number, number, number];
|
||||||
|
if (d.h3 === hoveredHexagonIdRef.current) return [29, 228, 195, 200] as [number, number, number, number];
|
||||||
|
return [0, 0, 0, 0] as [number, number, number, number];
|
||||||
|
},
|
||||||
|
getLineWidth: (d) => {
|
||||||
|
if (d.h3 === selectedHexagonIdRef.current) return 3;
|
||||||
|
if (d.h3 === hoveredHexagonIdRef.current) return 2;
|
||||||
|
return 0;
|
||||||
|
},
|
||||||
|
lineWidthUnits: 'pixels',
|
||||||
|
updateTriggers: {
|
||||||
|
getFillColor: [colorTrigger],
|
||||||
|
getLineColor: [colorTrigger],
|
||||||
|
getLineWidth: [colorTrigger],
|
||||||
|
},
|
||||||
extruded: false,
|
extruded: false,
|
||||||
pickable: true,
|
pickable: true,
|
||||||
opacity: 0.5,
|
opacity: 1,
|
||||||
highPrecision: true,
|
highPrecision: true,
|
||||||
|
onClick: handleHexagonClick,
|
||||||
|
onHover: handleHexagonHover,
|
||||||
|
// @ts-expect-error beforeId is a MapboxOverlay interleave prop, not typed in LayerProps
|
||||||
|
beforeId: 'waterway_label',
|
||||||
}),
|
}),
|
||||||
|
[data, colorTrigger, handleHexagonClick, handleHexagonHover]
|
||||||
|
);
|
||||||
|
|
||||||
|
// POI layer — independent, only recreated when POI data changes
|
||||||
|
const poiLayer = useMemo(
|
||||||
|
() =>
|
||||||
new IconLayer<POI>({
|
new IconLayer<POI>({
|
||||||
id: 'poi-icons',
|
id: 'poi-icons',
|
||||||
data: pois,
|
data: pois,
|
||||||
getPosition: (d) => [d.lng, d.lat],
|
getPosition: (d) => [d.lng, d.lat],
|
||||||
getIcon: (d) => ({
|
getIcon: (d) => ({
|
||||||
url: getPOIIconUrl(d.category),
|
url: emojiToTwemojiUrl(d.emoji),
|
||||||
width: 72,
|
width: 72,
|
||||||
height: 72,
|
height: 72,
|
||||||
}),
|
}),
|
||||||
|
|
@ -275,48 +592,89 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
|
||||||
sizeMinPixels: 20,
|
sizeMinPixels: 20,
|
||||||
sizeMaxPixels: 40,
|
sizeMaxPixels: 40,
|
||||||
pickable: true,
|
pickable: true,
|
||||||
onHover: handlePoiHover,
|
onHover: stablePoiHover,
|
||||||
}),
|
}),
|
||||||
],
|
[pois, stablePoiHover]
|
||||||
[data, pois, handlePoiHover]
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Postcode labels on high-res hexagons (resolution 11+, zoom >= 13)
|
||||||
|
const postcodeData = useMemo(
|
||||||
|
() => data.filter((d) => d.postcode && d.lat != null && d.lon != null),
|
||||||
|
[data]
|
||||||
|
);
|
||||||
|
|
||||||
// Tooltip for hexagons only (POIs use MapLibre popup)
|
const showPostcodes = viewState.zoom >= 13;
|
||||||
const getTooltip = useCallback(({ object }: { object?: HexagonData }) => {
|
const postcodeLayer = useMemo(
|
||||||
if (!object || !('h3' in object)) return null;
|
() =>
|
||||||
|
showPostcodes
|
||||||
|
? new TextLayer<HexagonData>({
|
||||||
|
id: 'postcode-labels',
|
||||||
|
data: postcodeData,
|
||||||
|
getPosition: (d) => [d.lon as number, d.lat as number],
|
||||||
|
getText: (d) => d.postcode as string,
|
||||||
|
getSize: 11,
|
||||||
|
getColor: theme === 'dark' ? [220, 220, 220, 220] : [30, 30, 30, 220],
|
||||||
|
getTextAnchor: 'middle',
|
||||||
|
getAlignmentBaseline: 'center',
|
||||||
|
fontFamily: 'Inter, system-ui, sans-serif',
|
||||||
|
fontWeight: 600,
|
||||||
|
outlineWidth: 2,
|
||||||
|
outlineColor: theme === 'dark' ? [30, 30, 30, 200] : [255, 255, 255, 200],
|
||||||
|
billboard: false,
|
||||||
|
sizeUnits: 'pixels',
|
||||||
|
sizeMinPixels: 10,
|
||||||
|
sizeMaxPixels: 14,
|
||||||
|
})
|
||||||
|
: null,
|
||||||
|
[postcodeData, showPostcodes, theme]
|
||||||
|
);
|
||||||
|
|
||||||
const hex = object as HexagonData;
|
const layers = useMemo(
|
||||||
return {
|
() => [hexLayer, poiLayer, ...(postcodeLayer ? [postcodeLayer] : [])],
|
||||||
html: `<div style="padding: 8px; font-size: 14px;">
|
[hexLayer, poiLayer, postcodeLayer]
|
||||||
<strong>Avg: £${hex.avg_price?.toLocaleString() || 'N/A'}</strong>
|
);
|
||||||
<div style="color: #666; font-size: 12px;">
|
|
||||||
${hex.count} sales<br/>
|
|
||||||
Range: £${hex.min_price?.toLocaleString()} - £${hex.max_price?.toLocaleString()}
|
|
||||||
</div>
|
|
||||||
</div>`,
|
|
||||||
style: {
|
|
||||||
backgroundColor: 'white',
|
|
||||||
borderRadius: '4px',
|
|
||||||
boxShadow: '0 2px 4px rgba(0,0,0,0.2)',
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex-1 h-full relative" ref={containerRef}>
|
<div className="flex-1 h-full relative" ref={containerRef}>
|
||||||
<DeckGL
|
<MapGL
|
||||||
viewState={viewState}
|
{...viewState}
|
||||||
controller
|
onMove={handleMove}
|
||||||
layers={layers}
|
onLoad={handleMapLoad as never}
|
||||||
onViewStateChange={handleViewStateChange as never}
|
mapStyle={mapStyle}
|
||||||
getTooltip={getTooltip as never}
|
style={{ width: '100%', height: '100%' }}
|
||||||
|
attributionControl={false}
|
||||||
|
dragRotate={false}
|
||||||
|
touchZoomRotate={true}
|
||||||
|
touchPitch={false}
|
||||||
|
keyboard={true}
|
||||||
|
pitchWithRotate={false}
|
||||||
|
minZoom={5}
|
||||||
|
maxBounds={[-12, 49, 4, 62]}
|
||||||
>
|
>
|
||||||
<MapGL mapStyle={MAP_STYLE} />
|
<DeckOverlay layers={layers} getTooltip={null} />
|
||||||
</DeckGL>
|
</MapGL>
|
||||||
|
<PostcodeSearch onFlyTo={handleFlyTo} />
|
||||||
|
{viewFeature && colorRange && colorFeatureMeta ? (
|
||||||
|
<MapLegend
|
||||||
|
featureLabel={colorFeatureMeta.name}
|
||||||
|
range={colorRange}
|
||||||
|
showCancel={viewSource === 'eye'}
|
||||||
|
onCancel={onCancelPin}
|
||||||
|
mode="feature"
|
||||||
|
enumValues={colorFeatureMeta.type === 'enum' ? colorFeatureMeta.values : undefined}
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<MapLegend
|
||||||
|
featureLabel="Property density"
|
||||||
|
range={[0, 0]}
|
||||||
|
showCancel={false}
|
||||||
|
onCancel={onCancelPin}
|
||||||
|
mode="density"
|
||||||
|
/>
|
||||||
|
)}
|
||||||
{popupInfo && (
|
{popupInfo && (
|
||||||
<div
|
<div
|
||||||
className="absolute pointer-events-none bg-white rounded shadow-lg p-2 text-sm"
|
className="absolute pointer-events-none bg-white dark:bg-navy-800 rounded shadow-lg p-2 text-sm dark:text-warm-200"
|
||||||
style={{
|
style={{
|
||||||
left: popupInfo.x,
|
left: popupInfo.x,
|
||||||
top: popupInfo.y - 40,
|
top: popupInfo.y - 40,
|
||||||
|
|
@ -324,14 +682,10 @@ export default function Map({ data, pois, onViewChange }: MapProps) {
|
||||||
zIndex: 9999,
|
zIndex: 9999,
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
<strong>
|
<strong>{popupInfo.name}</strong>
|
||||||
{getTooltipEmoji(popupInfo.category)} {popupInfo.name}
|
<div className="text-gray-500 dark:text-warm-400 text-xs">{popupInfo.category}</div>
|
||||||
</strong>
|
|
||||||
<div className="text-gray-500 text-xs">
|
|
||||||
{popupInfo.category.replace(/_/g, ' ')}
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
});
|
||||||
|
|
|
||||||
297
frontend/src/components/POIPane.tsx
Normal file
297
frontend/src/components/POIPane.tsx
Normal file
|
|
@ -0,0 +1,297 @@
|
||||||
|
import { useState, useRef, useEffect, useCallback } from 'react';
|
||||||
|
import type { POICategoryGroup } from '../types';
|
||||||
|
|
||||||
|
interface POIPaneProps {
|
||||||
|
groups: POICategoryGroup[];
|
||||||
|
selectedCategories: Set<string>;
|
||||||
|
onCategoriesChange: (categories: Set<string>) => void;
|
||||||
|
poiCount: number;
|
||||||
|
onNavigateToSource?: (slug: string) => void;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Side pane for choosing which point-of-interest categories are displayed.
 *
 * Renders a heading with a data-source info dialog, a dropdown listing the
 * category groups (with All/None shortcuts, a search box, collapsible groups
 * and tri-state group checkboxes), a summary of the current selection, and a
 * short usage hint. Selection state lives in the parent: every change is
 * reported through `onCategoriesChange` with a new Set.
 */
export default function POIPane({
  groups,
  selectedCategories,
  onCategoriesChange,
  poiCount,
  onNavigateToSource,
}: POIPaneProps) {
  const [dropdownOpen, setDropdownOpen] = useState(false);
  const [searchTerm, setSearchTerm] = useState('');
  const [collapsedGroups, setCollapsedGroups] = useState<Set<string>>(new Set());
  const [showInfo, setShowInfo] = useState(false);
  const dropdownRef = useRef<HTMLDivElement>(null);
  const infoPopupRef = useRef<HTMLDivElement>(null);

  // Close dropdown when clicking outside
  useEffect(() => {
    function handleClickOutside(event: MouseEvent) {
      if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) {
        setDropdownOpen(false);
      }
    }
    document.addEventListener('mousedown', handleClickOutside);
    return () => document.removeEventListener('mousedown', handleClickOutside);
  }, []);

  // Close info popup when clicking outside or pressing Escape.
  // Listeners are only attached while the popup is open.
  useEffect(() => {
    if (!showInfo) return;
    function handleClickOutside(e: MouseEvent) {
      if (infoPopupRef.current && !infoPopupRef.current.contains(e.target as Node)) {
        setShowInfo(false);
      }
    }
    function handleKeyDown(e: KeyboardEvent) {
      if (e.key === 'Escape') setShowInfo(false);
    }
    document.addEventListener('mousedown', handleClickOutside);
    document.addEventListener('keydown', handleKeyDown);
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
      document.removeEventListener('keydown', handleKeyDown);
    };
  }, [showInfo]);

  const allCategories = groups.flatMap((g) => g.categories);

  const toggleCategory = (category: string) => {
    const newSet = new Set(selectedCategories);
    if (newSet.has(category)) {
      newSet.delete(category);
    } else {
      newSet.add(category);
    }
    onCategoriesChange(newSet);
  };

  const selectAll = () => {
    onCategoriesChange(new Set(allCategories));
  };

  const selectNone = () => {
    onCategoriesChange(new Set());
  };

  // Toggle exactly the categories handed in. The caller passes the categories
  // that are rendered under the group header, so the checkbox acts on the same
  // list its checked/indeterminate state was computed from (while searching,
  // that is the filtered list, not the whole group).
  const toggleGroup = useCallback(
    (categories: string[]) => {
      if (categories.length === 0) return;
      const allSelected = categories.every((c) => selectedCategories.has(c));
      const newSet = new Set(selectedCategories);
      for (const c of categories) {
        if (allSelected) {
          newSet.delete(c);
        } else {
          newSet.add(c);
        }
      }
      onCategoriesChange(newSet);
    },
    [selectedCategories, onCategoriesChange]
  );

  const toggleCollapse = (groupName: string) => {
    setCollapsedGroups((prev) => {
      const next = new Set(prev);
      if (next.has(groupName)) {
        next.delete(groupName);
      } else {
        next.add(groupName);
      }
      return next;
    });
  };

  const lowerSearch = searchTerm.toLowerCase();

  // Filter groups and categories by search term. A group whose own name
  // matches is kept whole; otherwise only its matching categories are kept,
  // and groups left with nothing are dropped.
  const filteredGroups = groups
    .map((group): POICategoryGroup | null => {
      if (!searchTerm) return group;
      if (group.name.toLowerCase().includes(lowerSearch)) return group;
      const matchingCats = group.categories.filter((c) => c.toLowerCase().includes(lowerSearch));
      if (matchingCats.length === 0) return null;
      return { ...group, categories: matchingCats };
    })
    .filter((group): group is POICategoryGroup => group !== null);

  const selectedCount = selectedCategories.size;

  // Compare by membership rather than by size: the selection may contain
  // names that are not (or no longer) part of `groups`.
  const everyCategorySelected =
    allCategories.length > 0 && allCategories.every((c) => selectedCategories.has(c));

  let dropdownLabel: string;
  if (selectedCount === 0) {
    dropdownLabel = 'Select categories...';
  } else if (everyCategorySelected) {
    dropdownLabel = 'All categories';
  } else {
    dropdownLabel = `${selectedCount} selected`;
  }

  return (
    <div className="w-72 p-4 bg-white dark:bg-navy-950 shadow-lg space-y-4 overflow-y-auto max-h-screen">
      <div className="flex items-center gap-2">
        <h2 className="text-xl font-bold dark:text-warm-100">Points of Interest</h2>
        <button
          type="button"
          onClick={() => setShowInfo(true)}
          className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
          title="Data source info"
          aria-label="Data source info"
        >
          <svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
            <circle cx="12" cy="12" r="10" />
            <path strokeLinecap="round" d="M12 16v-4m0-4h.01" />
          </svg>
        </button>
      </div>

      {showInfo && (
        <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30">
          <div
            ref={infoPopupRef}
            role="dialog"
            aria-modal="true"
            aria-label="Points of Interest"
            className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full mx-4 p-5"
          >
            <div className="flex items-start justify-between mb-3">
              <h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">
                Points of Interest
              </h3>
              <button
                type="button"
                onClick={() => setShowInfo(false)}
                className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shrink-0"
                aria-label="Close"
              >
                <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                  <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
                </svg>
              </button>
            </div>
            <p className="text-sm text-warm-700 dark:text-warm-300 mb-4 leading-relaxed">
              Points of interest are sourced from OpenStreetMap via Geofabrik extracts.
              Categories include public transport stops, shops, restaurants, healthcare
              facilities, leisure venues, and more. Data is filtered and mapped to
              friendly names with exhaustive category coverage.
            </p>
            {onNavigateToSource && (
              <button
                type="button"
                onClick={() => {
                  onNavigateToSource('osm-pois');
                  setShowInfo(false);
                }}
                className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline"
              >
                View data source
              </button>
            )}
          </div>
        </div>
      )}

      <div className="space-y-2" ref={dropdownRef}>
        <button
          type="button"
          onClick={() => setDropdownOpen(!dropdownOpen)}
          aria-expanded={dropdownOpen}
          className="w-full flex items-center justify-between px-3 py-2 text-sm border border-warm-300 dark:border-navy-700 rounded hover:border-warm-400 bg-white dark:bg-navy-800 dark:text-warm-200"
        >
          <span className="truncate text-left">{dropdownLabel}</span>
          <svg
            className={`w-4 h-4 ml-2 flex-shrink-0 transition-transform ${dropdownOpen ? 'rotate-180' : ''}`}
            fill="none"
            stroke="currentColor"
            viewBox="0 0 24 24"
          >
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
          </svg>
        </button>

        {dropdownOpen && (
          <div className="border border-warm-300 dark:border-navy-700 rounded shadow-lg bg-white dark:bg-navy-800">
            <div className="flex gap-2 px-3 py-2 border-b border-warm-200 dark:border-navy-700">
              <button
                type="button"
                onClick={selectAll}
                className="text-xs text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300"
              >
                All
              </button>
              <span className="text-xs text-warm-300 dark:text-warm-600">|</span>
              <button
                type="button"
                onClick={selectNone}
                className="text-xs text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300"
              >
                None
              </button>
            </div>
            <div className="px-3 py-2 border-b border-warm-200 dark:border-navy-700">
              <input
                type="text"
                placeholder="Search categories..."
                aria-label="Search categories"
                value={searchTerm}
                onChange={(e) => setSearchTerm(e.target.value)}
                className="w-full px-2 py-1 text-sm border border-warm-300 dark:border-navy-700 rounded bg-white dark:bg-navy-950 dark:text-warm-200 dark:placeholder-warm-500"
              />
            </div>
            <div className="max-h-96 overflow-y-auto py-1">
              {filteredGroups.map((group) => {
                const groupSelected = group.categories.filter((c) =>
                  selectedCategories.has(c)
                ).length;
                // An empty group must not read as "everything selected".
                const allInGroupSelected =
                  group.categories.length > 0 && groupSelected === group.categories.length;
                const someInGroupSelected = groupSelected > 0 && !allInGroupSelected;
                // While searching, every matching group is shown expanded.
                const isCollapsed = collapsedGroups.has(group.name) && !searchTerm;

                return (
                  <div key={group.name}>
                    <div className="flex items-center gap-1 px-3 py-1.5 bg-warm-50 dark:bg-navy-950 border-y border-warm-100 dark:border-navy-700">
                      <button
                        type="button"
                        onClick={() => toggleCollapse(group.name)}
                        aria-expanded={!isCollapsed}
                        aria-label={`${isCollapsed ? 'Expand' : 'Collapse'} ${group.name}`}
                        className="p-0.5 text-warm-400 hover:text-warm-600"
                      >
                        <svg
                          className={`w-3 h-3 transition-transform ${isCollapsed ? '' : 'rotate-90'}`}
                          fill="none"
                          stroke="currentColor"
                          viewBox="0 0 24 24"
                        >
                          <path
                            strokeLinecap="round"
                            strokeLinejoin="round"
                            strokeWidth={2}
                            d="M9 5l7 7-7 7"
                          />
                        </svg>
                      </button>
                      <label className="flex items-center gap-2 flex-1 cursor-pointer">
                        <input
                          type="checkbox"
                          checked={allInGroupSelected}
                          ref={(el) => {
                            // `indeterminate` has no HTML attribute; it can only be set on the DOM node.
                            if (el) el.indeterminate = someInGroupSelected;
                          }}
                          onChange={() => toggleGroup(group.categories)}
                          className="rounded accent-teal-600"
                        />
                        <span className="text-xs font-semibold text-warm-700 dark:text-warm-300">{group.name}</span>
                      </label>
                      <span className="text-xs text-warm-400">
                        {groupSelected}/{group.categories.length}
                      </span>
                    </div>
                    {!isCollapsed &&
                      group.categories.map((category) => (
                        <label
                          key={category}
                          className="flex items-center gap-2 px-3 pl-8 py-1.5 hover:bg-warm-50 dark:hover:bg-navy-700 cursor-pointer dark:text-warm-300"
                        >
                          <input
                            type="checkbox"
                            checked={selectedCategories.has(category)}
                            onChange={() => toggleCategory(category)}
                            className="rounded accent-teal-600"
                          />
                          <span className="text-sm flex-1">{category}</span>
                        </label>
                      ))}
                  </div>
                );
              })}
            </div>
          </div>
        )}
      </div>

      {selectedCount > 0 && (
        <div className="p-3 bg-teal-50 dark:bg-teal-900/30 rounded text-sm">
          <div className="font-medium text-teal-900 dark:text-teal-300">
            {poiCount.toLocaleString()} POI{poiCount !== 1 ? 's' : ''} visible
          </div>
          <div className="text-xs text-teal-700 dark:text-teal-400 mt-1">
            {selectedCount} categor{selectedCount !== 1 ? 'ies' : 'y'} selected
          </div>
        </div>
      )}

      <div className="p-3 bg-warm-100 dark:bg-navy-800 rounded text-xs text-warm-600 dark:text-warm-400">
        <p>Select categories to display POIs on the map.</p>
        <p className="mt-2">Zoom in for better visibility of individual locations.</p>
      </div>
    </div>
  );
}
|
||||||
316
frontend/src/components/PropertiesPane.tsx
Normal file
316
frontend/src/components/PropertiesPane.tsx
Normal file
|
|
@ -0,0 +1,316 @@
|
||||||
|
import React, { useMemo, useState, useRef, useEffect } from 'react';
|
||||||
|
import { Property } from '../types';
|
||||||
|
|
||||||
|
/** Props for the per-hexagon properties list pane. */
interface PropertiesPaneProps {
  /** Properties loaded so far; searching and sorting operate on this list only. */
  properties: Property[];
  /** Total number of properties available; "Load More" is offered while fewer are loaded. */
  total: number;
  /** True while a fetch is in flight; shows "Loading..." and disables "Load More". */
  loading: boolean;
  /** Selected hexagon, or null to show the "Click a hexagon" placeholder instead of the list. */
  hexagonId: string | null;
  /** Called when the "Load More" button is clicked. */
  onLoadMore: () => void;
  /** Called when the close (X) button in the header is clicked. */
  onClose: () => void;
  /** When provided, the info dialog offers a "View data source" link that calls this with 'epc'. */
  onNavigateToSource?: (slug: string) => void;
  /** Shows a "Preview" badge next to the heading when true. */
  isHoveredPreview?: boolean;
  /** Current state of the live-preview toggle. */
  hoverMode?: boolean;
  /** When provided, the live-preview toggle is rendered and calls this with the negated `hoverMode`. */
  onHoverModeChange?: (enabled: boolean) => void;
}
|
||||||
|
|
||||||
|
/** Sort orders offered by the sort dropdown; values match its <option> values. */
type SortBy =
  | 'price'
  | 'size'
  | 'energy';
|
||||||
|
|
||||||
|
/**
 * Pane listing the properties of the selected hexagon.
 *
 * Shows a header (title, optional "Preview" badge, data-source info dialog,
 * optional live-preview toggle, close button), a search box and sort
 * dropdown, and the list of property cards with a "Load More" button while
 * fewer than `total` properties are loaded. Search and sort are applied
 * client-side to the already-loaded `properties` only.
 *
 * When `hexagonId` is null a placeholder prompt is rendered instead.
 */
export function PropertiesPane({
  properties,
  total,
  loading,
  hexagonId,
  onLoadMore,
  onClose,
  onNavigateToSource,
  isHoveredPreview,
  hoverMode,
  onHoverModeChange,
}: PropertiesPaneProps) {
  const [sortBy, setSortBy] = useState<SortBy>('price');
  const [search, setSearch] = useState('');
  const [showInfo, setShowInfo] = useState(false);
  const infoPopupRef = useRef<HTMLDivElement>(null);

  // Close the info popup on an outside click or Escape.
  // Listeners are only attached while the popup is open.
  useEffect(() => {
    if (!showInfo) return;
    function handleClickOutside(e: MouseEvent) {
      if (infoPopupRef.current && !infoPopupRef.current.contains(e.target as Node)) {
        setShowInfo(false);
      }
    }
    function handleKeyDown(e: KeyboardEvent) {
      if (e.key === 'Escape') setShowInfo(false);
    }
    document.addEventListener('mousedown', handleClickOutside);
    document.addEventListener('keydown', handleKeyDown);
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
      document.removeEventListener('keydown', handleKeyDown);
    };
  }, [showInfo]);

  // Filter and sort properties
  const filteredAndSorted = useMemo(() => {
    const query = search.trim().toLowerCase();
    const filtered = query
      ? properties.filter((p) => {
          const addr = (p.address || '').toLowerCase();
          const pc = (p.postcode || '').toLowerCase();
          return addr.includes(query) || pc.includes(query);
        })
      : properties;

    // Read sort values through getNum with the same key lists the cards use
    // for display, so the order always agrees with the numbers shown.
    // Missing (or zero/NaN) values sort as 0, i.e. last in descending order.
    const numeric = (p: Property, ...keys: string[]): number => getNum(p, ...keys) || 0;

    // Copy before sorting: `filtered` may be the `properties` prop itself.
    return [...filtered].sort((a, b) => {
      switch (sortBy) {
        case 'price':
          return (
            numeric(b, 'Last known price', 'latest_price') -
            numeric(a, 'Last known price', 'latest_price')
          );
        case 'size':
          return (
            numeric(b, 'Total floor area (sqm)', 'total_floor_area') -
            numeric(a, 'Total floor area (sqm)', 'total_floor_area')
          );
        case 'energy':
          // Ratings compare alphabetically; a missing rating is treated as 'Z' so it sorts last.
          return (a.current_energy_rating || 'Z').localeCompare(b.current_energy_rating || 'Z');
        default:
          return 0;
      }
    });
  }, [properties, sortBy, search]);

  if (!hexagonId) {
    return (
      <div className="flex items-center justify-center h-full text-warm-500 dark:text-warm-400">
        Click a hexagon to view properties
      </div>
    );
  }

  return (
    <div className="flex flex-col h-full">
      {/* Header */}
      <div className="p-4 border-b border-warm-200 dark:border-navy-700">
        <div className="flex justify-between items-center">
          <div className="flex items-center gap-2">
            <h2 className="text-lg font-semibold dark:text-warm-100">Properties</h2>
            {isHoveredPreview && (
              <span className="text-xs px-1.5 py-0.5 rounded bg-teal-50 dark:bg-teal-900/30 text-teal-600 dark:text-teal-400">
                Preview
              </span>
            )}
            <button
              type="button"
              onClick={() => setShowInfo(true)}
              className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-0.5 rounded"
              title="Data source info"
              aria-label="Data source info"
            >
              <svg className="w-3.5 h-3.5" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                <circle cx="12" cy="12" r="10" />
                <path strokeLinecap="round" d="M12 16v-4m0-4h.01" />
              </svg>
            </button>
          </div>
          <div className="flex items-center gap-1">
            {onHoverModeChange && (
              <button
                type="button"
                onClick={() => onHoverModeChange(!hoverMode)}
                aria-pressed={Boolean(hoverMode)}
                className={`p-1 rounded ${
                  hoverMode
                    ? 'text-teal-600 dark:text-teal-400 bg-teal-50 dark:bg-teal-900/30'
                    : 'text-warm-400 hover:text-warm-700 dark:hover:text-warm-300'
                }`}
                title={hoverMode ? 'Live preview on (click to lock)' : 'Live preview off (click to enable)'}
              >
                <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                  <path strokeLinecap="round" strokeLinejoin="round" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z" />
                  <path strokeLinecap="round" strokeLinejoin="round" d="M2.458 12C3.732 7.943 7.523 5 12 5c4.478 0 8.268 2.943 9.542 7-1.274 4.057-5.064 7-9.542 7-4.477 0-8.268-2.943-9.542-7z" />
                </svg>
              </button>
            )}
            <button
              type="button"
              onClick={onClose}
              className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 p-1"
              title="Close"
              aria-label="Close"
            >
              <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
              </svg>
            </button>
          </div>
        </div>
        <p className="text-sm text-warm-600 dark:text-warm-400">
          {search.trim()
            ? `${filteredAndSorted.length} match${filteredAndSorted.length !== 1 ? 'es' : ''} in ${properties.length} loaded`
            : `Showing ${properties.length} of ${total} properties`}
        </p>
        {showInfo && (
          <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/30">
            <div
              ref={infoPopupRef}
              role="dialog"
              aria-modal="true"
              aria-label="Property Data"
              className="bg-white dark:bg-navy-800 border border-warm-200 dark:border-navy-700 rounded-lg shadow-xl max-w-md w-full mx-4 p-5"
            >
              <div className="flex items-start justify-between mb-3">
                <h3 className="text-sm font-semibold text-warm-900 dark:text-warm-100 pr-4">
                  Property Data
                </h3>
                <button
                  type="button"
                  onClick={() => setShowInfo(false)}
                  className="text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shrink-0"
                  aria-label="Close"
                >
                  <svg className="w-4 h-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth={2}>
                    <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
                  </svg>
                </button>
              </div>
              <p className="text-sm text-warm-700 dark:text-warm-300 mb-4 leading-relaxed">
                Property data combines Energy Performance Certificates (EPC) with HM Land
                Registry Price Paid records, fuzzy-matched by address within each postcode.
                Includes floor area, energy ratings, construction age, and tenure from EPC
                surveys, plus the most recent sale price from the Land Registry.
              </p>
              {onNavigateToSource && (
                <button
                  type="button"
                  onClick={() => {
                    onNavigateToSource('epc');
                    setShowInfo(false);
                  }}
                  className="text-sm text-teal-600 dark:text-teal-400 hover:text-teal-800 dark:hover:text-teal-300 hover:underline"
                >
                  View data source
                </button>
              )}
            </div>
          </div>
        )}
      </div>

      {/* Search and sort controls */}
      <div className="p-2 border-b border-warm-200 dark:border-navy-700 space-y-2">
        <input
          type="text"
          value={search}
          onChange={(e) => setSearch(e.target.value)}
          placeholder="Search by address or postcode..."
          aria-label="Search by address or postcode"
          className="w-full p-2 border border-warm-300 dark:border-navy-700 rounded text-sm bg-white dark:bg-navy-800 dark:text-warm-200 placeholder-warm-400 dark:placeholder-warm-500"
        />
        <select
          value={sortBy}
          onChange={(e) => setSortBy(e.target.value as SortBy)}
          aria-label="Sort properties"
          className="w-full p-2 border border-warm-300 dark:border-navy-700 rounded text-sm bg-white dark:bg-navy-800 dark:text-warm-200"
        >
          <option value="price">Price (High to Low)</option>
          <option value="size">Size (Large to Small)</option>
          <option value="energy">Energy Rating (Best to Worst)</option>
        </select>
      </div>

      {/* Properties list */}
      <div className="flex-1 overflow-y-auto">
        {loading && properties.length === 0 ? (
          <div className="p-4 dark:text-warm-400">Loading...</div>
        ) : (
          <>
            {filteredAndSorted.map((property, idx) => (
              <PropertyCard key={idx} property={property} />
            ))}
            {properties.length < total && (
              <button
                type="button"
                onClick={onLoadMore}
                disabled={loading}
                className="w-full p-4 text-teal-600 dark:text-teal-400 hover:bg-teal-50 dark:hover:bg-teal-900/30 disabled:opacity-50"
              >
                {loading ? 'Loading...' : `Load More (${total - properties.length} remaining)`}
              </button>
            )}
          </>
        )}
      </div>
    </div>
  );
}
|
||||||
|
|
||||||
|
/**
 * Expand a one-letter tenure code into its display label.
 *
 * 'F' becomes 'Freehold' and 'L' becomes 'Leasehold'; any other value is
 * returned unchanged.
 */
function formatDuration(d: string): string {
  switch (d) {
    case 'F':
      return 'Freehold';
    case 'L':
      return 'Leasehold';
    default:
      return d;
  }
}
|
||||||
|
|
||||||
|
/**
 * Format a construction-age value as a rounded whole number.
 *
 * Values of 1000 or more get a leading '~' when `approximate` is true
 * (presumably these are calendar years - confirm against the data source);
 * smaller values are never prefixed.
 */
function formatAge(value: number, approximate = true): string {
  const rounded = Math.round(value).toString();
  const prefix = approximate && value >= 1000 ? '~' : '';
  return prefix + rounded;
}
|
||||||
|
|
||||||
|
// Look up the first of `keys` whose value on `property` is a number and return
// that value. Keys holding strings, booleans, null or nothing at all are
// skipped; returns undefined when no key holds a number.
function getNum(property: Property, ...keys: string[]): number | undefined {
  const numericKey = keys.find((key) => typeof property[key] === 'number');
  if (numericKey === undefined) return undefined;
  return property[numericKey] as number;
}
|
||||||
|
|
||||||
|
// Card for one property: address and postcode, the price line when a price is
// known, then a two-column grid containing only the details that are present.
function PropertyCard({ property }: { property: Property }) {
  // Whole numbers get locale thousands separators; a positive `decimals`
  // switches to fixed-point output. Undefined renders as an empty string.
  const fmt = (value: number | undefined, decimals = 0): string => {
    if (value === undefined) return '';
    if (decimals > 0) return value.toFixed(decimals);
    return Math.round(value).toLocaleString();
  };

  // Each figure is looked up under two alternative keys; the first numeric one wins.
  const price = getNum(property, 'Last known price', 'latest_price');
  const pricePerSqm = getNum(property, 'Price per sqm', 'price_per_sqm');
  const floorArea = getNum(property, 'Total floor area (sqm)', 'total_floor_area');
  const rooms = getNum(
    property,
    'Rooms (including bedrooms & bathrooms)',
    'number_habitable_rooms'
  );
  const age = getNum(property, 'Approximate construction age', 'construction_age_band');

  // Collect [label, value] pairs in display order, skipping absent fields:
  // text fields are shown when non-empty, numeric ones when defined.
  const detailRows: Array<[string, React.ReactNode]> = [];
  if (property.property_type) {
    detailRows.push(['Type:', property.property_type]);
  }
  if (property.built_form) {
    detailRows.push(['Built form:', property.built_form]);
  }
  if (property.duration) {
    detailRows.push(['Tenure:', formatDuration(property.duration)]);
  }
  if (floorArea !== undefined) {
    detailRows.push(['Floor area:', <>{fmt(floorArea)}m²</>]);
  }
  if (rooms !== undefined) {
    detailRows.push(['Rooms:', fmt(rooms)]);
  }
  if (age !== undefined) {
    // The construction date counts as approximate unless the record says otherwise.
    detailRows.push(['Built:', formatAge(age, property.is_construction_date_approximate ?? true)]);
  }
  if (property.current_energy_rating) {
    detailRows.push(['EPC rating:', property.current_energy_rating]);
  }
  if (property.potential_energy_rating) {
    detailRows.push(['EPC potential:', property.potential_energy_rating]);
  }

  return (
    <div className="p-4 border-b border-warm-100 dark:border-navy-800 hover:bg-warm-50 dark:hover:bg-navy-800">
      {/* Address & postcode */}
      <div className="font-semibold dark:text-warm-100">{property.address || 'Unknown Address'}</div>
      <div className="text-sm text-warm-600 dark:text-warm-400">{property.postcode}</div>

      {/* Price */}
      {price !== undefined && (
        <div className="mt-2 text-lg font-bold text-teal-700 dark:text-teal-400">
          £{fmt(price)}
          {pricePerSqm !== undefined && (
            <span className="text-sm font-normal text-warm-600 dark:text-warm-400"> (£{fmt(pricePerSqm)}/m²)</span>
          )}
        </div>
      )}

      {/* Property details grid */}
      <div className="mt-2 grid grid-cols-2 gap-x-4 gap-y-1 text-sm dark:text-warm-300">
        {detailRows.map(([label, value]) => (
          <div key={label}>
            <span className="text-warm-500 dark:text-warm-400">{label}</span> {value}
          </div>
        ))}
      </div>
    </div>
  );
}
|
||||||
|
|
@ -7,6 +7,6 @@ interface LabelProps {
|
||||||
|
|
||||||
export function Label({ children, className }: LabelProps) {
|
export function Label({ children, className }: LabelProps) {
|
||||||
return (
|
return (
|
||||||
<label className={`text-sm font-medium text-slate-700 ${className || ''}`}>{children}</label>
|
<label className={`text-sm font-medium text-warm-700 dark:text-warm-300 ${className || ''}`}>{children}</label>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,13 +11,13 @@ export function Slider({ className, ...props }: SliderProps) {
|
||||||
className={cn('relative flex w-full touch-none select-none items-center', className)}
|
className={cn('relative flex w-full touch-none select-none items-center', className)}
|
||||||
{...props}
|
{...props}
|
||||||
>
|
>
|
||||||
<SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full bg-slate-200">
|
<SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full bg-warm-200 dark:bg-navy-700">
|
||||||
<SliderPrimitive.Range className="absolute h-full bg-slate-900" />
|
<SliderPrimitive.Range className="absolute h-full bg-teal-600" />
|
||||||
</SliderPrimitive.Track>
|
</SliderPrimitive.Track>
|
||||||
{props.value?.map((_, i) => (
|
{props.value?.map((_, i) => (
|
||||||
<SliderPrimitive.Thumb
|
<SliderPrimitive.Thumb
|
||||||
key={i}
|
key={i}
|
||||||
className="block h-5 w-5 rounded-full border-2 border-slate-900 bg-white ring-offset-white transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-slate-950 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50"
|
className="block h-5 w-5 rounded-full border-2 border-teal-600 dark:border-teal-500 bg-white dark:bg-navy-800 ring-offset-white dark:ring-offset-navy-950 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-teal-600 focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50"
|
||||||
/>
|
/>
|
||||||
))}
|
))}
|
||||||
</SliderPrimitive.Root>
|
</SliderPrimitive.Root>
|
||||||
|
|
|
||||||
|
|
@ -9,3 +9,41 @@ body,
|
||||||
margin: 0;
|
margin: 0;
|
||||||
padding: 0;
|
padding: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
html.dark {
|
||||||
|
background-color: #0a0e1a;
|
||||||
|
color-scheme: dark;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Smooth theme transitions (scoped to avoid map performance issues) */
|
||||||
|
body,
|
||||||
|
div,
|
||||||
|
aside,
|
||||||
|
section,
|
||||||
|
header,
|
||||||
|
nav,
|
||||||
|
button,
|
||||||
|
input,
|
||||||
|
select,
|
||||||
|
label,
|
||||||
|
span,
|
||||||
|
p,
|
||||||
|
h1,
|
||||||
|
h2,
|
||||||
|
h3 {
|
||||||
|
transition: background-color 0.2s ease, border-color 0.2s ease, color 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fade-in animation for homepage sections */
|
||||||
|
.fade-in-section {
|
||||||
|
opacity: 0;
|
||||||
|
transform: translateY(24px);
|
||||||
|
transition:
|
||||||
|
opacity 0.6s ease-out,
|
||||||
|
transform 0.6s ease-out;
|
||||||
|
}
|
||||||
|
|
||||||
|
.fade-in-visible {
|
||||||
|
opacity: 1;
|
||||||
|
transform: translateY(0);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,14 @@
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8" />
|
<meta charset="UTF-8" />
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
<title>UK Property Prices Map</title>
|
<title>Narrowit</title>
|
||||||
|
<script>
|
||||||
|
(function() {
|
||||||
|
if (localStorage.getItem('theme') === 'dark') {
|
||||||
|
document.documentElement.classList.add('dark');
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="root"></div>
|
<div id="root"></div>
|
||||||
|
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
import type { Filters } from '../types';
|
|
||||||
|
|
||||||
// Filter configuration constants
|
|
||||||
// Should match backend pipeline/config.py
|
|
||||||
|
|
||||||
export const YEAR_MIN = 1995;
|
|
||||||
export const YEAR_MAX = 2024;
|
|
||||||
export const YEAR_STEP = 1;
|
|
||||||
|
|
||||||
export const PRICE_MIN = 0;
|
|
||||||
export const PRICE_MAX = 5000000; // £5M max for slider, but no server-side cap
|
|
||||||
export const PRICE_STEP = 50000;
|
|
||||||
|
|
||||||
export const DEFAULT_FILTERS: Filters = {
|
|
||||||
minYear: 2020,
|
|
||||||
maxYear: YEAR_MAX,
|
|
||||||
minPrice: PRICE_MIN,
|
|
||||||
maxPrice: PRICE_MAX,
|
|
||||||
};
|
|
||||||
|
|
@ -1,8 +1,31 @@
|
||||||
export interface Filters {
|
export interface FeatureMeta {
|
||||||
minYear: number;
|
name: string;
|
||||||
maxYear: number;
|
type: 'numeric' | 'enum';
|
||||||
minPrice: number;
|
group?: string;
|
||||||
maxPrice: number;
|
// Numeric-only fields
|
||||||
|
min?: number;
|
||||||
|
max?: number;
|
||||||
|
step?: number;
|
||||||
|
// Enum-only fields
|
||||||
|
values?: string[];
|
||||||
|
// Description fields
|
||||||
|
description?: string;
|
||||||
|
detail?: string;
|
||||||
|
source?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface FeatureGroup {
|
||||||
|
name: string;
|
||||||
|
features: FeatureMeta[];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filters: feature name -> [selectedMin, selectedMax] for numeric, string[] for enum
|
||||||
|
export type FeatureFilters = Record<string, [number, number] | string[]>;
|
||||||
|
|
||||||
|
export interface HexagonData {
|
||||||
|
h3: string;
|
||||||
|
count: number;
|
||||||
|
[key: string]: string | number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Bounds {
|
export interface Bounds {
|
||||||
|
|
@ -12,15 +35,6 @@ export interface Bounds {
|
||||||
east: number;
|
east: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface HexagonData {
|
|
||||||
h3: string;
|
|
||||||
count: number;
|
|
||||||
avg_price: number;
|
|
||||||
median_price: number;
|
|
||||||
min_price: number;
|
|
||||||
max_price: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface ViewState {
|
export interface ViewState {
|
||||||
longitude: number;
|
longitude: number;
|
||||||
latitude: number;
|
latitude: number;
|
||||||
|
|
@ -33,6 +47,8 @@ export interface ViewChangeParams {
|
||||||
resolution: number;
|
resolution: number;
|
||||||
bounds: Bounds;
|
bounds: Bounds;
|
||||||
zoom: number;
|
zoom: number;
|
||||||
|
latitude: number;
|
||||||
|
longitude: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ApiResponse {
|
export interface ApiResponse {
|
||||||
|
|
@ -43,21 +59,69 @@ export interface POI {
|
||||||
id: string;
|
id: string;
|
||||||
name: string;
|
name: string;
|
||||||
category: string;
|
category: string;
|
||||||
|
group: string;
|
||||||
lat: number;
|
lat: number;
|
||||||
lng: number;
|
lng: number;
|
||||||
|
emoji: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface POIResponse {
|
export interface POIResponse {
|
||||||
features: POI[];
|
pois: POI[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export const POI_CATEGORY_GROUPS = [
|
export interface POICategoryGroup {
|
||||||
'schools',
|
name: string;
|
||||||
'healthcare',
|
categories: string[];
|
||||||
'transport',
|
}
|
||||||
'parks',
|
|
||||||
'emergency',
|
|
||||||
'supermarkets',
|
|
||||||
] as const;
|
|
||||||
|
|
||||||
export type POICategoryGroup = (typeof POI_CATEGORY_GROUPS)[number];
|
export interface POICategoriesResponse {
|
||||||
|
groups: POICategoryGroup[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface Property {
|
||||||
|
// String fields
|
||||||
|
address?: string;
|
||||||
|
postcode?: string;
|
||||||
|
property_type?: string;
|
||||||
|
built_form?: string;
|
||||||
|
duration?: string;
|
||||||
|
current_energy_rating?: string;
|
||||||
|
potential_energy_rating?: string;
|
||||||
|
|
||||||
|
// Numeric fields
|
||||||
|
lat: number;
|
||||||
|
lon: number;
|
||||||
|
|
||||||
|
is_construction_date_approximate?: boolean;
|
||||||
|
|
||||||
|
// All other numeric features (dynamic, including construction_age_band)
|
||||||
|
[key: string]: string | number | boolean | undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface HexagonPropertiesResponse {
|
||||||
|
properties: Property[];
|
||||||
|
total: number;
|
||||||
|
limit: number;
|
||||||
|
offset: number;
|
||||||
|
truncated: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface NumericFeatureStats {
|
||||||
|
name: string;
|
||||||
|
count: number;
|
||||||
|
min: number;
|
||||||
|
max: number;
|
||||||
|
mean: number;
|
||||||
|
histogram: { min: number; max: number; bin_width: number; counts: number[] };
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface EnumFeatureStats {
|
||||||
|
name: string;
|
||||||
|
counts: Record<string, number>;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface HexagonStatsResponse {
|
||||||
|
count: number;
|
||||||
|
numeric_features: NumericFeatureStats[];
|
||||||
|
enum_features: EnumFeatureStats[];
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,54 @@
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
darkMode: 'class',
|
||||||
content: ['./src/**/*.{js,jsx,ts,tsx,html}'],
|
content: ['./src/**/*.{js,jsx,ts,tsx,html}'],
|
||||||
theme: {
|
theme: {
|
||||||
extend: {},
|
extend: {
|
||||||
|
colors: {
|
||||||
|
navy: {
|
||||||
|
50: '#eef1f8',
|
||||||
|
100: '#d9dff0',
|
||||||
|
200: '#b3bfe1',
|
||||||
|
300: '#8d9fd2',
|
||||||
|
400: '#677fc3',
|
||||||
|
500: '#4a63a8',
|
||||||
|
600: '#2a3f6b',
|
||||||
|
700: '#1e2d50',
|
||||||
|
800: '#141e38',
|
||||||
|
900: '#0f1528',
|
||||||
|
950: '#0a0e1a',
|
||||||
|
},
|
||||||
|
teal: {
|
||||||
|
50: '#effefb',
|
||||||
|
100: '#c7fff4',
|
||||||
|
200: '#90ffe9',
|
||||||
|
300: '#51f7d9',
|
||||||
|
400: '#1de4c3',
|
||||||
|
500: '#05c9aa',
|
||||||
|
600: '#00a28c',
|
||||||
|
700: '#058172',
|
||||||
|
800: '#0a665b',
|
||||||
|
900: '#0d544c',
|
||||||
|
950: '#003330',
|
||||||
|
},
|
||||||
|
coral: {
|
||||||
|
400: '#fb923c',
|
||||||
|
500: '#f97316',
|
||||||
|
600: '#ea580c',
|
||||||
|
},
|
||||||
|
warm: {
|
||||||
|
50: '#fafaf9',
|
||||||
|
100: '#f5f5f4',
|
||||||
|
200: '#e7e5e4',
|
||||||
|
300: '#d6d3d1',
|
||||||
|
400: '#a8a29e',
|
||||||
|
500: '#78716c',
|
||||||
|
600: '#57534e',
|
||||||
|
700: '#44403c',
|
||||||
|
800: '#292524',
|
||||||
|
900: '#1c1917',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
plugins: [require('tailwindcss-animate')],
|
plugins: [require('tailwindcss-animate')],
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,49 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# /// script
|
|
||||||
# requires-python = ">=3.12"
|
|
||||||
# dependencies = ["openapi-python-client"]
|
|
||||||
# ///
|
|
||||||
"""Regenerate the TfL Journey API client from the OpenAPI specification."""
|
|
||||||
|
|
||||||
# Run it with:
|
|
||||||
# uv run generate_tfl_client.py
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
OPENAPI_SPEC = Path("Journey.yaml")
|
|
||||||
OUTPUT_PATH = Path("tfl_journey_client")
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Generate the TfL Journey API client from the OpenAPI specification.

    Does nothing (apart from printing a notice) when ``OUTPUT_PATH`` already
    exists.

    Raises:
        FileNotFoundError: If ``OPENAPI_SPEC`` does not exist.
        subprocess.CalledProcessError: If ``openapi-python-client`` exits with
            a non-zero status (``check=True``).
    """
    if not OPENAPI_SPEC.exists():
        raise FileNotFoundError(f"OpenAPI spec not found: {OPENAPI_SPEC}")

    # Skip if client already exists
    if OUTPUT_PATH.exists():
        print(f"TfL client already exists at {OUTPUT_PATH}, skipping")
        return

    # Generate the client
    print(f"Generating client from {OPENAPI_SPEC}")
    # check=True raises on a non-zero exit status, so reaching the line after
    # this call already means the generator succeeded; no returncode test is
    # needed (the previous failure branch could never run).
    subprocess.run(
        [
            "openapi-python-client",
            "generate",
            "--path",
            str(OPENAPI_SPEC),
            "--output-path",
            str(OUTPUT_PATH),
        ],
        check=True,
    )

    print(f"Client generated successfully at {OUTPUT_PATH}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
6
main.py
6
main.py
|
|
@ -1,6 +0,0 @@
|
||||||
def main():
    """Print the project greeting to standard output."""
    greeting = "Hello from property-map!"
    print(greeting)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
import polars as pl
|
|
||||||
|
|
||||||
|
|
||||||
class DataSource(ABC):
    """Abstract interface that every data source implements."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the unique identifier of this data source."""

    @abstractmethod
    def load(self) -> pl.LazyFrame:
        """Return the raw data of this source as a LazyFrame."""

    @abstractmethod
    def process(self, postcodes: pl.LazyFrame) -> pl.LazyFrame:
        """Return the processed data joined with the postcode coordinates."""
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
"""Shared configuration for the pipeline and server."""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
DATA_DIR = Path(__file__).parent.parent / "data_sources"
|
|
||||||
PROCESSED_DIR = DATA_DIR / "processed"
|
|
||||||
AGGREGATES_DIR = PROCESSED_DIR / "aggregates"
|
|
||||||
|
|
||||||
# H3 resolutions to generate and serve
|
|
||||||
# https://h3geo.org/docs/core-library/restable/#average-area-in-m2
|
|
||||||
H3_RESOLUTIONS = [7, 8, 9, 10, 11]
|
|
||||||
DEFAULT_H3_RESOLUTION = 8
|
|
||||||
|
|
||||||
# Year filters
|
|
||||||
MIN_YEAR = 1995
|
|
||||||
MAX_YEAR = 2024
|
|
||||||
DEFAULT_MIN_YEAR = 2020
|
|
||||||
DEFAULT_MAX_YEAR = 2024
|
|
||||||
|
|
||||||
# Price filters
|
|
||||||
DEFAULT_MIN_PRICE = 0
|
|
||||||
DEFAULT_MAX_PRICE = 100_000_000
|
|
||||||
38
pipeline/download/arcgis.py
Normal file
38
pipeline/download/arcgis.py
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
import polars as pl
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pipeline.utils import download, extract_zip
|
||||||
|
|
||||||
|
URL = "https://www.arcgis.com/sharing/rest/content/items/077631e063eb4e1ab43575d01381ec33/data"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
    """Lazily scan the extracted NSPL CSV and sink it to a zstd parquet file.

    Args:
        data_path: Directory the downloaded archive was extracted into.
        parquet_path: Destination parquet file; missing parent directories
            are created.
    """
    csv_file = data_path / "Data/NSPL_MAY_2025_UK.csv"
    lazy_frame = pl.scan_csv(csv_file, try_parse_dates=True)

    column_names = lazy_frame.collect_schema().names()
    print(f"Columns: {column_names}")

    output_dir = parquet_path.parent
    output_dir.mkdir(parents=True, exist_ok=True)
    lazy_frame.sink_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: download the ArcGIS archive, extract it, write parquet."""
    arg_parser = argparse.ArgumentParser(
        description="Download and convert ArcGIS postcode data"
    )
    arg_parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    options = arg_parser.parse_args()

    # The archive and its extracted contents only need to exist for this run.
    with tempfile.TemporaryDirectory() as cache_dir:
        workspace = Path(cache_dir)
        archive = workspace / "arcgis_data.zip"
        unpacked = workspace / "arcgis_extracted"

        download(URL, archive)
        extract_zip(archive, unpacked)
        convert_to_parquet(unpacked, options.output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
62
pipeline/download/broadband.py
Normal file
62
pipeline/download/broadband.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
import polars as pl
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pipeline.utils import download, extract_zip
|
||||||
|
|
||||||
|
# Ofcom Connected Nations 2025 - Fixed broadband performance (output area & local authority level)
|
||||||
|
# Source: https://www.ofcom.org.uk/phones-and-broadband/coverage-and-speeds/connected-nations-20252/data-downloads-2025
|
||||||
|
PERFORMANCE_URL = "https://www.ofcom.org.uk/siteassets/resources/documents/research-and-data/multi-sector/infrastructure-research/connected-nations-2025/202507_fixed_broadband_coverage_r01.zip?v=407830"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_parquet(extract_dir: Path, parquet_path: Path) -> None:
    """Combine every CSV found under ``extract_dir`` into one parquet file.

    Args:
        extract_dir: Directory searched recursively for ``*.csv`` files.
        parquet_path: Destination parquet file; missing parent directories
            are created.

    Raises:
        FileNotFoundError: If no CSV file exists under ``extract_dir``.
    """
    discovered = list(extract_dir.rglob("*.csv"))
    if not discovered:
        raise FileNotFoundError(f"No CSV files found in {extract_dir}")

    found_names = [path.name for path in discovered]
    print(f"Found {len(discovered)} CSV files: {found_names}")

    def _read(path: Path) -> pl.DataFrame:
        # Load one CSV, reporting progress before and after the read.
        print(f"Reading {path.name}...")
        table = pl.read_csv(path, infer_schema_length=10000, encoding="utf8-lossy")
        print(f" Shape: {table.shape}")
        return table

    # Files are read in sorted path order; the diagonal_relaxed concat lets
    # files with differing columns be stacked together.
    merged = pl.concat(
        [_read(path) for path in sorted(discovered)],
        how="diagonal_relaxed",
    )
    print(f"Combined shape: {merged.shape}")

    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    merged.write_parquet(parquet_path, compression="zstd")
    row_count = merged.shape[0]
    print(f"Saved {parquet_path} ({row_count} rows)")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: download the Ofcom coverage archive and write parquet.

    The downloaded zip contains a nested zip with the postcode-level coverage
    CSVs, so the data is extracted twice before conversion.
    """
    parser = argparse.ArgumentParser(
        description="Download Ofcom broadband performance data"
    )
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output parquet file path",
    )
    args = parser.parse_args()

    # Let the temporary directory clean itself up on exit: passing
    # delete=False left the downloaded archive and both extraction trees on
    # disk after every run (and that argument only exists on Python 3.12+).
    with tempfile.TemporaryDirectory() as cache_dir:
        cache = Path(cache_dir)
        zip_path = cache / "broadband_performance.zip"
        extract_dir = cache / "extracted"
        extracted_again_dir = cache / "extracted-again"

        download(PERFORMANCE_URL, zip_path)
        extract_zip(zip_path, extract_dir)

        inner_dir = extract_dir / "202507_fixed_coverage_r01"
        # Show what the outer archive contained before unpacking the inner zip.
        print(list(inner_dir.glob("*")))
        extract_zip(inner_dir / "202507_fixed_pc_coverage_r01.zip", extracted_again_dir)

        convert_to_parquet(extracted_again_dir, args.output)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
43
pipeline/download/deprivation_data.py
Normal file
43
pipeline/download/deprivation_data.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
import polars as pl
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pipeline.utils import download
|
||||||
|
|
||||||
|
URL = "https://assets.publishing.service.gov.uk/media/691ded34513046b952c500bd/File_5_IoD2025_Scores_for_the_Indices_of_Deprivation.xlsx"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_parquet(xlsx_path: Path, parquet_path: Path) -> None:
    """Convert the second sheet of the downloaded workbook to parquet.

    Args:
        xlsx_path: Path of the downloaded Excel workbook.
        parquet_path: Destination parquet file; missing parent directories
            are created.
    """
    print("Reading Excel file (sheet 2)...")

    # Read the 2nd sheet - IoD2025 Scores
    df = pl.read_excel(
        xlsx_path,
        sheet_id=2,  # 1-indexed, so 2 = second sheet
    )

    print(f"Shape: {df.shape}")
    print(f"Columns: {df.columns}")

    # Create the output directory first, like the other download scripts do,
    # so a fresh checkout does not fail on the write.
    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    df.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: download the deprivation workbook and write parquet."""
    arg_parser = argparse.ArgumentParser(
        description="Download and convert Index of Deprivation data"
    )
    arg_parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    options = arg_parser.parse_args()

    # The workbook is only an intermediate artefact, so it lives in a
    # temporary directory.
    with tempfile.TemporaryDirectory() as cache_dir:
        workbook = Path(cache_dir) / "IoD2025_Scores.xlsx"
        download(URL, workbook, timeout=60)
        convert_to_parquet(workbook, options.output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
58
pipeline/download/ethnicity.py
Normal file
58
pipeline/download/ethnicity.py
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
pl.Config.set_tbl_cols(-1)
|
||||||
|
|
||||||
|
|
||||||
|
URL = "https://www.ethnicity-facts-figures.service.gov.uk/uk-population-by-ethnicity/national-and-regional-populations/regional-ethnic-diversity/latest/downloads/population-by-ethnicity-and-local-authority-2021.csv"
|
||||||
|
|
||||||
|
|
||||||
|
def download_and_convert(output_path: Path) -> None:
    """Download the ethnicity CSV and write one row per geography code.

    The long-format CSV is filtered to the "ONS 2021 5+1" ethnicity
    classification (excluding the "All" total rows) and pivoted so that each
    ethnicity becomes a column named ``% <ethnicity>`` holding ``Value1``.

    Args:
        output_path: Destination parquet file; missing parent directories
            are created.

    Raises:
        httpx.HTTPStatusError: If the download returns an error status.
    """
    print("Downloading ethnicity data...")
    response = httpx.get(URL, follow_redirects=True, timeout=60)
    response.raise_for_status()

    df = pl.read_csv(response.content)
    # Log the shape the message promises, not a 100-row table dump.
    print(f"Raw shape: {df.shape}")

    # Keep only broad ethnicity categories (5+1), exclude "All" totals
    df = df.filter(
        (pl.col("Ethnicity_type") == "ONS 2021 5+1") & (pl.col("Ethnicity") != "All")
    )

    # Pivot: one row per local authority, columns = ethnicity percentages
    wide = df.pivot(
        on="Ethnicity",
        index="Geography_code",
        values="Value1",
    )

    # Rename columns to be descriptive
    rename_map = {
        col: f"% {col}" for col in wide.columns if col != "Geography_code"
    }
    wide = wide.rename(rename_map)

    print(f"Output shape: {wide.shape}")
    print(f"Columns: {wide.columns}")

    # Create the output directory first, like the other download scripts do.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    wide.write_parquet(output_path, compression="zstd")
    print(f"Saved to {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: parse ``--output`` and run the download/conversion."""
    arg_parser = argparse.ArgumentParser(
        description="Download and convert ethnicity by local authority data"
    )
    arg_parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    options = arg_parser.parse_args()
    download_and_convert(options.output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
69
pipeline/download/naptan.py
Normal file
69
pipeline/download/naptan.py
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
"""Download NaPTAN data and extract railway/metro station POIs."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import io
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
NAPTAN_CSV_URL = "https://naptan.api.dft.gov.uk/v1/access-nodes?dataFormat=csv"
|
||||||
|
|
||||||
|
|
||||||
|
STOP_TYPES = {
|
||||||
|
'AIR': "Airport",
|
||||||
|
'FTD': "Ferry",
|
||||||
|
"RSE": "Rail station",
|
||||||
|
"BCT": "Bus stop",
|
||||||
|
"BCE": "Bus station",
|
||||||
|
"TXR": "Taxi rank",
|
||||||
|
"TMU": "Metro or Tram stop",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def download_naptan(output: Path) -> None:
    """Download the NaPTAN access-node CSV and write the selected stops as parquet.

    Only rows whose ``StopType`` is a key of ``STOP_TYPES`` and whose
    latitude/longitude parse as numbers are kept. The output columns are
    ``id``, ``name``, ``category``, ``lat`` and ``lng``.

    Args:
        output: Destination parquet file; missing parent directories are
            created.
    """
    output.parent.mkdir(parents=True, exist_ok=True)

    print(f"Downloading NaPTAN data from {NAPTAN_CSV_URL}")
    with urllib.request.urlopen(NAPTAN_CSV_URL) as resp:
        payload = resp.read()

    downloaded_mb = len(payload) / (1024 * 1024)
    print(f"Downloaded {downloaded_mb:.1f} MB")

    # infer_schema_length=0 skips type inference (columns arrive as strings),
    # so the coordinates are cast explicitly; strict=False turns unparsable
    # values into nulls, which are then dropped.
    raw_table = pl.read_csv(io.BytesIO(payload), infer_schema_length=0)
    with_coords = raw_table.with_columns(
        pl.col("Latitude").cast(pl.Float64, strict=False),
        pl.col("Longitude").cast(pl.Float64, strict=False),
    ).drop_nulls(subset=["Latitude", "Longitude"])

    wanted_codes = list(STOP_TYPES)
    wanted = with_coords.filter(pl.col("StopType").is_in(wanted_codes))

    stops = wanted.select(
        pl.col("ATCOCode").alias("id"),
        pl.col("CommonName").alias("name"),
        pl.col("StopType").replace(STOP_TYPES).alias("category"),
        pl.col("Latitude").alias("lat"),
        pl.col("Longitude").alias("lng"),
    )

    stops.write_parquet(output)
    written_mb = output.stat().st_size / (1024 * 1024)
    print(f"Wrote {output} ({written_mb:.1f} MB, {len(stops):,} stations)")

    # Per-category breakdown, most frequent first.
    per_category = stops.group_by("category").len().sort("len", descending=True)
    for entry in per_category.iter_rows(named=True):
        print(f" {entry['category']}: {entry['len']:,}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: parse ``--output`` and download the NaPTAN data."""
    arg_parser = argparse.ArgumentParser(description="Download NaPTAN station data")
    arg_parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    options = arg_parser.parse_args()
    download_naptan(options.output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
283
pipeline/download/noise.py
Normal file
283
pipeline/download/noise.py
Normal file
|
|
@ -0,0 +1,283 @@
|
||||||
|
"""Download Defra Round 4 (2022) strategic noise data for England.
|
||||||
|
|
||||||
|
Downloads modelled noise levels (road, rail, airport) as GeoTIFF rasters via
|
||||||
|
WCS, then samples noise values at postcode centroids. Outputs a parquet file
|
||||||
|
with postcode-level noise in dB for each source.
|
||||||
|
|
||||||
|
Uses 100km tiles (~42 per source) to balance request size vs count. The server
|
||||||
|
times out on tiles larger than ~150km at 100m resolution.
|
||||||
|
|
||||||
|
Data source: Defra Strategic Noise Mapping Round 4 (2022)
|
||||||
|
- Lden = day-evening-night 24h weighted average (the EU standard metric)
|
||||||
|
- 10m grid, modelled at 4m above ground
|
||||||
|
License: Open Government Licence v3.0
|
||||||
|
|
||||||
|
Note: Road/rail use WCS 1.0.0; airport requires WCS 2.0.1 (Defra's 1.0.0
|
||||||
|
endpoint is broken for that coverage).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import numpy as np
|
||||||
|
import polars as pl
|
||||||
|
import rasterio
|
||||||
|
from pyproj import Transformer
|
||||||
|
from rasterio.merge import merge
|
||||||
|
from rasterio.transform import rowcol
|
||||||
|
|
||||||
|
# Noise sources: (label, column_name, WCS base URL, coverage ID, WCS version)
|
||||||
|
# Road/rail work with WCS 1.0.0; airport requires WCS 2.0.1.
|
||||||
|
NOISE_SOURCES = [
|
||||||
|
(
|
||||||
|
"Road",
|
||||||
|
"road_noise_lden_db",
|
||||||
|
"https://environment.data.gov.uk/spatialdata/road-noise-all-metrics-england-round-4/wcs",
|
||||||
|
"Road_Noise_Lden_England_Round_4_All",
|
||||||
|
"1.0.0",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Rail",
|
||||||
|
"rail_noise_lden_db",
|
||||||
|
"https://environment.data.gov.uk/spatialdata/noise-data/wcs",
|
||||||
|
"Rail_Noise_Lden_England_Round_4_All",
|
||||||
|
"1.0.0",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Airport",
|
||||||
|
"airport_noise_lden_db",
|
||||||
|
"https://environment.data.gov.uk/spatialdata/airport-noise-all-metrics-england-round-4/wcs",
|
||||||
|
"dac9cba4-abe7-43bd-b8e9-8a83da52edd8__Airport_Noise_ALL_Lden",
|
||||||
|
"2.0.1",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
# England extent in EPSG:27700 (British National Grid), rounded outward
|
||||||
|
BNG_MIN_E = 80_000
|
||||||
|
BNG_MAX_E = 660_000
|
||||||
|
BNG_MIN_N = 0
|
||||||
|
BNG_MAX_N = 660_000
|
||||||
|
|
||||||
|
# Tile size in metres (100km balances request size vs count; 300km causes 504s)
|
||||||
|
TILE_SIZE = 100_000
|
||||||
|
|
||||||
|
# Max concurrent tile downloads
|
||||||
|
MAX_WORKERS = 4
|
||||||
|
|
||||||
|
# Native raster resolution (10m grid)
|
||||||
|
NATIVE_RESOLUTION = 10
|
||||||
|
|
||||||
|
# Request pixel resolution in metres (100m is sufficient for postcode-level data
|
||||||
|
# and keeps download size ~100x smaller than native 10m)
|
||||||
|
RESOLUTION = 100
|
||||||
|
|
||||||
|
|
||||||
|
def _wcs_get_coverage_url(
    wcs_base: str,
    coverage_id: str,
    min_e: int,
    min_n: int,
    max_e: int,
    max_n: int,
    wcs_version: str = "1.0.0",
) -> str:
    """Return the WCS GetCoverage URL for one BNG bounding box.

    Args:
        wcs_base: WCS endpoint URL, without a query string.
        coverage_id: Identifier of the coverage to request.
        min_e: Western edge of the box (easting).
        min_n: Southern edge of the box (northing).
        max_e: Eastern edge of the box (easting).
        max_n: Northern edge of the box (northing).
        wcs_version: ``"2.0.1"`` selects the 2.0.1 request form; any other
            value produces a 1.0.0 request.

    Returns:
        The full request URL.
    """
    if wcs_version == "2.0.1":
        # WCS 2.0.1 downsamples via a scale factor relative to the native grid.
        scale = NATIVE_RESOLUTION / RESOLUTION
        query = "&".join(
            [
                "service=WCS",
                "version=2.0.1",
                "request=GetCoverage",
                f"coverageId={coverage_id}",
                "format=image/tiff",
                "subsettingCRS=EPSG:27700",
                f"subset=E({min_e},{max_e})",
                f"subset=N({min_n},{max_n})",
                f"scaleFactor={scale}",
            ]
        )
        return f"{wcs_base}?{query}"

    # WCS 1.0.0 takes the output size in pixels instead of a scale factor.
    width = (max_e - min_e) // RESOLUTION
    height = (max_n - min_n) // RESOLUTION
    query = "&".join(
        [
            "service=WCS",
            "version=1.0.0",
            "request=GetCoverage",
            f"coverage={coverage_id}",
            "CRS=EPSG:27700",
            f"BBOX={min_e},{min_n},{max_e},{max_n}",
            f"width={width}",
            f"height={height}",
            "format=GeoTIFF",
        ]
    )
    return f"{wcs_base}?{query}"
|
||||||
|
|
||||||
|
|
||||||
|
_TO_BNG = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _bng_from_latlon(lat: np.ndarray, lon: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Project WGS84 latitude/longitude arrays to BNG easting/northing arrays."""
    # The transformer takes x (longitude) first, then y (latitude).
    easting, northing = _TO_BNG.transform(lon, lat)
    return easting, northing
|
||||||
|
|
||||||
|
|
||||||
|
def _download_tile(
    wcs_base: str,
    coverage_id: str,
    min_e: int,
    min_n: int,
    max_e: int,
    max_n: int,
    tile_path: Path,
    wcs_version: str = "1.0.0",
) -> Path | None:
    """Download a single WCS tile and save it to ``tile_path``.

    Args:
        wcs_base: WCS endpoint URL.
        coverage_id: Identifier of the coverage to request.
        min_e: Western edge of the tile (BNG easting).
        min_n: Southern edge of the tile (BNG northing).
        max_e: Eastern edge of the tile (BNG easting).
        max_n: Northern edge of the tile (BNG northing).
        tile_path: File the GeoTIFF bytes are written to.
        wcs_version: WCS protocol version used to build the request.

    Returns:
        ``tile_path`` if a TIFF was downloaded and written, ``None`` if the
        request failed or the response was not a TIFF.
    """
    url = _wcs_get_coverage_url(wcs_base, coverage_id, min_e, min_n, max_e, max_n, wcs_version)
    try:
        with httpx.Client(timeout=300, follow_redirects=True) as client:
            resp = client.get(url)
            resp.raise_for_status()
    except httpx.HTTPError as e:
        # httpx.HTTPError is the common base of status errors and transport
        # errors (timeouts, connect/read failures, protocol errors), so any
        # network failure on one tile is reported and skipped instead of
        # aborting the whole multi-tile download.
        print(f" Failed to download tile ({min_e},{min_n})-({max_e},{max_n}): {e}")
        return None

    # Accept the response when either the content type says TIFF or the body
    # starts with a TIFF byte-order marker (little- or big-endian).
    content_type = resp.headers.get("content-type", "")
    if "tiff" not in content_type and resp.content[:4] not in (b"II*\x00", b"MM\x00*"):
        return None

    tile_path.write_bytes(resp.content)
    return tile_path
|
||||||
|
|
||||||
|
|
||||||
|
def download_raster(
    tile_dir: Path, wcs_base: str, coverage_id: str, label: str, wcs_version: str = "1.0.0"
) -> list[Path]:
    """Download every tile of the configured BNG extent into ``tile_dir``.

    Args:
        tile_dir: Existing directory the tile files are written into.
        wcs_base: WCS endpoint URL.
        coverage_id: Identifier of the coverage to request.
        label: Name of the noise source, used in progress output.
        wcs_version: WCS protocol version used for the requests.

    Returns:
        Paths of the tiles that downloaded successfully, in completion order.
    """
    # Tile the extent column by column; edge tiles are clipped to the extent.
    tile_boxes = [
        (
            west,
            south,
            min(west + TILE_SIZE, BNG_MAX_E),
            min(south + TILE_SIZE, BNG_MAX_N),
        )
        for west in range(BNG_MIN_E, BNG_MAX_E, TILE_SIZE)
        for south in range(BNG_MIN_N, BNG_MAX_N, TILE_SIZE)
    ]
    total = len(tile_boxes)

    print(f"[{label}] Downloading {total} tiles at {RESOLUTION}m resolution ({MAX_WORKERS} workers)...")
    saved: list[Path] = []

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        pending = [
            pool.submit(
                _download_tile,
                wcs_base,
                coverage_id,
                west,
                south,
                east,
                north,
                tile_dir / f"tile_{west}_{south}.tif",
                wcs_version,
            )
            for west, south, east, north in tile_boxes
        ]

        for finished, future in enumerate(as_completed(pending), start=1):
            tile = future.result()
            if tile is not None:
                saved.append(tile)
            # Rewrite the same console line after each completed request.
            print(
                f"\r [{finished}/{total}] Downloaded {len(saved)} valid tiles",
                end="",
                flush=True,
            )

    print(f"\n[{label}] Downloaded {len(saved)}/{total} tiles")
    return saved
|
||||||
|
|
||||||
|
|
||||||
|
def sample_noise_at_postcodes(
    tile_paths: list[Path],
    easting: np.ndarray,
    northing: np.ndarray,
    label: str,
    col_name: str,
) -> pl.Series:
    """Sample noise values from merged tiles at given BNG coordinates.

    Args:
        tile_paths: GeoTIFF tiles to merge; must contain at least one path.
        easting: BNG eastings of the sample points.
        northing: BNG northings of the sample points, aligned with ``easting``.
        label: Name of the noise source, used in progress output.
        col_name: Name given to the returned Series.

    Returns:
        A Series with one value per input point; points outside the mosaic,
        on nodata cells or on zero-valued cells are null.
    """
    print(f"[{label}] Merging {len(tile_paths)} tiles...")
    datasets = []
    try:
        # Open inside the try block so that every dataset opened so far is
        # closed even if a later open or the merge itself raises.
        for p in tile_paths:
            datasets.append(rasterio.open(p))
        # The nodata marker is taken from the first tile only.
        raster_nodata = datasets[0].nodata
        mosaic, mosaic_transform = merge(datasets)
    finally:
        for ds in datasets:
            ds.close()

    # Only the first band of the mosaic is used.
    noise_grid = mosaic[0]

    print(f"[{label}] Sampling noise values at postcode centroids...")
    rows, cols = rowcol(mosaic_transform, easting, northing)
    rows = np.asarray(rows)
    cols = np.asarray(cols)

    h, w = noise_grid.shape
    in_bounds = (rows >= 0) & (rows < h) & (cols >= 0) & (cols < w)

    # Start from all-NaN so that out-of-bounds points stay missing.
    noise_db = np.full(len(easting), np.nan, dtype=np.float32)
    valid_rows = rows[in_bounds]
    valid_cols = cols[in_bounds]
    sampled = noise_grid[valid_rows, valid_cols].astype(np.float32)

    # Mark nodata and zero (unmapped areas) as NaN.
    # Road/rail use nodata=-96, airport uses nodata=3.4e38.
    if raster_nodata is not None:
        sampled[np.isclose(sampled, np.float32(raster_nodata), rtol=1e-5)] = np.nan
    sampled[sampled == 0] = np.nan
    noise_db[in_bounds] = sampled

    valid_count = int(np.sum(~np.isnan(noise_db)))
    print(f"[{label}] Sampled {valid_count:,} / {len(easting):,} postcodes with noise data")

    # Return as masked Series: use null (not NaN) so that Polars max_horizontal
    # correctly ignores missing values instead of propagating NaN.
    return pl.Series(col_name, noise_db).fill_nan(None)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: sample each noise source at every postcode.

    Reads postcode coordinates from the ``--arcgis`` parquet, downloads the
    tiles of every entry in ``NOISE_SOURCES``, and writes a parquet with one
    ``postcode`` column plus one noise column per source to ``--output``.
    """
    cli = argparse.ArgumentParser(
        description="Download Defra noise data (road, rail, airport) and sample at postcode centroids"
    )
    cli.add_argument(
        "--arcgis",
        type=Path,
        required=True,
        help="ArcGIS postcode data parquet (for lat/lon coordinates)",
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    opts = cli.parse_args()

    opts.output.parent.mkdir(parents=True, exist_ok=True)

    print("Loading postcode coordinates...")
    raw_postcodes = pl.read_parquet(opts.arcgis, columns=["pcds", "lat", "long"])
    postcodes = raw_postcodes.rename({"pcds": "postcode", "long": "lon"})

    lat = postcodes["lat"].to_numpy()
    lon = postcodes["lon"].to_numpy()

    print("Converting lat/lon to BNG...")
    easting, northing = _bng_from_latlon(lat, lon)

    noise_table = postcodes.select("postcode")

    # Tiles are only needed while sampling, so they live in a temp directory
    # with one sub-directory per noise source.
    with tempfile.TemporaryDirectory() as tmp:
        scratch = Path(tmp)
        for label, col_name, wcs_base, coverage_id, wcs_version in NOISE_SOURCES:
            tile_dir = scratch / label.lower()
            tile_dir.mkdir()
            tile_paths = download_raster(tile_dir, wcs_base, coverage_id, label, wcs_version)

            if tile_paths:
                series = sample_noise_at_postcodes(tile_paths, easting, northing, label, col_name)
            else:
                # Keep the column in the output even when nothing downloaded.
                print(f"[{label}] WARNING: No tiles downloaded — column will be all null")
                series = pl.Series(col_name, [None] * len(lat), dtype=pl.Float32)

            noise_table = noise_table.with_columns(series)

    noise_table.write_parquet(opts.output, compression="zstd")
    written_mb = opts.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {opts.output} ({written_mb:.1f} MB)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
46
pipeline/download/ofsted.py
Normal file
46
pipeline/download/ofsted.py
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
import polars as pl
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pipeline.utils import download
|
||||||
|
|
||||||
|
# Management information - state-funded schools - latest inspections (as at 30 Apr 2025)
|
||||||
|
# Source: https://www.gov.uk/government/statistical-data-sets/monthly-management-information-ofsteds-school-inspections-outcomes
|
||||||
|
URL = "https://assets.publishing.service.gov.uk/media/681cd390275cb67b18d870fc/Management_information_-_state-funded_schools_-_latest_inspections_as_at_30_Apr_2025.csv"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
    """Convert the downloaded Ofsted inspections CSV to parquet.

    Args:
        csv_path: Path of the downloaded CSV file.
        parquet_path: Destination parquet file; missing parent directories
            are created.
    """
    print("Reading CSV...")

    df = pl.read_csv(
        csv_path,
        infer_schema_length=10000,
        encoding="utf8-lossy",
        # These placeholder strings are read as nulls.
        null_values=["NULL", "Not applicable"],
    )

    print(f"Shape: {df.shape}")
    print(f"Columns: {df.columns}")

    # Create the output directory first, like the other download scripts do,
    # so a fresh checkout does not fail on the write.
    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    df.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: download the Ofsted CSV and write it as parquet."""
    arg_parser = argparse.ArgumentParser(
        description="Download Ofsted school inspection outcomes data"
    )
    arg_parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    options = arg_parser.parse_args()

    # The CSV is only an intermediate artefact, so it lives in a temporary
    # directory.
    with tempfile.TemporaryDirectory() as cache_dir:
        downloaded_csv = Path(cache_dir) / "ofsted_latest_inspections.csv"
        download(URL, downloaded_csv, timeout=60)
        convert_to_parquet(downloaded_csv, options.output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
175
pipeline/download/pois.py
Normal file
175
pipeline/download/pois.py
Normal file
|
|
@ -0,0 +1,175 @@
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
from tempfile import mkdtemp
|
||||||
|
|
||||||
|
import osmium
|
||||||
|
import polars as pl
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
BATCH_SIZE = 50_000
|
||||||
|
|
||||||
|
MIN_OCCURENCE_COUNT = 20
|
||||||
|
|
||||||
|
GEOFABRIK_GB_URL = "https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
|
||||||
|
|
||||||
|
UK_BBOX_WEST = -7.57
|
||||||
|
UK_BBOX_SOUTH = 49.96
|
||||||
|
UK_BBOX_EAST = 1.68
|
||||||
|
UK_BBOX_NORTH = 58.64
|
||||||
|
|
||||||
|
POI_TAG_KEYS: list[str] = [
|
||||||
|
"amenity",
|
||||||
|
"building",
|
||||||
|
"craft",
|
||||||
|
"emergency",
|
||||||
|
"healthcare",
|
||||||
|
"leisure",
|
||||||
|
"office",
|
||||||
|
"shop",
|
||||||
|
"tourism",
|
||||||
|
"public_transport",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def download_pbf(pbf_file: Path) -> None:
    """Download the Geofabrik Great Britain extract to ``pbf_file``.

    The response is streamed in 1 MiB chunks into a sibling ``*.pbf.tmp`` file
    and only moved onto ``pbf_file`` once the transfer has completed, so the
    final path never holds a truncated download.

    Args:
        pbf_file: Destination path; missing parent directories are created.

    Raises:
        Whatever ``urllib.request.urlopen`` or the file writes raise; the
        partial temporary file is removed before the error propagates.
    """
    pbf_file.parent.mkdir(parents=True, exist_ok=True)
    tmp = pbf_file.with_suffix(".pbf.tmp")
    print(f"Downloading {GEOFABRIK_GB_URL}")

    try:
        with (
            tqdm(unit="B", unit_scale=True, desc="Downloading") as bar,
            urllib.request.urlopen(GEOFABRIK_GB_URL) as resp,
            open(tmp, "wb") as f,
        ):
            # The total is only known when the server sends Content-Length.
            length = resp.headers.get("Content-Length")
            if length:
                bar.total = int(length)
            while chunk := resp.read(1 << 20):
                f.write(chunk)
                bar.update(len(chunk))
    except BaseException:
        # Do not leave a half-written temp file behind (also on Ctrl-C).
        tmp.unlink(missing_ok=True)
        raise

    # replace() overwrites an existing target on every platform; rename() does not.
    tmp.replace(pbf_file)
    print(f"Saved to {pbf_file}")
|
||||||
|
|
||||||
|
|
||||||
|
class POIHandler(osmium.SimpleHandler):
    """Osmium handler that collects tagged nodes as POI rows.

    Rows are buffered in memory and written to numbered
    ``batch_NNNNN.parquet`` files in ``tmp_dir`` every ``BATCH_SIZE`` rows.
    ``poi_count`` holds the running number of rows emitted. Only nodes are
    handled; ways and relations are not inspected by this class.
    """

    def __init__(self, progress: tqdm, tmp_dir: Path) -> None:
        """Store the progress bar to update and the directory for batch files."""
        super().__init__()
        self._batch: list[dict] = []
        self._tmp_dir = tmp_dir
        self._batch_num = 0
        self.poi_count = 0
        self._progress = progress

    def _in_uk(self, lat: float, lon: float) -> bool:
        """Return True when the coordinate lies inside the UK bounding box."""
        return (
            UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH
            and UK_BBOX_WEST <= lon <= UK_BBOX_EAST
        )

    def _match_tags(self, tags: osmium.osm.TagList) -> list[str]:
        """Return one ``"<key>/<value>"`` category per POI tag key present."""
        return [f"{key}/{tags[key]}" for key in POI_TAG_KEYS if key in tags]

    def _get_name(self, tags: osmium.osm.TagList) -> str:
        """Return ``name:en`` if tagged, else ``name``, else an empty string."""
        return tags.get("name:en", tags.get("name", ""))

    def _flush_batch(self) -> None:
        """Write the buffered rows to the next batch file and empty the buffer."""
        if not self._batch:
            return
        df = pl.DataFrame(self._batch)
        out = self._tmp_dir / f"batch_{self._batch_num:05d}.parquet"
        df.write_parquet(out)
        self._batch_num += 1
        self._batch.clear()

    def _add_poi(
        self,
        osm_id: str,
        tags: osmium.osm.TagList,
        category: str,
        lat: float,
        lng: float,
    ) -> None:
        """Buffer one POI row and flush once ``BATCH_SIZE`` rows are pending."""
        self._batch.append(
            {
                "id": osm_id,
                "name": self._get_name(tags),
                "category": category,
                "lat": lat,
                "lng": lng,
            }
        )
        self.poi_count += 1
        # refresh=False: the bar is redrawn on its own schedule, not per POI.
        self._progress.set_postfix(pois=f"{self.poi_count:,}", refresh=False)
        if len(self._batch) >= BATCH_SIZE:
            self._flush_batch()

    def _tick(self) -> None:
        """Advance the progress bar by one processed element."""
        self._progress.update(1)

    def node(self, n: osmium.osm.Node) -> None:
        """Handle one OSM node: emit a row per matching POI tag key."""
        self._tick()
        # Location.valid is a method; without the call the bound method is
        # always truthy and this guard never skipped anything.
        if not n.location.valid():
            return
        lat, lon = n.location.lat, n.location.lon
        if not self._in_uk(lat, lon):
            return
        # A node carrying several POI keys yields several rows with the same id.
        for category in self._match_tags(n.tags):
            self._add_poi(f"n{n.id}", n.tags, category, lat, lon)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: download the GB extract and write POIs to Parquet.

    Steps: download the PBF into a temporary directory, stream it through
    ``POIHandler`` (which writes batch files into a second temporary
    directory), then lazily concatenate the batches, drop categories with
    fewer than ``MIN_OCCURENCE_COUNT`` rows, and sink the result to
    ``--output``. Both temporary directories are removed on exit.

    Raises:
        SystemExit: if no POI rows were extracted at all.
    """
    parser = argparse.ArgumentParser(
        description="Download and extract POIs from OpenStreetMap"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    args = parser.parse_args()

    # The batch directory is managed by a context manager too, so the
    # intermediate parquet files no longer accumulate in the system temp dir.
    with (
        tempfile.TemporaryDirectory() as cache_dir,
        tempfile.TemporaryDirectory(prefix="pois_") as batch_dir,
    ):
        pbf_file = Path(cache_dir) / "great-britain-latest.osm.pbf"
        # cache_dir is brand new on every run, so there is never a cached
        # copy to reuse: always download.
        download_pbf(pbf_file)

        print(f"Tag keys: {POI_TAG_KEYS}")

        tmp_dir = Path(batch_dir)
        with tqdm(
            unit=" elements",
            unit_scale=True,
            desc="Streaming",
            smoothing=0.05,
            mininterval=1.0,
        ) as progress:
            handler = POIHandler(progress, tmp_dir)
            handler.apply_file(str(pbf_file), locations=True)
            handler._flush_batch()  # write any remaining POIs

        print(f"Extracted {handler.poi_count:,} POIs")

        batch_files = sorted(tmp_dir.glob("batch_*.parquet"))
        if not batch_files:
            # pl.concat() rejects an empty list; fail with a clear message instead.
            raise SystemExit("No POIs were extracted; nothing to write")
        df = pl.concat([pl.scan_parquet(f) for f in batch_files])

        # Only keep categories with enough occurrences
        valid_categories = (
            df.group_by("category")
            .agg(pl.len().alias("count"))
            .filter(pl.col("count") >= MIN_OCCURENCE_COUNT)
        )
        df = df.join(valid_categories.select("category"), on="category", how="semi")

        print(f"Total POIs: {handler.poi_count:,}")
        args.output.parent.mkdir(parents=True, exist_ok=True)
        # Must run while the batch files still exist: the frame is lazy.
        df.sink_parquet(args.output)
        print(f"Saved to {args.output}")


if __name__ == "__main__":
    main()
|
||||||
66
pipeline/download/price_paid.py
Normal file
66
pipeline/download/price_paid.py
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
import argparse
|
||||||
|
import tempfile
|
||||||
|
import polars as pl
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from pipeline.utils import download
|
||||||
|
|
||||||
|
URL = "http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
    """Read the price-paid CSV with Polars and write it as zstd Parquet.

    Args:
        csv_path: Location of the downloaded CSV.
        parquet_path: Output file; its parent directory is created if missing.
    """
    print("Converting to Parquet...")

    # The CSV is read without a header row, so column names are supplied here.
    # Field list: https://www.gov.uk/guidance/about-the-price-paid-data
    column_names = [
        "transaction_id",
        "price",
        "date_of_transfer",
        "postcode",
        "property_type",
        "old_new",
        "duration",
        "paon",
        "saon",
        "street",
        "locality",
        "town_city",
        "district",
        "county",
        "ppd_category",
        "record_status",
    ]

    frame = pl.read_csv(
        csv_path,
        has_header=False,
        new_columns=column_names,
        try_parse_dates=True,
    )

    target_dir = parquet_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    print(f"Columns: {frame.collect_schema().names()}")
    frame.write_parquet(parquet_path, compression="zstd")
    print(f"Saved to {parquet_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: download the price-paid CSV and convert it.

    The CSV lives in a temporary directory that is deleted after the Parquet
    file has been written to ``--output``.
    """
    cli = argparse.ArgumentParser(
        description="Download and convert Land Registry price-paid data"
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    output_path = cli.parse_args().output

    with tempfile.TemporaryDirectory() as scratch:
        raw_csv = Path(scratch) / "price-paid-complete.csv"
        download(URL, raw_csv)
        convert_to_parquet(raw_csv, output_path)


if __name__ == "__main__":
    main()
|
||||||
29
pipeline/journey_times/__init__.py
Normal file
29
pipeline/journey_times/__init__.py
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
"""Journey times calculation module for TfL transit data."""
|
||||||
|
|
||||||
|
from .config import (
|
||||||
|
DATA_DIR,
|
||||||
|
DESTINATIONS,
|
||||||
|
MAX_CONCURRENT,
|
||||||
|
MAX_DELAY,
|
||||||
|
MAX_POSTCODES,
|
||||||
|
OUTPUT_DIR,
|
||||||
|
REQUESTS_PER_MIN,
|
||||||
|
)
|
||||||
|
from .models import Destination, JourneyResult
|
||||||
|
from .results import results_to_dataframe, save_results
|
||||||
|
from .tfl_client import fetch_journey_times
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"DATA_DIR",
|
||||||
|
"OUTPUT_DIR",
|
||||||
|
"MAX_DELAY",
|
||||||
|
"REQUESTS_PER_MIN",
|
||||||
|
"MAX_POSTCODES",
|
||||||
|
"MAX_CONCURRENT",
|
||||||
|
"DESTINATIONS",
|
||||||
|
"Destination",
|
||||||
|
"JourneyResult",
|
||||||
|
"fetch_journey_times",
|
||||||
|
"results_to_dataframe",
|
||||||
|
"save_results",
|
||||||
|
]
|
||||||
142
pipeline/journey_times/__main__.py
Normal file
142
pipeline/journey_times/__main__.py
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
import asyncio
|
||||||
|
import random
|
||||||
|
from datetime import date, timedelta
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from .config import (
|
||||||
|
DESTINATIONS,
|
||||||
|
MAX_CONCURRENT,
|
||||||
|
MAX_POSTCODES,
|
||||||
|
OUTPUT_DIR,
|
||||||
|
MAX_DISTANCE_KM,
|
||||||
|
)
|
||||||
|
from .models import JourneyResult
|
||||||
|
from .results import CheckpointSaver, results_to_dataframe, save_results
|
||||||
|
from .tfl_client import fetch_journey_times
|
||||||
|
from pipeline.utils import haversine_km_expr
|
||||||
|
|
||||||
|
|
||||||
|
def _load_checkpoint(path) -> list[JourneyResult]:
    """Rebuild previously fetched results from the checkpoint parquet at ``path``.

    Rows are de-duplicated per postcode. Sorting with nulls last beforehand
    means a row that has a quick public-transport time is preferred over one
    that does not.
    """
    checkpoint_df = (
        pl.read_parquet(path)
        .sort("public_transport_quick_minutes", nulls_last=True)
        .unique(subset=["postcode"], keep="first")
    )
    return [
        JourneyResult(
            postcode=row["postcode"],
            public_transport_easy_minutes=row["public_transport_easy_minutes"],
            public_transport_quick_minutes=row["public_transport_quick_minutes"],
            cycling_minutes=row["cycling_minutes"],
            error=row["error"],
        )
        for row in checkpoint_df.iter_rows(named=True)
    ]


def main():
    """Fetch journey times from every in-range postcode to Bank and save them.

    Loads ``postcodes_h3.parquet`` from ``OUTPUT_DIR``, keeps postcodes within
    ``MAX_DISTANCE_KM`` of the destination, optionally samples
    ``MAX_POSTCODES`` of them, resumes from an existing checkpoint if present,
    queries the TfL client for the remainder, and writes the combined results
    (with coordinates and journey metadata) via ``save_results``.
    """
    destination = DESTINATIONS["bank"]

    # Next Monday (never today, even if today is a Monday) at 08:45 (HHMM).
    today = date.today()
    days_until_monday = (7 - today.weekday()) % 7 or 7
    journey_date = today + timedelta(days=days_until_monday)
    journey_time = "0845"

    print(f"Destination: {destination.name}")
    print(
        f"Journey: {journey_date.strftime('%A %Y-%m-%d')} "
        f"at {journey_time[:2]}:{journey_time[2:]}"
    )

    postcodes_df = pl.read_parquet(OUTPUT_DIR / "postcodes_h3.parquet")
    print(f"Loaded {postcodes_df.height:,} postcodes")

    # Filter to postcodes within range of destination
    postcodes_df = postcodes_df.with_columns(
        haversine_km_expr("lat", "long", destination.lat, destination.lon).alias(
            "distance_km"
        )
    ).filter(pl.col("distance_km") <= MAX_DISTANCE_KM)

    print(f"Filtered to {postcodes_df.height:,} postcodes within {MAX_DISTANCE_KM}km")

    postcode_data = list(
        zip(
            postcodes_df["postcode"].to_list(),
            postcodes_df["lat"].to_list(),
            postcodes_df["long"].to_list(),
        )
    )

    if MAX_POSTCODES is not None and len(postcode_data) > MAX_POSTCODES:
        postcode_data = random.sample(postcode_data, MAX_POSTCODES)
        print(f"Randomly sampled {MAX_POSTCODES} postcodes")

    checkpoint_saver = CheckpointSaver(
        destination_name=destination.name,
        on_save=lambda path, count: print(
            f"Checkpoint saved: {count:,} results to {path}"
        ),
    )

    # Resume from checkpoint if one exists
    checkpoint_path = checkpoint_saver._checkpoint_path()
    prior_results: list[JourneyResult] = []
    if checkpoint_path.exists():
        prior_results = _load_checkpoint(checkpoint_path)
        completed_postcodes = {r.postcode for r in prior_results}
        # Give the saver its own copy. Sharing the list made add_result() grow
        # prior_results as well, so every new result was counted twice in
        # `prior_results + new_results` below.
        checkpoint_saver.results = list(prior_results)
        checkpoint_saver._last_save_count = len(prior_results)
        postcode_data = [
            (pc, lat, lon)
            for pc, lat, lon in postcode_data
            if pc not in completed_postcodes
        ]
        print(
            f"Resumed from checkpoint: {len(prior_results):,} already done, "
            f"{len(postcode_data):,} remaining"
        )

    def on_result(result):
        # `pbar` is bound by the `with` block below before any result arrives.
        pbar.update(1)
        checkpoint_saver.add_result(result)

    with tqdm(total=len(postcode_data), desc="Fetching journeys") as pbar:
        new_results = asyncio.run(
            fetch_journey_times(
                postcode_data,
                destination,
                journey_date.strftime("%Y%m%d"),
                journey_time,
                MAX_CONCURRENT,
                progress_callback=on_result,
            )
        )

    all_results = prior_results + new_results
    results_df = results_to_dataframe(all_results)

    # Attach coordinates; the left join keeps one row per in-range postcode
    # that has a result.
    all_postcodes = {r.postcode for r in all_results}
    coords_df = postcodes_df.filter(
        pl.col("postcode").is_in(all_postcodes)
    ).select(["postcode", "lat", "long"])
    results_df = coords_df.join(results_df, on="postcode", how="left")

    results_df = results_df.with_columns(
        pl.lit(destination.name).alias("destination"),
        pl.lit(journey_date.strftime("%Y-%m-%d")).alias("journey_date"),
        pl.lit(f"{journey_time[:2]}:{journey_time[2:]}").alias("journey_time"),
    )

    # A row counts as successful when a cycling time was obtained.
    successful = results_df.filter(pl.col("cycling_minutes").is_not_null()).height
    print(f"Completed: {successful}/{len(all_results)} successful")

    parquet_path = save_results(results_df, destination.name)
    # Only drop the checkpoint once the final file has been written.
    checkpoint_saver.cleanup_checkpoint()
    print(f"Saved to {parquet_path}")


if __name__ == "__main__":
    main()
|
||||||
23
pipeline/journey_times/config.py
Normal file
23
pipeline/journey_times/config.py
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
"""Configuration constants for journey times processing."""

from pathlib import Path

from .models import Destination

# DATA_DIR and OUTPUT_DIR are imported from this module by the package
# __init__, __main__ and results, but were never defined here, so importing
# the package raised ImportError.
# NOTE(review): the locations below are a best guess (repo-root "data"
# directory, processed files beneath it) — confirm against where
# postcodes_h3.parquet is actually produced.
DATA_DIR = Path(__file__).resolve().parents[2] / "data"
OUTPUT_DIR = DATA_DIR / "processed"

# Upper bound, in seconds, for the retry backoff in the TfL client.
MAX_DELAY = 10
# Request budget used by the rate limiter.
REQUESTS_PER_MIN = 500
# Optional cap on how many postcodes are sampled; None disables sampling.
MAX_POSTCODES = None
# Concurrency limit passed to fetch_journey_times.
MAX_CONCURRENT = 80
# Postcodes further than this from the destination are skipped.
MAX_DISTANCE_KM = 110
# Number of new results between checkpoint writes.
CHECKPOINT_INTERVAL = 10000

# Destination(lat, lon, name, naptan_id)
DESTINATIONS = {
    "bank": Destination(51.5133, -0.0886, "Bank", "940GZZLUBNK"),
    "waterloo": Destination(51.5031, -0.1132, "Waterloo", "940GZZLUWLO"),
    "kings-cross": Destination(51.5308, -0.1238, "King's Cross", "940GZZLUKSX"),
    "liverpool-street": Destination(
        51.5178, -0.0823, "Liverpool Street", "940GZZLULVS"
    ),
    "paddington": Destination(51.5154, -0.1755, "Paddington", "940GZZLUPAC"),
    "victoria": Destination(51.4965, -0.1447, "Victoria", "940GZZLUVIC"),
}
|
||||||
30
pipeline/journey_times/models.py
Normal file
30
pipeline/journey_times/models.py
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
"""Data models for journey times processing."""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Destination:
|
||||||
|
"""A destination point for journey planning."""
|
||||||
|
|
||||||
|
lat: float
|
||||||
|
lon: float
|
||||||
|
name: str
|
||||||
|
naptan_id: str | None = None
|
||||||
|
|
||||||
|
def to_tfl_location(self) -> str:
|
||||||
|
"""Convert to TfL API location string."""
|
||||||
|
if self.naptan_id:
|
||||||
|
return self.naptan_id
|
||||||
|
return f"{self.lat},{self.lon}"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class JourneyResult:
|
||||||
|
"""Result of a journey time calculation for a postcode."""
|
||||||
|
|
||||||
|
postcode: str
|
||||||
|
public_transport_easy_minutes: int | None = None
|
||||||
|
cycling_minutes: int | None = None
|
||||||
|
public_transport_quick_minutes: int | None = None
|
||||||
|
error: str | None = None
|
||||||
35
pipeline/journey_times/rate_limiter.py
Normal file
35
pipeline/journey_times/rate_limiter.py
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
"""Rate limiting for TfL API requests."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from .config import REQUESTS_PER_MIN
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
|
||||||
|
"""Rate limiter enforcing max requests per minute."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.request_times: list[float] = []
|
||||||
|
self._lock = asyncio.Lock()
|
||||||
|
|
||||||
|
async def acquire(self):
|
||||||
|
"""Wait until we can make a request within rate limits."""
|
||||||
|
async with self._lock:
|
||||||
|
now = asyncio.get_event_loop().time()
|
||||||
|
cutoff = now - 10.0 # 10 seconds
|
||||||
|
self.request_times = [t for t in self.request_times if t > cutoff]
|
||||||
|
|
||||||
|
if (
|
||||||
|
len(self.request_times) >= REQUESTS_PER_MIN // 6
|
||||||
|
): # we look at it every 10 seconds instead of minutes
|
||||||
|
wait_time = self.request_times[0] - cutoff
|
||||||
|
if wait_time > 0:
|
||||||
|
warnings.warn(
|
||||||
|
f"Rate limit reached ({REQUESTS_PER_MIN}/min), "
|
||||||
|
f"waiting {wait_time:.1f}s",
|
||||||
|
stacklevel=1,
|
||||||
|
)
|
||||||
|
await asyncio.sleep(wait_time)
|
||||||
|
|
||||||
|
self.request_times.append(asyncio.get_event_loop().time())
|
||||||
85
pipeline/journey_times/results.py
Normal file
85
pipeline/journey_times/results.py
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
from .config import CHECKPOINT_INTERVAL, OUTPUT_DIR
|
||||||
|
from .models import JourneyResult
|
||||||
|
|
||||||
|
|
||||||
|
def results_to_dataframe(results: list[JourneyResult]) -> pl.DataFrame:
    """Convert journey results into a DataFrame with a fixed schema.

    The schema is given explicitly instead of being inferred from the leading
    rows: an empty ``results`` list still yields all five columns, and a
    column whose first values are all ``None`` (typically ``error``) keeps its
    proper dtype.

    Args:
        results: Results to convert, one output row per element.

    Returns:
        Frame with columns ``postcode``, ``public_transport_easy_minutes``,
        ``public_transport_quick_minutes``, ``cycling_minutes`` and ``error``.
    """
    rows = [
        {
            "postcode": r.postcode,
            "public_transport_easy_minutes": r.public_transport_easy_minutes,
            "public_transport_quick_minutes": r.public_transport_quick_minutes,
            "cycling_minutes": r.cycling_minutes,
            "error": r.error,
        }
        for r in results
    ]
    return pl.DataFrame(
        rows,
        schema={
            "postcode": pl.Utf8,
            "public_transport_easy_minutes": pl.Int64,
            "public_transport_quick_minutes": pl.Int64,
            "cycling_minutes": pl.Int64,
            "error": pl.Utf8,
        },
    )
|
||||||
|
|
||||||
|
|
||||||
|
class CheckpointSaver:
    """Collects results and saves checkpoints at regular intervals.

    Args:
        destination_name: Used to derive the checkpoint file name.
        output_dir: Directory for the checkpoint; defaults to ``OUTPUT_DIR``.
        interval: Number of new results between automatic checkpoint writes.
        on_save: Optional callback invoked with ``(path, result_count)`` after
            every checkpoint write.
    """

    def __init__(
        self,
        destination_name: str,
        output_dir: Path | None = None,
        interval: int = CHECKPOINT_INTERVAL,
        on_save: Callable[[Path, int], None] | None = None,
    ):
        self.destination_name = destination_name
        self.output_dir = output_dir or OUTPUT_DIR
        self.interval = interval
        self.on_save = on_save
        self.results: list[JourneyResult] = []
        # Length of self.results at the time of the last checkpoint write.
        self._last_save_count = 0

    def add_result(self, result: JourneyResult) -> None:
        """Add a result and save checkpoint if interval is reached."""
        self.results.append(result)
        if len(self.results) - self._last_save_count >= self.interval:
            self.save_checkpoint()

    def save_checkpoint(self) -> Path:
        """Save current results to the checkpoint file and return its path.

        The data is written to a temporary sibling file and then moved into
        place, so an interruption during the write cannot destroy the
        previous checkpoint.
        """
        df = results_to_dataframe(self.results)
        path = self._checkpoint_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = path.with_name(path.name + ".tmp")
        df.write_parquet(tmp_path)
        tmp_path.replace(path)
        self._last_save_count = len(self.results)
        if self.on_save:
            self.on_save(path, len(self.results))
        return path

    def _checkpoint_path(self) -> Path:
        """Return the checkpoint path derived from the destination name."""
        safe_name = self.destination_name.lower().replace(" ", "-")
        return self.output_dir / f"journey_times_{safe_name}_checkpoint.parquet"

    def get_results(self) -> list[JourneyResult]:
        """Return all collected results."""
        return self.results

    def cleanup_checkpoint(self) -> None:
        """Remove the checkpoint file after successful completion."""
        path = self._checkpoint_path()
        if path.exists():
            path.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
def save_results(
    results: pl.DataFrame,
    destination_name: str,
    output_dir: Path | None = None,
) -> Path:
    """Write ``results`` to ``journey_times_<destination>.parquet``.

    The destination name is lower-cased and its spaces become hyphens. When
    ``output_dir`` is ``None`` the module-level ``OUTPUT_DIR`` is used.

    Returns:
        Path of the file that was written.
    """
    target_dir = OUTPUT_DIR if output_dir is None else output_dir
    slug = destination_name.lower().replace(" ", "-")
    destination_file = target_dir / f"journey_times_{slug}.parquet"
    results.write_parquet(destination_file)
    return destination_file
|
||||||
254
pipeline/journey_times/tfl_client.py
Normal file
254
pipeline/journey_times/tfl_client.py
Normal file
|
|
@ -0,0 +1,254 @@
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from typing import Literal
|
||||||
|
import warnings
|
||||||
|
from collections.abc import Callable
|
||||||
|
from http import HTTPStatus
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from .config import MAX_DELAY
|
||||||
|
from .models import Destination, JourneyResult
|
||||||
|
from .rate_limiter import RateLimiter
|
||||||
|
|
||||||
|
|
||||||
|
BASE_URL = "https://api.tfl.gov.uk"
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_journey_for_mode(
    client: httpx.AsyncClient,
    rate_limiter: RateLimiter,
    from_location: str,
    to_location: str,
    journey_date: str,
    journey_time: str,
    journey_type: Literal["quick", "easy", "cycle"],
    retry_count: int = 5,
) -> int | None:
    """Fetch journey time for a specific mode with rate limiting.

    Args:
        client: HTTP client used for the request.
        rate_limiter: Awaited before every attempt.
        from_location: Origin, placed in the request path.
        to_location: Destination, placed in the request path.
        journey_date: Sent as the ``date`` query parameter.
        journey_time: Sent as the ``time`` query parameter (``timeIs`` is
            ``Arriving``).
        journey_type: Selects the mode list and preferences.
        retry_count: Maximum number of attempts.

    Returns:
        The smallest ``duration`` among the returned journeys, or ``None``
        when there is no journey, the status is not retryable, or all
        attempts failed.

    Raises:
        KeyError: if ``journey_type`` is not one of the supported values.
    """
    # The request does not change between attempts, so build it once. Doing
    # this outside the retry loop also lets an invalid journey_type fail
    # immediately instead of being retried as a "network error".
    journey_preference = {
        "quick": "LeastTime",
        "easy": "LeastInterchange",
        "cycle": None,
    }[journey_type]

    cycle_preference = {
        "quick": None,
        "easy": None,
        "cycle": "AllTheWay",
    }[journey_type]

    # curl -s "https://api.tfl.gov.uk/Journey/Meta/Modes" | jq '.[].modeName'
    mode = {
        "quick": [
            "bus",
            "overground",
            "national-rail",
            "international-rail",
            "elizabeth-line",
            "tube",
            "coach",
            "dlr",
            "cable-car",
            "replacement-bus",
            "tram",
            "river-bus",
            "walking",
            "cycle",
        ],
        "easy": [
            "bus",
            "overground",
            "national-rail",
            "international-rail",
            "elizabeth-line",
            "replacement-bus",
            "tube",
            "coach",
            "dlr",
            "cable-car",
            "tram",
            "river-bus",
        ],
        "cycle": ["cycle"],
    }[journey_type]

    params: dict = {
        "date": journey_date,
        "time": journey_time,
        "nationalSearch": "true",
        "timeIs": "Arriving",
        "cyclePreference": cycle_preference,
        "bikeProficiency": "Fast",
        "walkingOptimization": str(journey_type == "quick").lower(),
        "mode": ",".join(mode),
    }
    if journey_preference:
        params["journeyPreference"] = journey_preference

    url = f"/Journey/JourneyResults/{from_location}/to/{to_location}"
    retryable_statuses = (
        HTTPStatus.TOO_MANY_REQUESTS,
        HTTPStatus.INTERNAL_SERVER_ERROR,
        HTTPStatus.BAD_GATEWAY,
        HTTPStatus.SERVICE_UNAVAILABLE,
        HTTPStatus.GATEWAY_TIMEOUT,
    )

    backoff = 1.0
    for attempt in range(retry_count):
        try:
            await rate_limiter.acquire()
            response = await client.get(url, params=params)

            if response.status_code == HTTPStatus.OK:
                journeys = response.json().get("journeys", [])
                durations = [
                    j["duration"] for j in journeys if j.get("duration") is not None
                ]
                return min(durations) if durations else None
            if response.status_code not in retryable_statuses:
                return None
            failure = f"HTTP {response.status_code} for {journey_type} from {from_location}"
        except Exception as e:
            # Deliberately broad (best effort): any failure while requesting
            # or decoding counts as one failed attempt.
            failure = f"Network error for {journey_type} from {from_location}: {e}"

        if attempt + 1 >= retry_count:
            # Last attempt: no point sleeping or announcing a retry.
            warnings.warn(
                f"{failure}, giving up (attempt {attempt + 1}/{retry_count})",
                stacklevel=2,
            )
            break

        warnings.warn(
            f"{failure}, "
            f"retrying in {backoff:.1f}s (attempt {attempt + 1}/{retry_count})",
            stacklevel=2,
        )
        await asyncio.sleep(backoff)
        backoff = min(backoff * 2, MAX_DELAY)

    warnings.warn(
        f"Failed to fetch {journey_type} from {from_location} after {retry_count} attempts",
        stacklevel=2,
    )
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_all_modes(
    client: httpx.AsyncClient,
    rate_limiter: RateLimiter,
    postcode: str,
    lat: float,
    lon: float,
    to_location: str,
    journey_date: str,
    journey_time: str,
    semaphore: asyncio.Semaphore,
) -> JourneyResult:
    """Look up easy, quick and cycling journey times for one postcode.

    The origin is sent as ``"<lat>,<lon>"``. The three lookups run one after
    another while holding ``semaphore``. Any exception is printed and turned
    into a ``JourneyResult`` that only carries the error text.
    """
    async with semaphore:
        try:
            origin = f"{lat},{lon}"
            minutes: dict = {}
            # Same order as before: easy, then quick, then cycle.
            for kind in ("easy", "quick", "cycle"):
                minutes[kind] = await fetch_journey_for_mode(
                    client,
                    rate_limiter,
                    origin,
                    to_location,
                    journey_date,
                    journey_time,
                    journey_type=kind,
                )
        except Exception as e:
            print(f"Error: {e}")
            return JourneyResult(postcode=postcode, error=str(e))
        else:
            return JourneyResult(
                postcode=postcode,
                public_transport_easy_minutes=minutes["easy"],
                public_transport_quick_minutes=minutes["quick"],
                cycling_minutes=minutes["cycle"],
            )
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_journey_times(
    postcode_data: list[tuple[str, float, float]],
    dest: Destination,
    journey_date: str,
    journey_time: str,
    max_concurrent: int = 2,
    progress_callback: Callable[[JourneyResult], None] | None = None,
) -> list[JourneyResult]:
    """Fetch journey times for all postcodes with rate limiting.

    Args:
        postcode_data: List of (postcode, lat, lon) tuples
        dest: Destination for journey planning
        journey_date: Date in YYYYMMDD format
        journey_time: Time in HHMM format
        max_concurrent: Size of the semaphore limiting how many postcodes are
            processed at the same time
        progress_callback: Optional callback called with each result as it
            completes

    Returns:
        List of JourneyResult objects in the same order as postcode_data

    Raises:
        RuntimeError: if the TFL_TOKEN environment variable is missing or empty.
    """
    gate = asyncio.Semaphore(max_concurrent)
    target = dest.to_tfl_location()
    limiter = RateLimiter()

    # TFL API authentication via app_key query parameter
    token = os.environ.get("TFL_TOKEN")
    if not token:
        raise RuntimeError("TFL_TOKEN environment variable not set")

    async with httpx.AsyncClient(
        base_url=BASE_URL,
        params={"app_key": token},
        timeout=httpx.Timeout(30),
    ) as client:
        pending = [
            fetch_all_modes(
                client,
                limiter,
                postcode,
                lat,
                lon,
                target,
                journey_date,
                journey_time,
                gate,
            )
            for postcode, lat, lon in postcode_data
        ]

        # Results arrive in completion order; index them by postcode so the
        # caller gets them back in input order.
        by_postcode: dict = {}
        for finished in asyncio.as_completed(pending):
            outcome = await finished
            by_postcode[outcome.postcode] = outcome
            if progress_callback:
                progress_callback(outcome)

    return [by_postcode[postcode] for postcode, _, _ in postcode_data]
|
||||||
|
|
@ -1,42 +0,0 @@
|
||||||
from pathlib import Path
|
|
||||||
import polars as pl
|
|
||||||
|
|
||||||
from pipeline.config import AGGREGATES_DIR, H3_RESOLUTIONS
|
|
||||||
|
|
||||||
|
|
||||||
def aggregate(df: pl.LazyFrame, resolution: int) -> pl.LazyFrame:
    """Summarise prices per H3 cell and year at the given resolution.

    Groups by the ``h3_res<resolution>`` column and ``year``, computes row
    count plus mean, median, min and max of ``price``, and renames the cell
    column to ``h3``.
    """
    cell_column = f"h3_res{resolution}"
    price = pl.col("price")
    statistics = [
        pl.len().alias("count"),
        price.mean().alias("avg_price"),
        price.median().alias("median_price"),
        price.min().alias("min_price"),
        price.max().alias("max_price"),
    ]
    grouped = df.group_by(cell_column, "year").agg(statistics)
    return grouped.rename({cell_column: "h3"})
|
|
||||||
|
|
||||||
|
|
||||||
def aggregate_all(df: pl.LazyFrame) -> dict[int, pl.LazyFrame]:
    """Build one aggregate per entry of ``H3_RESOLUTIONS``, keyed by resolution."""
    by_resolution: dict[int, pl.LazyFrame] = {}
    for res in H3_RESOLUTIONS:
        by_resolution[res] = aggregate(df, res)
    return by_resolution
|
|
||||||
|
|
||||||
|
|
||||||
def save_aggregates(df: pl.LazyFrame, output_dir: Path | None = None) -> list[Path]:
    """Compute every resolution's aggregate and write each to ``res<N>.parquet``.

    Args:
        df: Property rows to aggregate.
        output_dir: Target directory (created if needed); falls back to
            ``AGGREGATES_DIR`` when not given.

    Returns:
        Paths of the written files, in the order they were produced.
    """
    target_dir = output_dir or AGGREGATES_DIR
    target_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []
    for res, lazy_frame in aggregate_all(df).items():
        destination = target_dir / f"res{res}.parquet"
        lazy_frame.collect().write_parquet(destination)
        written.append(destination)
    return written
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
"""Pipeline CLI to process property data with H3 spatial indexing."""
|
|
||||||
|
|
||||||
import polars as pl
|
|
||||||
|
|
||||||
from pipeline.sources.postcodes import save_postcodes
|
|
||||||
from pipeline.sources.property_prices import PropertyPricesSource
|
|
||||||
from pipeline.processors.h3_aggregator import save_aggregates
|
|
||||||
|
|
||||||
|
|
||||||
def run_pipeline():
    """Run the three pipeline stages and report progress on stdout.

    Stage 1 writes postcodes with H3 indices, stage 2 joins property prices
    onto them, stage 3 writes the per-resolution aggregates and prints each
    file's size.
    """
    banner = "=" * 60
    print(banner)
    print("Property Map Data Pipeline")
    print(banner)

    # Step 1: Process postcodes with H3 indices
    print("\n[1/3] Processing postcodes with H3 indices...")
    postcodes_path = save_postcodes()
    print(f" Saved: {postcodes_path}")

    # Step 2: join property prices onto the saved postcodes
    print("\n[2/3] Processing property prices...")
    postcodes = pl.scan_parquet(postcodes_path)
    properties = PropertyPricesSource().process(postcodes)
    print(" Joined property prices with postcodes")

    # Step 3: aggregate and report the size of every output file
    print("\n[3/3] Aggregating at H3 resolutions...")
    for path in save_aggregates(properties):
        size_mb = path.stat().st_size / (1024 * 1024)
        print(f" Saved: {path.name} ({size_mb:.1f} MB)")


if __name__ == "__main__":
    run_pipeline()
|
|
||||||
|
|
@ -1,49 +0,0 @@
|
||||||
from pathlib import Path
|
|
||||||
import polars as pl
|
|
||||||
import h3
|
|
||||||
|
|
||||||
from pipeline.config import DATA_DIR, H3_RESOLUTIONS, PROCESSED_DIR
|
|
||||||
|
|
||||||
|
|
||||||
def lat_long_to_h3(lat: float, long: float, resolution: int) -> str:
    """Return the H3 cell index containing the point at the given resolution.

    Thin wrapper around ``h3.latlng_to_cell``; arguments are passed through
    in (lat, long, resolution) order.
    """
    cell_index = h3.latlng_to_cell(lat, long, resolution)
    return cell_index
|
|
||||||
|
|
||||||
|
|
||||||
def load_postcodes() -> pl.LazyFrame:
    """Lazily read the ArcGIS postcode file, keeping postcode and coordinates.

    The ``pcds`` column is exposed as ``postcode``; ``lat`` and ``long`` are
    kept under their original names.
    """
    source_file = DATA_DIR / "arcgis_data.parquet"
    wanted_columns = [
        pl.col("pcds").alias("postcode"),
        pl.col("lat"),
        pl.col("long"),
    ]
    return pl.scan_parquet(source_file).select(wanted_columns)
|
|
||||||
|
|
||||||
|
|
||||||
def process_postcodes() -> pl.LazyFrame:
    """Add one ``h3_res<N>`` column per configured resolution to the postcodes.

    The postcode frame is materialised first because the H3 index is computed
    row by row through a Python callback; the result is handed back as a lazy
    frame.
    """

    def _h3_column(resolution: int):
        # `resolution` is a parameter here, so each lambda is bound to its own
        # value rather than to a shared loop variable.
        return (
            pl.struct(["lat", "long"])
            .map_elements(
                lambda point: lat_long_to_h3(point["lat"], point["long"], resolution),
                return_dtype=pl.Utf8,
            )
            .alias(f"h3_res{resolution}")
        )

    frame = load_postcodes().collect()
    for resolution in H3_RESOLUTIONS:
        frame = frame.with_columns(_h3_column(resolution))

    return frame.lazy()
|
|
||||||
|
|
||||||
|
|
||||||
def save_postcodes(output_path: Path | None = None) -> Path:
    """Compute the H3-indexed postcodes and write them to a parquet file.

    Args:
        output_path: Destination file; defaults to ``postcodes_h3.parquet``
            inside ``PROCESSED_DIR``. Missing parent directories are created.

    Returns:
        The path that was written.
    """
    destination = output_path if output_path else PROCESSED_DIR / "postcodes_h3.parquet"
    destination.parent.mkdir(parents=True, exist_ok=True)

    process_postcodes().collect().write_parquet(destination)

    return destination
|
|
||||||
|
|
@ -1,41 +0,0 @@
|
||||||
import polars as pl
|
|
||||||
|
|
||||||
from pipeline.base import DataSource
|
|
||||||
from pipeline.config import DATA_DIR, H3_RESOLUTIONS
|
|
||||||
|
|
||||||
|
|
||||||
class PropertyPricesSource(DataSource):
    """Data source for the Land Registry price-paid records."""

    @property
    def name(self) -> str:
        """Identifier of this data source."""
        return "property_prices"

    def load(self) -> pl.LazyFrame:
        """Lazily scan the raw price-paid parquet file under ``DATA_DIR``."""
        raw_file = DATA_DIR / "pp-complete.parquet"
        return pl.scan_parquet(raw_file)

    def process(self, postcodes: pl.LazyFrame) -> pl.LazyFrame:
        """Attach coordinates and H3 indices to each sale via its postcode.

        Args:
            postcodes: Frame providing ``postcode``, ``lat``, ``long`` and one
                ``h3_res<N>`` column per entry of ``H3_RESOLUTIONS``.

        Returns:
            A lazy frame with price, sale year, property type, coordinates and
            the H3 columns. Sales whose postcode is absent from *postcodes*
            are dropped by the inner join.
        """
        sale_columns = [
            pl.col("price"),
            pl.col("date_of_transfer").dt.year().alias("year"),
            pl.col("property_type"),
            pl.col("postcode"),
        ]

        output_columns = ["price", "year", "property_type", "lat", "long"]
        output_columns.extend(f"h3_res{res}" for res in H3_RESOLUTIONS)

        return (
            self.load()
            .select(sale_columns)
            .join(postcodes, on="postcode", how="inner")
            .select(output_columns)
        )
|
|
||||||
63
pipeline/transform/crime.py
Normal file
63
pipeline/transform/crime.py
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
|
||||||
|
def transform_crime(crime_dir: Path, output_path: Path) -> None:
    """Summarise crime CSVs into one row per LSOA with a column per crime type.

    Every ``*.csv`` found recursively under *crime_dir* is scanned for the
    ``LSOA code``, ``Crime type`` and ``Month`` columns. Rows without an LSOA
    code are ignored. Crimes are counted per LSOA / year / crime type, the
    yearly counts are averaged per LSOA / crime type (rounded to one decimal),
    and the result is pivoted so that each crime type becomes a column named
    ``"<crime type> (avg/yr)"``. Missing combinations are filled with 0.

    Args:
        crime_dir: Directory tree containing the crime CSV files.
        output_path: Parquet file to write (zstd-compressed).

    Raises:
        FileNotFoundError: If no CSV file exists under *crime_dir*.
    """
    csvs = sorted(crime_dir.rglob("*.csv"))
    if not csvs:
        # Fail early with a clear message instead of handing Polars an empty
        # source list.
        raise FileNotFoundError(f"No CSV files found under {crime_dir}")
    print(f"Found {len(csvs)} CSV files across {len(list(crime_dir.iterdir()))} months")

    df = pl.scan_csv(
        csvs,
        schema_overrides={"LSOA code": pl.Utf8, "Crime type": pl.Utf8, "Month": pl.Utf8},
    ).select("LSOA code", "Crime type", "Month")

    # Extract year, count crimes per LSOA / year / crime type.
    # The year is taken as the first four characters of "Month"
    # (assumes a YYYY-prefixed value such as "2023-04" -- TODO confirm).
    # NOTE(review): the mean only covers years in which an LSOA recorded at
    # least one crime of the given type; years with zero such crimes produce
    # no row and therefore do not lower the average.
    yearly_counts = (
        df.filter(pl.col("LSOA code").is_not_null() & (pl.col("LSOA code") != ""))
        .with_columns(pl.col("Month").str.slice(0, 4).alias("year"))
        .group_by("LSOA code", "year", "Crime type")
        .agg(pl.len().alias("count"))
        .group_by("LSOA code", "Crime type")
        .agg(pl.col("count").mean().round(1).alias("yearly_avg"))
        .collect(engine="streaming")
    )

    print(f"Crime types: {sorted(yearly_counts['Crime type'].unique().to_list())}")

    # Pivot crime types into columns
    wide = yearly_counts.pivot(
        on="Crime type",
        index="LSOA code",
        values="yearly_avg",
    )

    # Fill nulls with 0 and rename columns to be descriptive
    value_cols = [col for col in wide.columns if col != "LSOA code"]
    wide = wide.with_columns(pl.col(col).fill_null(0) for col in value_cols)
    wide = wide.rename({col: f"{col} (avg/yr)" for col in value_cols})

    print(f"Output shape: {wide.shape}")
    print(f"Columns: {wide.columns}")

    wide.write_parquet(output_path, compression="zstd")
    print(f"Saved to {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Parse ``--input`` / ``--output`` and run the crime transformation."""
    cli = argparse.ArgumentParser(
        description="Transform crime CSVs into yearly average by LSOA and crime type"
    )
    # Both options are mandatory paths, so they share one registration loop.
    path_options = (
        ("--input", "Directory containing crime data"),
        ("--output", "Output parquet file path"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, required=True, help=help_text)

    options = cli.parse_args()
    transform_crime(options.input, options.output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
161
pipeline/transform/join_epc_pp.py
Normal file
161
pipeline/transform/join_epc_pp.py
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
import argparse
|
||||||
|
import polars as pl
|
||||||
|
from pathlib import Path
|
||||||
|
from ..utils import fuzzy_join_on_postcode
|
||||||
|
|
||||||
|
|
||||||
|
# Threshold applied to the EPC TOTAL_FLOOR_AREA column in main(): matched
# properties below this value are dropped (square metres, per the name).
MIN_FLOOR_AREA_M2 = 10

# -1 tells Polars to print every column when main() dumps sample frames.
pl.Config.set_tbl_cols(-1)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Fuzzy-join EPC certificates onto Price Paid sales and write the matches.

    Steps:
      1. Load the EPC CSV and keep one certificate per (address, postcode),
         taking the first row after sorting by inspection date descending.
      2. Load the Price Paid parquet, decode property type / tenure codes,
         build a single address string and collapse sales into one row per
         (address, postcode) with the price history and latest sale values.
      3. Fuzzy-join both sides on postcode + address, report match statistics,
         and keep only matched rows that pass the minimum floor-area filter.
      4. Derive a construction year and write the result with lower-case
         column names to ``--output``.
    """
    parser = argparse.ArgumentParser(description="Fuzzy join EPC and Price Paid data")
    parser.add_argument(
        "--epc", type=Path, required=True, help="EPC certificates CSV file"
    )
    parser.add_argument(
        "--price-paid", type=Path, required=True, help="Price paid parquet file"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    args = parser.parse_args()

    epc = (
        pl.scan_csv(args.epc)
        .select(
            pl.col("ADDRESS").alias("epc_address"),
            "POSTCODE",
            "CURRENT_ENERGY_RATING",
            "POTENTIAL_ENERGY_RATING",
            pl.col("PROPERTY_TYPE").alias("epc_property_type"),
            "BUILT_FORM",
            "INSPECTION_DATE",
            "TOTAL_FLOOR_AREA",
            "NUMBER_HABITABLE_ROOMS",
            "FLOOR_HEIGHT",
            "CONSTRUCTION_AGE_BAND",
        )
        .filter(pl.col("epc_address").is_not_null())
        # Newest inspection first, so .first() keeps the latest certificate.
        .sort("INSPECTION_DATE", descending=True)
        .group_by("epc_address", "POSTCODE")
        .first()
    )

    print("EPC dataset")
    print(epc.head().collect())

    # https://www.gov.uk/guidance/about-the-price-paid-data
    property_type_map = {
        "D": "Detached",
        "S": "Semi-Detached",
        "T": "Terraced",
        "F": "Flats/Maisonettes",
        "O": "Other",
    }
    duration_map = {"F": "Freehold", "L": "Leasehold"}

    price_paid = (
        pl.scan_parquet(args.price_paid)
        .select(
            "price",
            "date_of_transfer",
            pl.col("property_type")
            .alias("pp_property_type")
            .replace(property_type_map),
            "postcode",
            "paon",
            "saon",
            "street",
            "locality",
            "town_city",
            pl.col("duration").replace(duration_map),
            "old_new",
        )
        .filter(pl.col("pp_property_type") != "Other")
        .with_columns(
            pl.concat_str(
                [pl.col("saon"), pl.col("paon"), pl.col("street")],
                separator=" ",
                ignore_nulls=True,
            ).alias("pp_address"),
        )
        # Oldest sale first so that .first()/.last() below mean earliest/latest.
        .sort("date_of_transfer")
        .group_by("pp_address", "postcode", maintain_order=True)
        .agg(
            pl.struct(
                pl.col("date_of_transfer").dt.year().alias("year"),
                "price",
            ).alias("historical_prices"),
            pl.col("pp_property_type").last(),
            pl.col("duration").last(),
            pl.col("price").last().alias("latest_price"),
            pl.col("date_of_transfer").last(),
            pl.col("date_of_transfer").first().alias("first_transfer_date"),
            pl.col("old_new").first(),
        )
    ).filter(
        # With ignore_nulls=True, concat_str is expected to give an empty
        # string (not null) when every address part is missing, so the null
        # check alone would let address-less rows through.
        pl.col("pp_address").is_not_null() & (pl.col("pp_address") != "")
    )

    print("Price paid dataset")
    print(price_paid.head().collect())

    joined = (
        fuzzy_join_on_postcode(
            left=price_paid,
            right=epc,
            left_address_col="pp_address",
            right_address_col="epc_address",
            left_postcode_col="postcode",
            right_postcode_col="POSTCODE",
        )
        .drop("POSTCODE")
        .collect(engine="streaming")
    )

    matched = joined.filter(
        pl.col("epc_address").is_not_null() & pl.col("pp_address").is_not_null()
    )
    total = joined.height
    # Guard the percentage so an empty join result reports 0.0% instead of
    # raising ZeroDivisionError.
    matched_pct = 100 * matched.height / total if total else 0.0
    print(f"Unique properties: {total}")
    print(f"Matched: {matched.height} ({matched_pct:.1f}%)")
    print(f"Unmatched: {total - matched.height}")

    matched = matched.filter(pl.col("TOTAL_FLOOR_AREA") >= MIN_FLOOR_AREA_M2)

    # For new-builds (old_new == "Y"), use the first transaction date year as
    # the exact construction date; otherwise fall back to the EPC age band.
    # The band fallback keeps the first four-digit number found in the band.
    epc_band_year = (
        pl.col("CONSTRUCTION_AGE_BAND")
        .str.replace("England and Wales: ", "")
        .str.replace(" onwards", "")
        .str.extract(r"(\d{4})", 1)
        .cast(pl.UInt16, strict=False)
    )
    transfer_year = (
        pl.col("first_transfer_date").dt.year().cast(pl.UInt16, strict=False)
    )
    is_new_build = pl.col("old_new") == "Y"

    matched = matched.with_columns(
        pl.when(is_new_build & transfer_year.is_not_null())
        .then(transfer_year)
        .otherwise(epc_band_year)
        .alias("CONSTRUCTION_AGE_BAND"),
        # 0 = exact (new-build sale year), 1 = approximate (EPC band),
        # null = no construction year available.
        pl.when(is_new_build & transfer_year.is_not_null())
        .then(pl.lit(0, dtype=pl.UInt8))
        .when(epc_band_year.is_not_null())
        .then(pl.lit(1, dtype=pl.UInt8))
        .otherwise(pl.lit(None, dtype=pl.UInt8))
        .alias("is_construction_date_approximate"),
    ).drop("old_new", "first_transfer_date")

    # Build the mapping from the columns that actually remain, so the rename
    # never references the columns dropped just above.
    matched = matched.rename({col: col.lower() for col in matched.columns})

    print(matched.head())
    matched.write_parquet(args.output)
    print(f"Wrote {args.output}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
290
pipeline/transform/merge.py
Normal file
290
pipeline/transform/merge.py
Normal file
|
|
@ -0,0 +1,290 @@
|
||||||
|
import argparse
|
||||||
|
import polars as pl
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Lower bound applied to the "latest_price" column in _build_wide; rows with a
# cheaper latest sale are filtered out (currency not shown here; presumably GBP).
MIN_PRICE = 10_000
# Floor-area threshold applied to "total_floor_area" in _build_wide
# (square metres, per the name).
MIN_FLOOR_AREA_M2 = 10
|
||||||
|
|
||||||
|
|
||||||
|
def _build_wide(
    epc_pp_path: Path,
    arcgis_path: Path,
    iod_path: Path,
    poi_proximity_path: Path,
    journey_times_path: Path,
    ethnicity_path: Path,
    crime_path: Path,
    noise_path: Path,
    school_proximity_path: Path,
    broadband_path: Path,
) -> pl.DataFrame:
    """Build the wide dataframe by joining epc_pp with all auxiliary data.

    The EPC/price-paid frame is inner-joined with the ArcGIS postcode lookup
    (rows with an unknown postcode are dropped); every other dataset is
    left-joined, so missing auxiliary data yields nulls rather than lost rows.
    Derived columns (crime totals, combined noise level, broadband tier,
    property type/built form, price per sqm) are added, rows below the
    floor-area and price minimums are removed, helper columns are dropped and
    the remaining columns get their presentation names.

    Args:
        epc_pp_path: Parquet produced by the EPC / price-paid join.
        arcgis_path: Postcode lookup with ``pcds``, ``lat``, ``long``,
            ``lsoa21`` and ``oa21``.
        iod_path: Index of Deprivation parquet keyed by ``LSOA code (2021)``.
        poi_proximity_path: Per-postcode POI counts.
        journey_times_path: Per-postcode journey times.
        ethnicity_path: Ethnicity parquet keyed by ``Geography_code``.
        crime_path: Crime averages keyed by ``LSOA code``.
        noise_path: Per-postcode road / rail / airport noise levels.
        school_proximity_path: Per-postcode school counts.
        broadband_path: Broadband availability keyed by ``postcode_space``.

    Returns:
        The fully joined frame, collected with the streaming engine.
    """
    wide = pl.scan_parquet(epc_pp_path)

    arcgis = pl.scan_parquet(arcgis_path).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lon"),
        "lsoa21",
        "oa21",
    )
    wide = wide.join(arcgis, on="postcode", how="inner")

    # One journey-time row per postcode: the one with the shortest "quick"
    # public-transport time (nulls sorted last).
    journey_times = (
        pl.scan_parquet(journey_times_path)
        .select(
            "postcode",
            "public_transport_easy_minutes",
            "public_transport_quick_minutes",
            "cycling_minutes",
        )
        .sort("public_transport_quick_minutes", nulls_last=True)
        .group_by("postcode")
        .first()
    )
    wide = wide.join(journey_times, on="postcode", how="left")

    iod = pl.scan_parquet(iod_path)
    wide = wide.join(iod, left_on="lsoa21", right_on="LSOA code (2021)", how="left")

    # The local-authority code used as the join key is not in the ArcGIS
    # select above, so it has to come from an earlier join (presumably the
    # deprivation data -- TODO confirm).
    ethnicity = pl.scan_parquet(ethnicity_path)
    wide = wide.join(
        ethnicity,
        left_on="Local Authority District code (2024)",
        right_on="Geography_code",
        how="left",
    )

    crime = pl.scan_parquet(crime_path)
    wide = wide.join(crime, left_on="lsoa21", right_on="LSOA code", how="left")

    # Collapse the per-type crime averages into two headline totals.
    wide = wide.with_columns(
        pl.sum_horizontal(
            "Violence and sexual offences (avg/yr)",
            "Robbery (avg/yr)",
            "Burglary (avg/yr)",
            "Possession of weapons (avg/yr)",
        ).alias("serious_crime_avg_yr"),
        pl.sum_horizontal(
            "Anti-social behaviour (avg/yr)",
            "Criminal damage and arson (avg/yr)",
            "Shoplifting (avg/yr)",
            "Bicycle theft (avg/yr)",
            "Theft from the person (avg/yr)",
            "Other theft (avg/yr)",
            "Vehicle crime (avg/yr)",
            "Public order (avg/yr)",
            "Drugs (avg/yr)",
            "Other crime (avg/yr)",
        ).alias("minor_crime_avg_yr"),
    )

    poi_counts = pl.scan_parquet(poi_proximity_path)
    wide = wide.join(poi_counts, on="postcode", how="left")

    # Single noise figure per postcode: the loudest of road / rail / airport,
    # or 0 when none of the three is available.
    noise_cols = ["road_noise_lden_db", "rail_noise_lden_db", "airport_noise_lden_db"]
    noise = (
        pl.scan_parquet(noise_path)
        .with_columns(
            # NaN → null so max_horizontal ignores missing instead of propagating NaN
            *[pl.col(c).fill_nan(None) for c in noise_cols],
        )
        .with_columns(
            pl.max_horizontal(*noise_cols).fill_null(0).alias("noise_lden_db"),
        )
        .select("postcode", "noise_lden_db")
    )
    wide = wide.join(noise, on="postcode", how="left")

    school_proximity = pl.scan_parquet(school_proximity_path)
    wide = wide.join(school_proximity, on="postcode", how="left")

    # Broadband: derive max available download speed tier per postcode from
    # Ofcom availability percentages. Tiers: Gigabit ≥1000, UFBB ≥300,
    # UFBB(100) ≥100, SFBB ≥30 Mbps.
    # Rows with no availability in any tier fall back to 10.
    broadband = (
        pl.scan_parquet(broadband_path)
        .select(
            pl.col("postcode_space").alias("bb_postcode"),
            pl.when(pl.col("Gigabit availability (% premises)") > 0)
            .then(1000)
            .when(pl.col("UFBB availability (% premises)") > 0)
            .then(300)
            .when(pl.col("UFBB (100Mbit/s) availability (% premises)") > 0)
            .then(100)
            .when(pl.col("SFBB availability (% premises)") > 0)
            .then(30)
            .otherwise(10)
            .cast(pl.UInt16)
            .alias("max_download_speed"),
        )
        .group_by("bb_postcode")
        .agg(pl.col("max_download_speed").max())
    )
    wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left")

    # Show a single label when both sources agree, "<pp type>/<built form>"
    # otherwise.
    wide = wide.with_columns(
        pl.when(pl.col("pp_property_type") == pl.col("built_form"))
        .then(pl.col("pp_property_type"))
        .otherwise(
            pl.concat_str(
                [pl.col("pp_property_type"), pl.lit("/"), pl.col("built_form")]
            )
        )
        .alias("property_type_built_form")
    )

    wide = (
        # Both minimums are inclusive; the floor-area check uses >= so it
        # agrees with the same threshold applied in the EPC / price-paid join.
        wide.filter(pl.col("total_floor_area") >= MIN_FLOOR_AREA_M2)
        .filter(pl.col("latest_price") >= MIN_PRICE)
        .with_columns(
            # Treat placeholder values as missing.
            pl.when(pl.col("duration") == "U")
            .then(None)
            .otherwise(pl.col("duration"))
            .alias("duration"),
            pl.when(pl.col("current_energy_rating") == "INVALID!")
            .then(None)
            .otherwise(pl.col("current_energy_rating"))
            .alias("current_energy_rating"),
        )
        .with_columns(
            (pl.col("latest_price") / pl.col("total_floor_area"))
            .round(0)
            .cast(pl.Int32)
            .alias("Price per sqm"),
        )
        .drop(
            "date_of_transfer",
            "inspection_date",
            "floor_height",
            "LSOA name (2021)",
            "Local Authority District code (2024)",
            "Local Authority District name (2024)",
            "Wider Barriers Sub-domain Score",
            "Geographical Barriers Sub-domain Score",
            "Adult Skills Sub-domain Score",
            "Children and Young People Sub-domain Score",
            "Income Deprivation Affecting Older People (IDAOPI) Score (rate)",
            "Income Deprivation Affecting Children Index (IDACI) Score (rate)",
            "Barriers to Housing and Services Score",
            "lsoa21",
            "oa21",
            "pp_property_type",
            "built_form",
        )
        .rename(
            {
                "construction_age_band": "Approximate construction age",
                "is_construction_date_approximate": "Is construction date approximate",
                "pp_address": "Address per Property Register",
                "epc_address": "Address per EPC",
                "postcode": "Postcode",
                # NOTE(review): "Leashold" looks like a typo for "Leasehold";
                # left unchanged because consumers may key on this column name.
                "duration": "Leashold/Freehold",
                "current_energy_rating": "Current energy rating",
                "potential_energy_rating": "Potential energy rating",
                "total_floor_area": "Total floor area (sqm)",
                "epc_property_type": "Property type",
                "property_type_built_form": "Property type/built form",
                "restaurants_2km": "Restaurants within 2km",
                "groceries_2km": "Groceries within 2km",
                "parks_2km": "Parks within 2km",
                "public_transport_2km": "Public transport within 2km",
                "latest_price": "Last known price",
                "number_habitable_rooms": "Number of bedrooms & living rooms",
                "noise_lden_db": "Noise (dB)",
                "good_primary_5km": "Good+ primary schools within 5km",
                "good_secondary_5km": "Good+ secondary schools within 5km",
                "max_download_speed": "Max available download speed (Mbps)",
                "serious_crime_avg_yr": "Serious crime (avg/yr)",
                "minor_crime_avg_yr": "Minor crime (avg/yr)",
            }
        )
    )

    print("Collecting with streaming engine...")
    return wide.collect(engine="streaming")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the CLI arguments, build the wide property frame and write it.

    Every input dataset is mandatory: ``_build_wide`` scans each path
    unconditionally, so a missing option is rejected here by argparse with a
    usage error instead of failing later inside the join.
    """
    parser = argparse.ArgumentParser(
        description="Build wide property dataframe with all joins"
    )
    parser.add_argument(
        "--epc-pp", type=Path, required=True, help="EPC-Price Paid joined parquet file"
    )
    parser.add_argument(
        "--arcgis", type=Path, required=True, help="ArcGIS postcode data parquet file"
    )
    parser.add_argument(
        "--iod",
        type=Path,
        required=True,
        help="Index of Deprivation parquet file",
    )
    parser.add_argument(
        "--poi-proximity",
        type=Path,
        required=True,
        help="POI proximity counts parquet file",
    )
    parser.add_argument(
        "--journey-times",
        required=True,
        type=Path,
        help="Journey times parquet file",
    )
    parser.add_argument(
        "--ethnicity",
        type=Path,
        required=True,
        help="Ethnicity by local authority parquet file",
    )
    parser.add_argument(
        "--crime",
        type=Path,
        required=True,
        help="Crime by LSOA parquet file",
    )
    parser.add_argument(
        "--noise", type=Path, required=True, help="Road noise by postcode parquet file"
    )
    parser.add_argument(
        "--school-proximity",
        type=Path,
        required=True,
        help="School proximity counts parquet file",
    )
    parser.add_argument(
        "--broadband",
        type=Path,
        required=True,
        help="Broadband performance by output area parquet file",
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    args = parser.parse_args()

    wide = _build_wide(
        epc_pp_path=args.epc_pp,
        arcgis_path=args.arcgis,
        iod_path=args.iod,
        poi_proximity_path=args.poi_proximity,
        journey_times_path=args.journey_times,
        ethnicity_path=args.ethnicity,
        crime_path=args.crime,
        noise_path=args.noise,
        school_proximity_path=args.school_proximity,
        broadband_path=args.broadband,
    )

    print(f"Columns: {wide.columns}")
    print(f"Rows: {wide.height}")

    wide.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)

    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
52
pipeline/transform/poi_proximity.py
Normal file
52
pipeline/transform/poi_proximity.py
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
"""Compute POI proximity counts per postcode from ArcGIS + filtered POIs."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
from pipeline.utils.poi_counts import _count_pois_per_postcode
|
||||||
|
|
||||||
|
|
||||||
|
# POI category groups for proximity counting.
# Maps a group name to the list of POI category labels that belong to it.
# main() passes this mapping as `groups` to _count_pois_per_postcode
# (presumably each key becomes the prefix of an output count column -- confirm
# against that helper).
POI_GROUPS = {
    "restaurants": ["Restaurant", "Fast Food"],
    "groceries": ["Greengrocer", "Grocery Shop", "Supermarket", "Convenience Store"],
    "parks": ["Park", "Garden", "Nature Reserve"],
    "public_transport": ["Metro or Tram stop", "Rail station", "Bus stop", "Bus station"],  # comes from naptan.py
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Count POIs around every postcode and write the counts to parquet.

    Reads the ArcGIS postcode lookup and the filtered POI file, counts POIs
    per ``POI_GROUPS`` entry within a 2 km radius of each postcode, writes
    the result to ``--output`` and prints the size of the written file.
    """
    cli = argparse.ArgumentParser(
        description="Count POIs within radius per postcode"
    )
    # All three options are mandatory paths.
    path_options = (
        ("--arcgis", "ArcGIS postcode parquet"),
        ("--pois", "Filtered POIs parquet"),
        ("--output", "Output parquet path"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, required=True, help=help_text)
    options = cli.parse_args()

    # Postcode centroids, renamed to the postcode / lat / lon columns passed on
    # to the counting helper.
    postcode_columns = [
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lon"),
    ]
    postcode_points = pl.read_parquet(options.arcgis).select(postcode_columns)

    poi_points = pl.read_parquet(options.pois)

    counts = _count_pois_per_postcode(
        postcode_points, poi_points, groups=POI_GROUPS, radius_km=2
    )

    counts.write_parquet(options.output)
    written_mb = options.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {options.output} ({written_mb:.1f} MB)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
73
pipeline/transform/school_proximity.py
Normal file
73
pipeline/transform/school_proximity.py
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
"""Compute good-rated school proximity counts per postcode."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
from pipeline.utils.poi_counts import _count_pois_per_postcode
|
||||||
|
|
||||||
|
# Group name -> category labels counted for that group. The labels match the
# "category" values that main() assigns to schools ("good_primary" for the
# Primary phase, "good_secondary" otherwise).
SCHOOL_GROUPS = {
    "good_primary": ["good_primary"],
    "good_secondary": ["good_secondary"],
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Count good-rated primary and secondary schools near every postcode.

    Ofsted records are filtered to Primary/Secondary schools whose overall
    effectiveness is "1" or "2", located through their postcode in the ArcGIS
    lookup, and counted per postcode within the search radius. The counts are
    written to ``--output`` and the size of the written file is printed.
    """
    # One value drives both the CLI description and the actual search radius,
    # so the help text cannot drift from what the script counts.
    radius_km = 5

    parser = argparse.ArgumentParser(
        description=f"Count good+ primary/secondary schools within {radius_km}km per postcode"
    )
    parser.add_argument(
        "--ofsted", type=Path, required=True, help="Ofsted inspection parquet"
    )
    parser.add_argument(
        "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet path"
    )
    args = parser.parse_args()

    # Load Ofsted data: filter to good+ (1, 2) primary/secondary schools
    ofsted = pl.read_parquet(args.ofsted).filter(
        pl.col("Ofsted phase").is_in(["Primary", "Secondary"])
        & pl.col("Overall effectiveness").is_in(["1", "2"])
    )

    print(f"Good+ schools: {len(ofsted):,}")

    # Assign category based on phase; only Primary and Secondary remain after
    # the filter above, so "otherwise" means Secondary.
    ofsted = ofsted.with_columns(
        pl.when(pl.col("Ofsted phase") == "Primary")
        .then(pl.lit("good_primary"))
        .otherwise(pl.lit("good_secondary"))
        .alias("category")
    ).select(
        pl.col("Postcode").alias("postcode"),
        "category",
    )

    # Join with arcgis to get lat/lng for each school's postcode
    arcgis = pl.read_parquet(args.arcgis).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lng"),
    )

    # Schools whose postcode is missing from the lookup are dropped here.
    schools = ofsted.join(arcgis, on="postcode", how="inner")
    print(f"Schools with coordinates: {len(schools):,}")

    # Load all postcodes for proximity counting (longitude column is named
    # "lon" on this side, "lng" on the schools side).
    postcodes = arcgis.rename({"lng": "lon"})

    result = _count_pois_per_postcode(
        postcodes, schools, radius_km=radius_km, groups=SCHOOL_GROUPS
    )

    result.write_parquet(args.output)
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
673
pipeline/transform/transform_poi.py
Normal file
673
pipeline/transform/transform_poi.py
Normal file
|
|
@ -0,0 +1,673 @@
|
||||||
|
import argparse
|
||||||
|
import warnings
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
|
||||||
|
# Raw "<key>/<value>" categories that transform() filters out of the POI data
# before friendly names are applied. transform() requires every category found
# in the input to appear either here or in CATEGORY_MAP and raises otherwise.
DROP_CATEGORIES = {
    "amenity/advice",
    "amenity/atm",
    "amenity/bbq",
    "amenity/bench",
    "amenity/bicycle_parking",
    "amenity/clock",
    "amenity/fixme",
    "amenity/grit_bin",
    "amenity/hunting_stand",
    "amenity/motorcycle_parking",
    "amenity/notice_board",
    "amenity/parking",
    "amenity/parking_entrance",
    "amenity/parking_space",
    "amenity/post_box",
    "amenity/telephone",
    "amenity/toilets",
    "amenity/vacuum_cleaner",
    "amenity/waste_basket",
    "building/air_shaft",
    "building/apartments",
    "building/detached",
    "building/entrance",
    "building/entry",
    "building/garage",
    "building/garages",
    "building/house",
    "building/hut",
    "building/no",
    "building/office",
    "building/public",
    "building/residential",
    "building/roof",
    "building/shed",
    "building/terrace",
    "building/yes",
    "emergency/access_point",
    "emergency/ambulance_station",
    "emergency/assembly_point",
    "emergency/bleed_control_kit",
    "emergency/defibrillator",
    "emergency/designated",
    "emergency/dry_riser_inlet",
    "emergency/emergency_ward_entrance",
    "emergency/fire_alarm_box",
    "emergency/fire_extinguisher",
    "emergency/fire_hydrant",
    "emergency/fire_service_inlet",
    "emergency/first_aid_kit",
    "emergency/life_ring",
    "emergency/lifeguard",
    "emergency/no",
    "emergency/phone",
    "emergency/rescue_equipment",
    "emergency/siren",
    "emergency/throw_bag",
    "emergency/water_rescue",
    "emergency/yes",
    "leisure/firepit",
    "leisure/fishing",
    "leisure/picnic_table",
    "office/company",
    "office/yes",
    "tourism/apartment",
    "tourism/apartments",
    "tourism/camp_pitch",
    "tourism/information",
    "tourism/village_sign",
    "tourism/yes",
    # public transport comes from naptan
    "public_transport/entrance",
    "public_transport/platform",
    "public_transport/station",
    "public_transport/stop_position",
}
|
||||||
|
|
||||||
|
# (friendly_name, emoji) for every category we keep
|
||||||
|
CATEGORY_MAP: dict[str, tuple[str, str]] = {
|
||||||
|
# amenity
|
||||||
|
"amenity/animal_boarding": ("Animal Boarding", "🐾"),
|
||||||
|
"amenity/animal_breeding": ("Animal Breeding", "🐣"),
|
||||||
|
"amenity/animal_shelter": ("Animal Shelter", "🏠"),
|
||||||
|
"amenity/arts_centre": ("Arts Centre", "🎨"),
|
||||||
|
"amenity/bank": ("Bank", "🏦"),
|
||||||
|
"amenity/bar": ("Bar", "🍸"),
|
||||||
|
"amenity/bicycle_rental": ("Bike Rental", "🚲"),
|
||||||
|
"amenity/bicycle_repair_station": ("Bike Repair", "🔧"),
|
||||||
|
"amenity/binoculars": ("Public Binoculars", "🔭"),
|
||||||
|
"amenity/boat_rental": ("Boat Rental", "⛵"),
|
||||||
|
"amenity/boat_storage": ("Boat Storage", "🚢"),
|
||||||
|
"amenity/boot_scraper": ("Boot Scraper", "🥾"),
|
||||||
|
"amenity/bureau_de_change": ("Currency Exchange", "💱"),
|
||||||
|
"amenity/bus_station": ("Bus Station", "🚌"),
|
||||||
|
"amenity/cafe": ("Café", "☕"),
|
||||||
|
"amenity/car_rental": ("Car Rental", "🚗"),
|
||||||
|
"amenity/car_sharing": ("Car Sharing", "🚙"),
|
||||||
|
"amenity/car_wash": ("Car Wash", "🧽"),
|
||||||
|
"amenity/care_home": ("Care Home", "🏥"),
|
||||||
|
"amenity/casino": ("Casino", "🎰"),
|
||||||
|
"amenity/charging_station": ("EV Charging", "🔌"),
|
||||||
|
"amenity/check_in": ("Check-In Point", "✅"),
|
||||||
|
"amenity/childcare": ("Childcare", "👶"),
|
||||||
|
"amenity/cinema": ("Cinema", "🎬"),
|
||||||
|
"amenity/clinic": ("Clinic", "🩺"),
|
||||||
|
"amenity/club": ("Club", "🏛️"),
|
||||||
|
"amenity/college": ("College", "🎓"),
|
||||||
|
"amenity/community_centre": ("Community Centre", "🤝"),
|
||||||
|
"amenity/compressed_air": ("Compressed Air", "💨"),
|
||||||
|
"amenity/conference_centre": ("Conference Centre", "📋"),
|
||||||
|
"amenity/courthouse": ("Courthouse", "⚖️"),
|
||||||
|
"amenity/coworking_space": ("Co-working Space", "💻"),
|
||||||
|
"amenity/crematorium": ("Crematorium", "🕯️"),
|
||||||
|
"amenity/dancing_school": ("Dance School", "💃"),
|
||||||
|
"amenity/dentist": ("Dentist", "🦷"),
|
||||||
|
"amenity/doctors": ("Doctor", "👨⚕️"),
|
||||||
|
"amenity/dojo": ("Dojo", "🥋"),
|
||||||
|
"amenity/donation_box": ("Donation Box", "📦"),
|
||||||
|
"amenity/dressing_room": ("Dressing Room", "👗"),
|
||||||
|
"amenity/drinking_water": ("Drinking Water", "🚰"),
|
||||||
|
"amenity/driving_school": ("Driving School", "🚦"),
|
||||||
|
"amenity/escooter_rental": ("E-Scooter Rental", "🛴"),
|
||||||
|
"amenity/events_venue": ("Events Venue", "🎪"),
|
||||||
|
"amenity/fast_food": ("Fast Food", "🍔"),
|
||||||
|
"amenity/feeding_place": ("Feeding Place", "🍽️"),
|
||||||
|
"amenity/ferry_terminal": ("Ferry Terminal", "⛴️"),
|
||||||
|
"amenity/fire_station": ("Fire Station", "🚒"),
|
||||||
|
"amenity/food_court": ("Food Court", "🍴"),
|
||||||
|
"amenity/fountain": ("Fountain", "⛲"),
|
||||||
|
"amenity/fuel": ("Fuel Station", "⛽"),
|
||||||
|
"amenity/gambling": ("Gambling", "🎲"),
|
||||||
|
"amenity/grave_yard": ("Graveyard", "🪦"),
|
||||||
|
"amenity/hall": ("Hall", "🏛️"),
|
||||||
|
"amenity/hookah_lounge": ("Hookah Lounge", "💨"),
|
||||||
|
"amenity/hospital": ("Hospital", "🏥"),
|
||||||
|
"amenity/ice_cream": ("Ice Cream", "🍦"),
|
||||||
|
"amenity/internet_cafe": ("Internet Café", "🌐"),
|
||||||
|
"amenity/kick-scooter_rental": ("Kick Scooter Rental", "🛴"),
|
||||||
|
"amenity/kindergarten": ("Kindergarten", "💒"),
|
||||||
|
"amenity/language_school": ("Language School", "🗣️"),
|
||||||
|
"amenity/letter_box": ("Letter Box", "📮"),
|
||||||
|
"amenity/library": ("Library", "📚"),
|
||||||
|
"amenity/loading_dock": ("Loading Dock", "📥"),
|
||||||
|
"amenity/lounge": ("Lounge", "🛋️"),
|
||||||
|
"amenity/lounger": ("Public Lounger", "🪑"),
|
||||||
|
"amenity/marketplace": ("Market", "🛒"),
|
||||||
|
"amenity/money_transfer": ("Money Transfer", "💸"),
|
||||||
|
"amenity/mounting_block": ("Mounting Block", "🐴"),
|
||||||
|
"amenity/music_school": ("Music School", "🎵"),
|
||||||
|
"amenity/music_venue": ("Music Venue", "🎶"),
|
||||||
|
"amenity/nightclub": ("Nightclub", "🪩"),
|
||||||
|
"amenity/nursing_home": ("Nursing Home", "🏠"),
|
||||||
|
"amenity/parcel_locker": ("Parcel Locker", "📦"),
|
||||||
|
"amenity/payment_terminal": ("Payment Terminal", "💳"),
|
||||||
|
"amenity/pharmacy": ("Pharmacy", "💊"),
|
||||||
|
"amenity/photo_booth": ("Photo Booth", "📸"),
|
||||||
|
"amenity/piano": ("Public Piano", "🎹"),
|
||||||
|
"amenity/place_of_worship": ("Place of Worship", "⛪"),
|
||||||
|
"amenity/police": ("Police Station", "🚔"),
|
||||||
|
"amenity/post_depot": ("Post Depot", "📬"),
|
||||||
|
"amenity/post_office": ("Post Office", "🏤"),
|
||||||
|
"amenity/prep_school": ("Prep School", "📖"),
|
||||||
|
"amenity/pub": ("Pub", "🍺"),
|
||||||
|
"amenity/public_bookcase": ("Public Bookcase", "📕"),
|
||||||
|
"amenity/public_building": ("Public Building", "🏢"),
|
||||||
|
"amenity/reception_desk": ("Reception Desk", "🛎️"),
|
||||||
|
"amenity/recycling": ("Recycling", "♻️"),
|
||||||
|
"amenity/restaurant": ("Restaurant", "🍽️"),
|
||||||
|
"amenity/sanitary_dump_station": ("Sanitary Dump Station", "🚿"),
|
||||||
|
"amenity/school": ("School", "🏫"),
|
||||||
|
"amenity/scout_hut": ("Scout Hut", "⚜️"),
|
||||||
|
"amenity/shelter": ("Shelter", "🛖"),
|
||||||
|
"amenity/shower": ("Public Shower", "🚿"),
|
||||||
|
"amenity/smoking_area": ("Smoking Area", "🚬"),
|
||||||
|
"amenity/social_centre": ("Social Centre", "🏘️"),
|
||||||
|
"amenity/social_club": ("Social Club", "🤝"),
|
||||||
|
"amenity/social_facility": ("Social Facility", "🫂"),
|
||||||
|
"amenity/stripclub": ("Strip Club", "🔞"),
|
||||||
|
"amenity/studio": ("Studio", "🎙️"),
|
||||||
|
"amenity/table": ("Public Table", "🪑"),
|
||||||
|
"amenity/taxi": ("Taxi Stand", "🚕"),
|
||||||
|
"amenity/telescope": ("Public Telescope", "🔭"),
|
||||||
|
"amenity/theatre": ("Theatre", "🎭"),
|
||||||
|
"amenity/ticket_validator": ("Ticket Validator", "🎫"),
|
||||||
|
"amenity/townhall": ("Town Hall", "🏛️"),
|
||||||
|
"amenity/training": ("Training Centre", "📝"),
|
||||||
|
"amenity/trolley_bay": ("Trolley Bay", "🛒"),
|
||||||
|
"amenity/university": ("University", "🏫"),
|
||||||
|
"amenity/vehicle_inspection": ("Vehicle Inspection", "🔍"),
|
||||||
|
"amenity/vending_machine": ("Vending Machine", "🏧"),
|
||||||
|
"amenity/veterinary": ("Vet", "🐕"),
|
||||||
|
"amenity/washing_machine": ("Washing Machine", "🧺"),
|
||||||
|
"amenity/washingline": ("Washing Line", "👕"),
|
||||||
|
"amenity/waste_disposal": ("Waste Disposal", "🗑️"),
|
||||||
|
"amenity/waste_transfer_station": ("Waste Transfer Station", "🚛"),
|
||||||
|
"amenity/water_point": ("Water Point", "💧"),
|
||||||
|
"amenity/watering_place": ("Watering Place", "🚰"),
|
||||||
|
"amenity/weighbridge": ("Weighbridge", "⚖️"),
|
||||||
|
# building
|
||||||
|
"building/barn": ("Barn", "🏚️"),
|
||||||
|
"building/bunker": ("Bunker", "🏗️"),
|
||||||
|
"building/chapel": ("Chapel", "⛪"),
|
||||||
|
"building/church": ("Church", "⛪"),
|
||||||
|
"building/commercial": ("Commercial Building", "🏬"),
|
||||||
|
"building/construction": ("Construction Site", "🚧"),
|
||||||
|
"building/farm": ("Farmhouse", "🌾"),
|
||||||
|
"building/greenhouse": ("Greenhouse", "🌿"),
|
||||||
|
"building/industrial": ("Industrial Building", "🏭"),
|
||||||
|
"building/kiosk": ("Kiosk", "🏪"),
|
||||||
|
"building/retail": ("Retail Building", "🏬"),
|
||||||
|
"building/ruins": ("Ruins", "🏚️"),
|
||||||
|
"building/school": ("School Building", "🏫"),
|
||||||
|
"building/semidetached_house": ("Semi-Detached House", "🏠"),
|
||||||
|
"building/service": ("Service Building", "🔧"),
|
||||||
|
"building/university": ("University Building", "🎓"),
|
||||||
|
"building/warehouse": ("Warehouse", "🏭"),
|
||||||
|
# craft
|
||||||
|
"craft/agricultural_engines": ("Agricultural Engines", "🚜"),
|
||||||
|
"craft/atelier": ("Atelier", "🎨"),
|
||||||
|
"craft/blacksmith": ("Blacksmith", "🔨"),
|
||||||
|
"craft/bookbinder": ("Bookbinder", "📖"),
|
||||||
|
"craft/brewery": ("Brewery", "🍺"),
|
||||||
|
"craft/builder": ("Builder", "🧱"),
|
||||||
|
"craft/carpenter": ("Carpenter", "🪚"),
|
||||||
|
"craft/caterer": ("Caterer", "🍱"),
|
||||||
|
"craft/cleaning": ("Cleaning Service", "🧹"),
|
||||||
|
"craft/confectionery": ("Confectioner", "🍬"),
|
||||||
|
"craft/distillery": ("Distillery", "🥃"),
|
||||||
|
"craft/dressmaker": ("Dressmaker", "👗"),
|
||||||
|
"craft/electrician": ("Electrician", "⚡"),
|
||||||
|
"craft/electronics_repair": ("Electronics Repair", "🔌"),
|
||||||
|
"craft/floorer": ("Flooring Specialist", "🪵"),
|
||||||
|
"craft/gardener": ("Gardener", "🌱"),
|
||||||
|
"craft/glaziery": ("Glazier", "🪟"),
|
||||||
|
"craft/handicraft": ("Handicraft", "✂️"),
|
||||||
|
"craft/hvac": ("HVAC", "❄️"),
|
||||||
|
"craft/jeweller": ("Jeweller", "💎"),
|
||||||
|
"craft/joiner": ("Joiner", "🪚"),
|
||||||
|
"craft/key_cutter": ("Key Cutter", "🔑"),
|
||||||
|
"craft/locksmith": ("Locksmith", "🔐"),
|
||||||
|
"craft/metal_construction": ("Metal Fabrication", "🔩"),
|
||||||
|
"craft/painter": ("Painter & Decorator", "🖌️"),
|
||||||
|
"craft/photographer": ("Photographer", "📷"),
|
||||||
|
"craft/photographic_laboratory": ("Photo Lab", "🖼️"),
|
||||||
|
"craft/plumber": ("Plumber", "🔧"),
|
||||||
|
"craft/pottery": ("Pottery", "🏺"),
|
||||||
|
"craft/printer": ("Printer", "🖨️"),
|
||||||
|
"craft/roofer": ("Roofer", "🏠"),
|
||||||
|
"craft/sawmill": ("Sawmill", "🪵"),
|
||||||
|
"craft/scaffolder": ("Scaffolder", "🏗️"),
|
||||||
|
"craft/sculptor": ("Sculptor", "🗿"),
|
||||||
|
"craft/shoemaker": ("Shoemaker", "👞"),
|
||||||
|
"craft/signmaker": ("Sign Maker", "🪧"),
|
||||||
|
"craft/stonemason": ("Stonemason", "🪨"),
|
||||||
|
"craft/tailor": ("Tailor", "🧵"),
|
||||||
|
"craft/upholsterer": ("Upholsterer", "🛋️"),
|
||||||
|
"craft/watchmaker": ("Watchmaker", "⌚"),
|
||||||
|
"craft/window_construction": ("Window Fitter", "🪟"),
|
||||||
|
"craft/winery": ("Winery", "🍷"),
|
||||||
|
"craft/yes": ("Craft Workshop", "🛠️"),
|
||||||
|
# healthcare
|
||||||
|
"healthcare/alternative": ("Alternative Medicine", "🌿"),
|
||||||
|
"healthcare/audiologist": ("Audiologist", "👂"),
|
||||||
|
"healthcare/centre": ("Health Centre", "🏥"),
|
||||||
|
"healthcare/clinic": ("Health Clinic", "🩺"),
|
||||||
|
"healthcare/counselling": ("Counselling", "🧠"),
|
||||||
|
"healthcare/dentist": ("Dental Practice", "🦷"),
|
||||||
|
"healthcare/doctor": ("GP Surgery", "👨⚕️"),
|
||||||
|
"healthcare/hospital": ("Hospital", "🏥"),
|
||||||
|
"healthcare/laboratory": ("Medical Lab", "🔬"),
|
||||||
|
"healthcare/optometrist": ("Optometrist", "👁️"),
|
||||||
|
"healthcare/pharmacy": ("Pharmacy", "💊"),
|
||||||
|
"healthcare/physiotherapist": ("Physiotherapist", "🏃"),
|
||||||
|
"healthcare/podiatrist": ("Podiatrist", "🦶"),
|
||||||
|
"healthcare/psychotherapist": ("Psychotherapist", "🧠"),
|
||||||
|
"healthcare/rehabilitation": ("Rehabilitation Centre", "♿"),
|
||||||
|
"healthcare/vaccination_centre": ("Vaccination Centre", "💉"),
|
||||||
|
"healthcare/yes": ("Healthcare Facility", "🏥"),
|
||||||
|
# leisure
|
||||||
|
"leisure/adult_gaming_centre": ("Adult Gaming Centre", "🎮"),
|
||||||
|
"leisure/amusement_arcade": ("Amusement Arcade", "🕹️"),
|
||||||
|
"leisure/bandstand": ("Bandstand", "🎺"),
|
||||||
|
"leisure/bathing_place": ("Bathing Spot", "🏖️"),
|
||||||
|
"leisure/bird_hide": ("Bird Hide", "🐦"),
|
||||||
|
"leisure/bowling_alley": ("Bowling Alley", "🎳"),
|
||||||
|
"leisure/common": ("Common Land", "🌳"),
|
||||||
|
"leisure/dance": ("Dance Venue", "💃"),
|
||||||
|
"leisure/dog_park": ("Dog Park", "🐕"),
|
||||||
|
"leisure/escape_game": ("Escape Room", "🔓"),
|
||||||
|
"leisure/fitness_centre": ("Gym", "🏋️"),
|
||||||
|
"leisure/fitness_station": ("Outdoor Gym", "💪"),
|
||||||
|
"leisure/garden": ("Garden", "🌷"),
|
||||||
|
"leisure/golf_course": ("Golf Course", "⛳"),
|
||||||
|
"leisure/hackerspace": ("Hackerspace", "💻"),
|
||||||
|
"leisure/horse_riding": ("Horse Riding", "🐎"),
|
||||||
|
"leisure/indoor_play": ("Indoor Play Area", "🧒"),
|
||||||
|
"leisure/marina": ("Marina", "⚓"),
|
||||||
|
"leisure/miniature_golf": ("Mini Golf", "⛳"),
|
||||||
|
"leisure/nature_reserve": ("Nature Reserve", "🦔"),
|
||||||
|
"leisure/outdoor_seating": ("Outdoor Seating", "🪑"),
|
||||||
|
"leisure/park": ("Park", "🌳"),
|
||||||
|
"leisure/pitch": ("Sports Pitch", "⚽"),
|
||||||
|
"leisure/playground": ("Playground", "🛝"),
|
||||||
|
"leisure/sauna": ("Sauna", "🧖"),
|
||||||
|
"leisure/slipway": ("Slipway", "🚤"),
|
||||||
|
"leisure/social_club": ("Social Club", "🍻"),
|
||||||
|
"leisure/sports_centre": ("Sports Centre", "🏟️"),
|
||||||
|
"leisure/sports_hall": ("Sports Hall", "🏀"),
|
||||||
|
"leisure/swimming_pool": ("Swimming Pool", "🏊"),
|
||||||
|
"leisure/tanning_salon": ("Tanning Salon", "☀️"),
|
||||||
|
"leisure/track": ("Running Track", "🏃"),
|
||||||
|
"leisure/trampoline_park": ("Trampoline Park", "🤸"),
|
||||||
|
"leisure/water_park": ("Water Park", "🌊"),
|
||||||
|
"leisure/wildlife_hide": ("Wildlife Hide", "🦌"),
|
||||||
|
"leisure/yes": ("Leisure Facility", "🎉"),
|
||||||
|
# office
|
||||||
|
"office/accountant": ("Accountant", "🧮"),
|
||||||
|
"office/advertising_agency": ("Advertising Agency", "📢"),
|
||||||
|
"office/architect": ("Architect", "📐"),
|
||||||
|
"office/association": ("Association", "🏛️"),
|
||||||
|
"office/charity": ("Charity", "❤️"),
|
||||||
|
"office/construction_company": ("Construction Company", "🏗️"),
|
||||||
|
"office/consulting": ("Consulting Firm", "📊"),
|
||||||
|
"office/courier": ("Courier Service", "📦"),
|
||||||
|
"office/coworking": ("Co-working Space", "💻"),
|
||||||
|
"office/design": ("Design Studio", "🎨"),
|
||||||
|
"office/diplomatic": ("Diplomatic Office", "🏛️"),
|
||||||
|
"office/educational_institution": ("Education Office", "🎓"),
|
||||||
|
"office/employment_agency": ("Employment Agency", "💼"),
|
||||||
|
"office/energy_supplier": ("Energy Supplier", "⚡"),
|
||||||
|
"office/engineer": ("Engineering Firm", "⚙️"),
|
||||||
|
"office/estate_agent": ("Estate Agent", "🏠"),
|
||||||
|
"office/financial": ("Financial Services", "💰"),
|
||||||
|
"office/financial_advisor": ("Financial Advisor", "📈"),
|
||||||
|
"office/foundation": ("Foundation", "🏛️"),
|
||||||
|
"office/government": ("Government Office", "🏛️"),
|
||||||
|
"office/graphic_design": ("Graphic Design", "🖌️"),
|
||||||
|
"office/healthcare": ("Healthcare Office", "🏥"),
|
||||||
|
"office/home_care": ("Home Care Service", "🏠"),
|
||||||
|
"office/insurance": ("Insurance", "🛡️"),
|
||||||
|
"office/interior_design": ("Interior Design", "🛋️"),
|
||||||
|
"office/it": ("IT Company", "💻"),
|
||||||
|
"office/lawyer": ("Lawyer", "⚖️"),
|
||||||
|
"office/logistics": ("Logistics", "🚚"),
|
||||||
|
"office/marketing": ("Marketing Agency", "📣"),
|
||||||
|
"office/mortgage": ("Mortgage Broker", "🏦"),
|
||||||
|
"office/moving_company": ("Moving Company", "📦"),
|
||||||
|
"office/newspaper": ("Newspaper Office", "📰"),
|
||||||
|
"office/ngo": ("NGO", "🌍"),
|
||||||
|
"office/notary": ("Notary", "📜"),
|
||||||
|
"office/political_party": ("Political Party", "🗳️"),
|
||||||
|
"office/politician": ("Politician Office", "🏛️"),
|
||||||
|
"office/property_management": ("Property Management", "🏘️"),
|
||||||
|
"office/recruitment": ("Recruitment Agency", "👥"),
|
||||||
|
"office/religion": ("Religious Office", "✝️"),
|
||||||
|
"office/research": ("Research Office", "🔬"),
|
||||||
|
"office/security": ("Security Company", "🔒"),
|
||||||
|
"office/solicitor": ("Solicitor", "⚖️"),
|
||||||
|
"office/surveyor": ("Surveyor", "📏"),
|
||||||
|
"office/tax_advisor": ("Tax Advisor", "🧾"),
|
||||||
|
"office/taxi": ("Taxi Office", "🚕"),
|
||||||
|
"office/telecommunication": ("Telecoms Office", "📡"),
|
||||||
|
"office/therapist": ("Therapist", "🧠"),
|
||||||
|
"office/travel_agent": ("Travel Agent", "✈️"),
|
||||||
|
"office/union": ("Trade Union", "✊"),
|
||||||
|
"office/university": ("University Office", "🎓"),
|
||||||
|
"office/vacant": ("Vacant Office", "🏚️"),
|
||||||
|
"office/web_design": ("Web Design", "🌐"),
|
||||||
|
# shop
|
||||||
|
"shop/accessories": ("Accessories Shop", "👜"),
|
||||||
|
"shop/agrarian": ("Farm Supply Shop", "🌾"),
|
||||||
|
"shop/alcohol": ("Off-Licence", "🍷"),
|
||||||
|
"shop/antiques": ("Antiques Shop", "🏺"),
|
||||||
|
"shop/appliance": ("Appliance Shop", "🔌"),
|
||||||
|
"shop/art": ("Art Shop", "🎨"),
|
||||||
|
"shop/baby_goods": ("Baby Shop", "🍼"),
|
||||||
|
"shop/bag": ("Bag Shop", "👜"),
|
||||||
|
"shop/bakery": ("Bakery", "🥐"),
|
||||||
|
"shop/bathroom": ("Bathroom Shop", "🛁"),
|
||||||
|
"shop/bathroom_furnishing": ("Bathroom Furnishings", "🚿"),
|
||||||
|
"shop/beauty": ("Beauty Shop", "💄"),
|
||||||
|
"shop/bed": ("Bed Shop", "🛏️"),
|
||||||
|
"shop/beverages": ("Drinks Shop", "🥤"),
|
||||||
|
"shop/bicycle": ("Bike Shop", "🚲"),
|
||||||
|
"shop/boat": ("Boat Shop", "⛵"),
|
||||||
|
"shop/bookmaker": ("Bookmaker", "🏇"),
|
||||||
|
"shop/books": ("Bookshop", "📚"),
|
||||||
|
"shop/boutique": ("Boutique", "👗"),
|
||||||
|
"shop/building_materials": ("Building Materials", "🧱"),
|
||||||
|
"shop/butcher": ("Butcher", "🥩"),
|
||||||
|
"shop/camera": ("Camera Shop", "📷"),
|
||||||
|
"shop/candles": ("Candle Shop", "🕯️"),
|
||||||
|
"shop/car": ("Car Dealership", "🚗"),
|
||||||
|
"shop/car;car_repair": ("Car Sales & Repair", "🚗"),
|
||||||
|
"shop/car_parts": ("Car Parts", "🔩"),
|
||||||
|
"shop/car_repair": ("Car Repair", "🔧"),
|
||||||
|
"shop/caravan": ("Caravan Dealer", "🚐"),
|
||||||
|
"shop/carpet": ("Carpet Shop", "🧶"),
|
||||||
|
"shop/catalogue": ("Catalogue Shop", "📋"),
|
||||||
|
"shop/charity": ("Charity Shop", "❤️"),
|
||||||
|
"shop/cheese": ("Cheese Shop", "🧀"),
|
||||||
|
"shop/chemist": ("Chemist", "🧪"),
|
||||||
|
"shop/chocolate": ("Chocolate Shop", "🍫"),
|
||||||
|
"shop/clothes": ("Clothes Shop", "👕"),
|
||||||
|
"shop/coffee": ("Coffee Shop", "☕"),
|
||||||
|
"shop/collector": ("Collector Shop", "🏆"),
|
||||||
|
"shop/computer": ("Computer Shop", "🖥️"),
|
||||||
|
"shop/confectionery": ("Sweet Shop", "🍬"),
|
||||||
|
"shop/convenience": ("Convenience Store", "🏪"),
|
||||||
|
"shop/copyshop": ("Copy Shop", "🖨️"),
|
||||||
|
"shop/cosmetics": ("Cosmetics Shop", "💅"),
|
||||||
|
"shop/country_store": ("Country Store", "🏡"),
|
||||||
|
"shop/craft": ("Craft Shop", "✂️"),
|
||||||
|
"shop/curtain": ("Curtain Shop", "🪟"),
|
||||||
|
"shop/dairy": ("Dairy Shop", "🥛"),
|
||||||
|
"shop/deli": ("Delicatessen", "🧆"),
|
||||||
|
"shop/department_store": ("Department Store", "🏬"),
|
||||||
|
"shop/discount": ("Discount Store", "💲"),
|
||||||
|
"shop/doityourself": ("DIY Store", "🔨"),
|
||||||
|
"shop/doors": ("Door Shop", "🚪"),
|
||||||
|
"shop/dry_cleaning": ("Dry Cleaner", "👔"),
|
||||||
|
"shop/e-cigarette": ("Vape Shop", "💨"),
|
||||||
|
"shop/electrical": ("Electrical Shop", "⚡"),
|
||||||
|
"shop/electronics": ("Electronics Shop", "📱"),
|
||||||
|
"shop/erotic": ("Adult Shop", "🔞"),
|
||||||
|
"shop/esoteric": ("Esoteric Shop", "🔮"),
|
||||||
|
"shop/estate_agent": ("Estate Agent", "🏠"),
|
||||||
|
"shop/fabric": ("Fabric Shop", "🧵"),
|
||||||
|
"shop/fan": ("Fan Shop", "🏅"),
|
||||||
|
"shop/farm": ("Farm Shop", "🥕"),
|
||||||
|
"shop/fashion_accessories": ("Fashion Accessories", "👒"),
|
||||||
|
"shop/fireplace": ("Fireplace Shop", "🔥"),
|
||||||
|
"shop/fishing": ("Fishing Shop", "🎣"),
|
||||||
|
"shop/flooring": ("Flooring Shop", "🪵"),
|
||||||
|
"shop/florist": ("Florist", "💐"),
|
||||||
|
"shop/food": ("Food Shop", "🍞"),
|
||||||
|
"shop/frame": ("Framing Shop", "🖼️"),
|
||||||
|
"shop/frozen_food": ("Frozen Food Shop", "🧊"),
|
||||||
|
"shop/fuel": ("Fuel Shop", "⛽"),
|
||||||
|
"shop/funeral_directors": ("Funeral Director", "⚰️"),
|
||||||
|
"shop/furniture": ("Furniture Shop", "🪑"),
|
||||||
|
"shop/games": ("Games Shop", "🎮"),
|
||||||
|
"shop/garden_centre": ("Garden Centre", "🌻"),
|
||||||
|
"shop/gas": ("Gas Shop", "🔥"),
|
||||||
|
"shop/general": ("General Store", "🏪"),
|
||||||
|
"shop/gift": ("Gift Shop", "🎁"),
|
||||||
|
"shop/glaziery": ("Glazier", "🪟"),
|
||||||
|
"shop/greengrocer": ("Greengrocer", "🥬"),
|
||||||
|
"shop/grocery": ("Grocery Shop", "🛒"),
|
||||||
|
"shop/haberdashery": ("Haberdashery", "🧵"),
|
||||||
|
"shop/hairdresser": ("Hairdresser", "💇"),
|
||||||
|
"shop/hairdresser_supply": ("Hairdresser Supply", "💇"),
|
||||||
|
"shop/hardware": ("Hardware Shop", "🔩"),
|
||||||
|
"shop/health": ("Health Shop", "🌿"),
|
||||||
|
"shop/health_food": ("Health Food Shop", "🥗"),
|
||||||
|
"shop/hearing_aids": ("Hearing Aid Shop", "👂"),
|
||||||
|
"shop/herbalist": ("Herbalist", "🌿"),
|
||||||
|
"shop/hifi": ("Hi-Fi Shop", "🔊"),
|
||||||
|
"shop/household": ("Household Shop", "🏠"),
|
||||||
|
"shop/household_linen": ("Linen Shop", "🛏️"),
|
||||||
|
"shop/houseware": ("Houseware Shop", "🍳"),
|
||||||
|
"shop/ice_cream": ("Ice Cream Shop", "🍦"),
|
||||||
|
"shop/interior_decoration": ("Interior Decoration", "🖼️"),
|
||||||
|
"shop/jewelry": ("Jewellery Shop", "💍"),
|
||||||
|
"shop/kiosk": ("Kiosk", "🏪"),
|
||||||
|
"shop/kitchen": ("Kitchen Shop", "🍳"),
|
||||||
|
"shop/laundry": ("Laundry", "🧺"),
|
||||||
|
"shop/leather": ("Leather Shop", "🧳"),
|
||||||
|
"shop/lighting": ("Lighting Shop", "💡"),
|
||||||
|
"shop/locksmith": ("Locksmith", "🔐"),
|
||||||
|
"shop/mall": ("Shopping Centre", "🏬"),
|
||||||
|
"shop/massage": ("Massage Parlour", "💆"),
|
||||||
|
"shop/medical_supply": ("Medical Supply", "🩺"),
|
||||||
|
"shop/military_surplus": ("Military Surplus", "🎖️"),
|
||||||
|
"shop/mobile_phone": ("Mobile Phone Shop", "📱"),
|
||||||
|
"shop/mobile_phone_accessories": ("Phone Accessories", "📱"),
|
||||||
|
"shop/mobility": ("Mobility Shop", "♿"),
|
||||||
|
"shop/mobility_scooter": ("Mobility Scooter Shop", "🦽"),
|
||||||
|
"shop/model": ("Model Shop", "✈️"),
|
||||||
|
"shop/money_lender": ("Money Lender", "💰"),
|
||||||
|
"shop/motorcycle": ("Motorcycle Shop", "🏍️"),
|
||||||
|
"shop/motorcycle_repair": ("Motorcycle Repair", "🔧"),
|
||||||
|
"shop/music": ("Music Shop", "🎵"),
|
||||||
|
"shop/musical_instrument": ("Musical Instrument Shop", "🎸"),
|
||||||
|
"shop/newsagent": ("Newsagent", "📰"),
|
||||||
|
"shop/nutrition_supplements": ("Nutrition Shop", "💪"),
|
||||||
|
"shop/optician": ("Optician", "👓"),
|
||||||
|
"shop/outdoor": ("Outdoor Shop", "🏕️"),
|
||||||
|
"shop/outpost": ("Outpost", "📦"),
|
||||||
|
"shop/paint": ("Paint Shop", "🎨"),
|
||||||
|
"shop/party": ("Party Shop", "🎈"),
|
||||||
|
"shop/pastry": ("Pastry Shop", "🥐"),
|
||||||
|
"shop/pawnbroker": ("Pawnbroker", "💰"),
|
||||||
|
"shop/perfumery": ("Perfumery", "🌸"),
|
||||||
|
"shop/pet": ("Pet Shop", "🐾"),
|
||||||
|
"shop/pet_grooming": ("Pet Grooming", "🐩"),
|
||||||
|
"shop/photo": ("Photo Shop", "📸"),
|
||||||
|
"shop/piercing": ("Piercing Studio", "💎"),
|
||||||
|
"shop/plant_hire": ("Plant Hire", "🚜"),
|
||||||
|
"shop/pottery": ("Pottery Shop", "🏺"),
|
||||||
|
"shop/printer_ink": ("Ink & Toner Shop", "🖨️"),
|
||||||
|
"shop/printing": ("Print Shop", "🖨️"),
|
||||||
|
"shop/psychic": ("Psychic", "🔮"),
|
||||||
|
"shop/pyrotechnics": ("Fireworks Shop", "🎆"),
|
||||||
|
"shop/religion": ("Religious Shop", "✝️"),
|
||||||
|
"shop/rental": ("Rental Shop", "🔑"),
|
||||||
|
"shop/repair": ("Repair Shop", "🔧"),
|
||||||
|
"shop/scuba_diving": ("Scuba Diving Shop", "🤿"),
|
||||||
|
"shop/seafood": ("Fishmonger", "🐟"),
|
||||||
|
"shop/second_hand": ("Second-Hand Shop", "♻️"),
|
||||||
|
"shop/security": ("Security Shop", "🔒"),
|
||||||
|
"shop/sewing": ("Sewing Shop", "🪡"),
|
||||||
|
"shop/shoe_repair": ("Shoe Repair", "👞"),
|
||||||
|
"shop/shoes": ("Shoe Shop", "👟"),
|
||||||
|
"shop/sports": ("Sports Shop", "⚽"),
|
||||||
|
"shop/stationery": ("Stationery Shop", "✏️"),
|
||||||
|
"shop/storage_rental": ("Self Storage", "📦"),
|
||||||
|
"shop/supermarket": ("Supermarket", "🛒"),
|
||||||
|
"shop/swimming_pool": ("Pool Supplies", "🏊"),
|
||||||
|
"shop/tailor": ("Tailor", "🧵"),
|
||||||
|
"shop/tattoo": ("Tattoo Studio", "🖋️"),
|
||||||
|
"shop/taxi": ("Taxi Booking", "🚕"),
|
||||||
|
"shop/tea": ("Tea Shop", "🫖"),
|
||||||
|
"shop/telecommunication": ("Telecoms Shop", "📡"),
|
||||||
|
"shop/ticket": ("Ticket Office", "🎫"),
|
||||||
|
"shop/tiles": ("Tile Shop", "🔲"),
|
||||||
|
"shop/tobacco": ("Tobacconist", "🚬"),
|
||||||
|
"shop/tool_hire": ("Tool Hire", "🧰"),
|
||||||
|
"shop/toys": ("Toy Shop", "🧸"),
|
||||||
|
"shop/trade": ("Trade Supplier", "🏭"),
|
||||||
|
"shop/travel_agency": ("Travel Agency", "✈️"),
|
||||||
|
"shop/trophy": ("Trophy Shop", "🏆"),
|
||||||
|
"shop/tyres": ("Tyre Shop", "🛞"),
|
||||||
|
"shop/vacant": ("Vacant Shop", "🏚️"),
|
||||||
|
"shop/variety_store": ("Variety Store", "🏪"),
|
||||||
|
"shop/video": ("Video Shop", "📀"),
|
||||||
|
"shop/video_games": ("Video Game Shop", "🎮"),
|
||||||
|
"shop/watches": ("Watch Shop", "⌚"),
|
||||||
|
"shop/water_sports": ("Water Sports Shop", "🏄"),
|
||||||
|
"shop/weapons": ("Weapons Shop", "🗡️"),
|
||||||
|
"shop/wedding": ("Wedding Shop", "💒"),
|
||||||
|
"shop/wholesale": ("Wholesaler", "📦"),
|
||||||
|
"shop/wigs": ("Wig Shop", "💇"),
|
||||||
|
"shop/window_blind": ("Blinds Shop", "🪟"),
|
||||||
|
"shop/windows": ("Window Shop", "🪟"),
|
||||||
|
"shop/wine": ("Wine Shop", "🍷"),
|
||||||
|
"shop/wool": ("Wool Shop", "🧶"),
|
||||||
|
"shop/yes": ("Shop", "🛍️"),
|
||||||
|
# tourism
|
||||||
|
"tourism/artwork": ("Public Artwork", "🎨"),
|
||||||
|
"tourism/attraction": ("Tourist Attraction", "📸"),
|
||||||
|
"tourism/camp_site": ("Campsite", "⛺"),
|
||||||
|
"tourism/caravan_site": ("Caravan Site", "🚐"),
|
||||||
|
"tourism/chalet": ("Chalet", "🏔️"),
|
||||||
|
"tourism/gallery": ("Gallery", "🖼️"),
|
||||||
|
"tourism/guest_house": ("Guest House", "🏡"),
|
||||||
|
"tourism/hostel": ("Hostel", "🛏️"),
|
||||||
|
"tourism/hotel": ("Hotel", "🏨"),
|
||||||
|
"tourism/motel": ("Motel", "🏨"),
|
||||||
|
"tourism/museum": ("Museum", "🏛️"),
|
||||||
|
"tourism/picnic_site": ("Picnic Site", "🧺"),
|
||||||
|
"tourism/preserved_railway": ("Heritage Railway", "🚂"),
|
||||||
|
"tourism/theme_park": ("Theme Park", "🎢"),
|
||||||
|
"tourism/viewpoint": ("Viewpoint", "🔭"),
|
||||||
|
"tourism/zoo": ("Zoo", "🦁"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Emoji for each NaPTAN stop category. transform() applies this table to the
# NaPTAN frame's "category" column with replace_strict to build its "emoji"
# column; the category text itself is left unchanged.
NAPTAN_EMOJIS: dict[str, str] = {
    "Airport": "✈️",
    "Ferry": "⛴️",
    "Rail station": "🚆",
    "Bus stop": "🚏",
    "Bus station": "🚌",
    "Taxi rank": "🚕",
    "Metro or Tram stop": "🚊",
}
|
||||||
|
|
||||||
|
|
||||||
|
def transform(input_path: Path, naptan_path: Path | None = None) -> pl.LazyFrame:
    """Filter raw POIs, attach friendly names and emojis, and append NaPTAN stops.

    Args:
        input_path: Parquet file of raw POIs with a ``category`` column holding
            raw keys such as ``"amenity/cafe"``.
        naptan_path: Optional parquet file of NaPTAN stops. Its ``category``
            column is looked up in ``NAPTAN_EMOJIS``. When ``None``, only the
            mapped POIs are returned.

    Returns:
        A lazy frame of the kept POIs in which ``category`` holds the friendly
        name and ``group`` / ``emoji`` columns have been added. When
        ``naptan_path`` is given, the NaPTAN rows (with ``emoji`` and a
        ``"Public Transport"`` group) are concatenated diagonally onto it.

    Raises:
        ValueError: If the data contains a category that is neither in
            ``DROP_CATEGORIES`` nor in ``CATEGORY_MAP``, if ``CATEGORY_MAP``
            lists a category that is absent from the data, or if a mapping
            entry has an empty friendly name or emoji.
    """
    lf = pl.scan_parquet(input_path)

    # One eager pass over the category column so both tables can be validated
    # against the data before the lazy plan is built.
    all_categories = (
        lf.select("category")
        .unique()
        .collect(engine="streaming")
        .to_series()
        .to_list()
    )

    # Verify every non-dropped category has a mapping
    unmapped = [
        cat
        for cat in all_categories
        if cat not in DROP_CATEGORIES and cat not in CATEGORY_MAP
    ]
    if unmapped:
        # key=str keeps the sort from raising TypeError when a null category
        # sits next to string categories; the message is unchanged otherwise.
        raise ValueError(
            f"Categories missing from CATEGORY_MAP: {sorted(unmapped, key=str)}"
        )

    # Verify every CATEGORY_MAP key actually exists in the data (catch typos)
    all_set = set(all_categories)
    mapped_but_absent = [cat for cat in CATEGORY_MAP if cat not in all_set]
    if mapped_but_absent:
        raise ValueError(
            f"CATEGORY_MAP contains categories not in data: {sorted(mapped_but_absent)}"
        )

    # Drop unwanted categories
    lf = lf.filter(~pl.col("category").is_in(list(DROP_CATEGORIES)))

    # Build name and emoji lookup tables
    name_mapping = {k: v[0] for k, v in CATEGORY_MAP.items()}
    emoji_mapping = {k: v[1] for k, v in CATEGORY_MAP.items()}

    # Check no friendly names or emojis are missing (defensive)
    missing_names = [k for k, name in name_mapping.items() if not name]
    if missing_names:
        raise ValueError(f"Empty friendly names for: {missing_names}")
    missing_emojis = [k for k, emoji in emoji_mapping.items() if not emoji]
    if missing_emojis:
        raise ValueError(f"Empty emojis for: {missing_emojis}")

    # Derive group from the first component of the raw category key, title-cased
    group_mapping = {
        k: k.split("/")[0].replace("_", " ").title() for k in CATEGORY_MAP
    }

    # All three expressions read the raw "category" values: expressions inside
    # a single with_columns call are evaluated against the input frame, so the
    # friendly-name overwrite does not affect the group/emoji lookups.
    lf = lf.with_columns(
        pl.col("category").replace_strict(group_mapping).alias("group"),
        pl.col("category").replace_strict(name_mapping).alias("category"),
        pl.col("category").replace_strict(emoji_mapping).alias("emoji"),
    )

    # The signature allows omitting NaPTAN; scanning a None path would fail.
    if naptan_path is None:
        return lf

    naptan = pl.scan_parquet(naptan_path).with_columns(
        pl.col("category").replace_strict(NAPTAN_EMOJIS).alias("emoji"),
        pl.lit("Public Transport").alias("group"),
    )
    return pl.concat([lf, naptan], how="diagonal_relaxed")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: build the filtered POI table and write it out.

    Parses the three required path options, runs ``transform`` on the raw POI
    and NaPTAN inputs, writes the collected frame as parquet, and prints the
    output size together with a per-category row count.
    """
    cli = argparse.ArgumentParser(
        description="Transform raw POIs to filtered version with friendly names"
    )
    # All options are required filesystem paths; only flag and help text differ.
    path_options = (
        ("--input", "Raw POIs parquet file"),
        ("--naptan", "NaPTAN stations parquet file"),
        ("--output", "Output filtered POIs parquet file"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, required=True, help=help_text)
    opts = cli.parse_args()

    pois = transform(opts.input, opts.naptan).collect(engine="streaming")
    pois.write_parquet(opts.output)

    bytes_per_mb = 1024 * 1024
    size_mb = opts.output.stat().st_size / bytes_per_mb
    print(f"Wrote {opts.output} ({size_mb:.1f} MB, {len(pois):,} POIs)")
    print(f"\nCategories ({pois['category'].n_unique()}):")

    # Largest categories first.
    per_category = pois.group_by("category", "emoji").len().sort("len", descending=True)
    for entry in per_category.iter_rows(named=True):
        emoji, category, total = entry["emoji"], entry["category"], entry["len"]
        print(f" {emoji} {category}: {total:,}")


if __name__ == "__main__":
    main()
|
||||||
13
pipeline/utils/__init__.py
Normal file
13
pipeline/utils/__init__.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
from .download import download, extract_zip
|
||||||
|
from .fuzzy_join import fuzzy_join_on_postcode
|
||||||
|
from .haversine import haversine_km, haversine_km_expr
|
||||||
|
from .poi_counts import count_pois_within_radius
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"download",
|
||||||
|
"extract_zip",
|
||||||
|
"fuzzy_join_on_postcode",
|
||||||
|
"haversine_km",
|
||||||
|
"haversine_km_expr",
|
||||||
|
"count_pois_within_radius",
|
||||||
|
]
|
||||||
40
pipeline/utils/download.py
Normal file
40
pipeline/utils/download.py
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
"""Shared download and extraction helpers for pipeline scripts."""
|
||||||
|
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def download(url: str, output_path: Path, *, timeout: float = 120) -> None:
    """Stream-download a URL to a local file with a tqdm progress bar.

    The response body is written to a sibling ``<name>.part`` file and moved
    onto ``output_path`` only once the transfer has finished, so a failed or
    interrupted download never leaves a truncated file at the destination.

    Args:
        url: Address fetched with a GET request; redirects are followed.
        output_path: Destination file. Its parent directory must already exist.
        timeout: Read timeout in seconds. The remaining httpx timeouts
            (connect, write, pool) are fixed at 30 seconds.

    Raises:
        httpx.HTTPStatusError: If the server responds with an error status.
    """
    partial_path = output_path.with_name(output_path.name + ".part")
    try:
        with httpx.stream(
            "GET",
            url,
            follow_redirects=True,
            timeout=httpx.Timeout(30.0, read=timeout),
        ) as response:
            response.raise_for_status()  # pyright: ignore[reportUnusedCallResult]
            # A missing Content-Length yields 0, which is passed to tqdm as
            # ``None`` so the bar runs without a known total.
            total = int(response.headers.get("content-length", 0))

            with (
                open(partial_path, "wb") as f,
                tqdm(
                    total=total or None,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    desc=output_path.name,
                ) as pbar,
            ):
                for chunk in response.iter_bytes(chunk_size=8192):
                    f.write(chunk)
                    pbar.update(len(chunk))

        # Only a complete download ever becomes visible under the final name.
        partial_path.replace(output_path)
    except BaseException:
        # Covers HTTP errors, I/O errors and Ctrl-C alike: drop the partial file.
        partial_path.unlink(missing_ok=True)
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def extract_zip(zip_path: Path, extract_dir: Path) -> None:
    """Unpack every member of the ZIP archive at ``zip_path`` into ``extract_dir``.

    The target directory (including missing parents) is created before the
    archive is opened.
    """
    extract_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as archive:
        archive.extractall(path=extract_dir)
|
||||||
194
pipeline/utils/fuzzy_join.py
Normal file
194
pipeline/utils/fuzzy_join.py
Normal file
|
|
@ -0,0 +1,194 @@
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
|
from os import cpu_count
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
|
from thefuzz import fuzz
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
_NUMBER_RE = re.compile(r"\d+")
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize(s: pl.Expr) -> pl.Expr:
    """Return an expression that canonicalises an address string for comparison.

    The text is upper-cased, commas, full stops and hyphens become spaces,
    runs of whitespace collapse to a single space, and the ends are stripped.
    """
    upper = s.str.to_uppercase()
    without_punctuation = upper.str.replace_all(r"[,.\-]", " ")
    single_spaced = without_punctuation.str.replace_all(r"\s+", " ")
    return single_spaced.str.strip_chars()
|
||||||
|
|
||||||
|
|
||||||
|
def fuzzy_join_on_postcode(
    left: pl.LazyFrame,
    right: pl.LazyFrame,
    left_address_col: str,
    right_address_col: str,
    left_postcode_col: str,
    right_postcode_col: str,
) -> pl.LazyFrame:
    """Fuzzy join two LazyFrames by matching addresses within postcode buckets.

    Sinks each side to a temporary parquet file so the upstream pipeline
    executes only once. The matching phase collects just three narrow
    columns (index, address, postcode) via projection pushdown, and the
    final join reads the remaining columns lazily.

    Matching is one-to-one. Every left/right pair that shares a postcode and
    passes ``_numbers_compatible`` is scored with ``fuzz.token_sort_ratio``;
    pairs are then assigned greedily from the highest score downward (ties go
    to the lower left row index), and each row is used at most once. Rows with
    a null address or postcode, on either side, never take part in matching.

    The returned LazyFrame scans the temporary parquet files, so the temporary
    directory is removed only when this function raises; on success it is left
    in place for the caller to collect from.

    Args:
        left: Frame whose rows are all kept in the result.
        right: Frame whose columns are attached to matching left rows.
        left_address_col: Name of the address column in ``left``.
        right_address_col: Name of the address column in ``right``.
        left_postcode_col: Name of the postcode column in ``left``.
        right_postcode_col: Name of the postcode column in ``right``.

    Returns:
        A LazyFrame with all left and right columns. Unmatched rows
        have null right columns.
    """
    tmpdir = tempfile.mkdtemp(prefix="fuzzy_join_")
    left_path = Path(tmpdir) / "left.parquet"
    right_path = Path(tmpdir) / "right.parquet"

    try:
        # Materialise each side exactly once, with a row index, to temp parquet.
        left.with_row_index("_left_idx").sink_parquet(left_path)
        right.with_row_index("_right_idx").sink_parquet(right_path)

        # Collect only the narrow columns needed for matching (projection pushdown).
        left_match = (
            pl.scan_parquet(left_path)
            .select(
                "_left_idx",
                _normalize(pl.col(left_address_col)).alias("_left_address"),
                pl.col(left_postcode_col)
                .str.strip_chars()
                .str.to_uppercase()
                .alias("_left_postcode"),
            )
            .collect(engine="streaming")
        )

        right_match = (
            pl.scan_parquet(right_path)
            .select(
                "_right_idx",
                _normalize(pl.col(right_address_col)).alias("_right_address"),
                pl.col(right_postcode_col)
                .str.strip_chars()
                .str.to_uppercase()
                .alias("_right_postcode"),
            )
            # Identical (address, postcode) candidates would only produce
            # duplicate scores, so a single representative is kept.
            .unique(subset=["_right_address", "_right_postcode"], keep="first")
            .collect(engine="streaming")
        )

        # Group right side by postcode for fast lookup. Null addresses are
        # skipped just like on the left side: the scorer runs a regex over
        # both strings and would raise TypeError on None.
        right_by_postcode: dict[str, list[tuple[int, str]]] = {}
        for idx, postcode, address in zip(
            right_match["_right_idx"],
            right_match["_right_postcode"],
            right_match["_right_address"],
        ):
            if address is not None and postcode is not None:
                right_by_postcode.setdefault(postcode, []).append((idx, address))

        # Group left side by postcode
        left_by_postcode: dict[str, list[tuple[int, str]]] = {}
        for idx, postcode, address in zip(
            left_match["_left_idx"],
            left_match["_left_postcode"],
            left_match["_left_address"],
        ):
            if address is not None and postcode is not None:
                left_by_postcode.setdefault(postcode, []).append((idx, address))

        # Free the collected frames before the scoring phase.
        del left_match, right_match

        # Build tasks for each postcode bucket present on both sides.
        tasks = [
            (left_entries, right_by_postcode[postcode])
            for postcode, left_entries in left_by_postcode.items()
            if postcode in right_by_postcode
        ]

        # Score all pairwise matches in parallel, then greedily assign from
        # highest score downward so best pairs lock in first.
        all_pairs: list[tuple[int, int, int]] = []  # (score, left_idx, right_idx)
        with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
            for pairs in tqdm(
                executor.map(_score_bucket, tasks, chunksize=64),
                total=len(tasks),
                desc="Fuzzy matching",
            ):
                all_pairs.extend(pairs)

        del tasks, left_by_postcode, right_by_postcode

        # Sort descending by score so best matches are assigned first; the
        # negated left index makes equal scores favour the earlier left row.
        all_pairs.sort(key=lambda t: (t[0], -t[1]), reverse=True)

        matches: list[tuple[int, int]] = []
        matched_left: set[int] = set()
        matched_right: set[int] = set()

        for _score, left_idx, right_idx in all_pairs:
            if left_idx in matched_left or right_idx in matched_right:
                continue
            matches.append((left_idx, right_idx))
            matched_left.add(left_idx)
            matched_right.add(right_idx)

        del all_pairs, matched_left, matched_right

        # Build a small mapping LazyFrame and join back to the cached parquets.
        # The explicit dtype keeps the columns typed even when nothing matched.
        mapping = pl.LazyFrame(
            {
                "_left_idx": pl.Series([m[0] for m in matches], dtype=pl.UInt32),
                "_right_idx": pl.Series([m[1] for m in matches], dtype=pl.UInt32),
            }
        )

        left_cached = pl.scan_parquet(left_path)
        right_cached = pl.scan_parquet(right_path)

        return (
            left_cached.join(mapping, on="_left_idx", how="left")
            .join(right_cached, on="_right_idx", how="left")
            .drop("_left_idx", "_right_idx")
        )
    except BaseException:
        # Nothing can use the cached parquet files after a failure.
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def _numbers_compatible(a: str, b: str) -> bool:
    """Tell whether the digit runs (flat/house numbers) of two addresses agree.

    Each address is reduced to its set of digit runs. The addresses are
    compatible when the smaller set is contained in the larger one, except
    that an address without any number never matches one that has numbers.
    Two addresses without numbers are compatible.
    """
    digits_a = set(_NUMBER_RE.findall(a))
    digits_b = set(_NUMBER_RE.findall(b))

    # On equal sizes the first address is treated as the smaller set.
    if len(digits_a) <= len(digits_b):
        fewer, more = digits_a, digits_b
    else:
        fewer, more = digits_b, digits_a

    if more and not fewer:
        return False
    return fewer.issubset(more)
|
||||||
|
|
||||||
|
|
||||||
|
def _score_bucket(
    args: tuple[list[tuple[int, str]], list[tuple[int, str]]],
) -> list[tuple[int, int, int]]:
    """Score all address pairs within a single postcode bucket.

    Args:
        args: ``(left_entries, right_entries)`` packed into one tuple so the
            function can be handed directly to ``Executor.map``. Each entry
            is a ``(row_index, normalised_address)`` pair.

    Returns:
        ``(score, left_row, right_row)`` for every pair whose house/flat
        numbers are compatible; incompatible pairs are omitted.
    """
    left_entries, right_entries = args
    pairs: list[tuple[int, int, int]] = []
    for left_row, left_address in left_entries:
        for right_row, right_address in right_entries:
            # Cheap number check first: skips the fuzzy comparison entirely
            # for addresses that cannot refer to the same property.
            if not _numbers_compatible(left_address, right_address):
                continue
            score = fuzz.token_sort_ratio(left_address, right_address)
            pairs.append((score, left_row, right_row))
    return pairs
|
||||||
43
pipeline/utils/haversine.py
Normal file
43
pipeline/utils/haversine.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
_EARTH_RADIUS_KM = 6371.0


def haversine_km(
    lat1: np.ndarray, lon1: np.ndarray, lat2: float, lon2: float
) -> np.ndarray:
    """Compute haversine distance in km between arrays (lat1, lon1) and a single point (lat2, lon2).

    Args:
        lat1: Latitudes of the origin points, in degrees.
        lon1: Longitudes of the origin points, in degrees.
        lat2: Latitude of the destination point, in degrees.
        lon2: Longitude of the destination point, in degrees.

    Returns:
        Great-circle distances in kilometres on a sphere of radius
        ``_EARTH_RADIUS_KM``, one per origin point.
    """
    lat1_rad = np.radians(lat1)
    lon1_rad = np.radians(lon1)
    lat2_rad = np.radians(lat2)
    lon2_rad = np.radians(lon2)
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad
    a = (
        np.sin(dlat / 2) ** 2
        + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon / 2) ** 2
    )
    # Rounding can push the haversine term marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    return _EARTH_RADIUS_KM * c
|
||||||
|
|
||||||
|
|
||||||
|
def haversine_km_expr(
    lat_col: str, lon_col: str, dest_lat: float, dest_lon: float
) -> pl.Expr:
    """Build a Polars expression for the haversine distance (km) to one fixed point.

    ``lat_col`` and ``lon_col`` name the columns holding the origin
    coordinates in degrees; ``dest_lat`` and ``dest_lon`` give the destination
    in degrees. The destination is converted to radians once, up front.
    """
    dest_lat_rad = math.radians(dest_lat)
    dest_lon_rad = math.radians(dest_lon)

    origin_lat = pl.col(lat_col).radians()
    origin_lon = pl.col(lon_col).radians()

    half_dlat = (pl.lit(dest_lat_rad) - origin_lat) / 2
    half_dlon = (pl.lit(dest_lon_rad) - origin_lon) / 2

    sin_sq_half_dlat = half_dlat.sin() ** 2
    sin_sq_half_dlon = half_dlon.sin() ** 2

    # Haversine term: sin²(Δlat/2) + cos(lat_dest)·cos(lat_origin)·sin²(Δlon/2)
    a = sin_sq_half_dlat + pl.lit(dest_lat_rad).cos() * origin_lat.cos() * sin_sq_half_dlon
    return 2 * _EARTH_RADIUS_KM * a.sqrt().arcsin()
|
||||||
174
pipeline/utils/poi_counts.py
Normal file
174
pipeline/utils/poi_counts.py
Normal file
|
|
@ -0,0 +1,174 @@
|
||||||
|
"""Count POIs within a radius of properties, optimized via postcode deduplication."""
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
from .haversine import haversine_km
|
||||||
|
|
||||||
|
|
||||||
|
def _count_pois_per_postcode(
    postcodes_df: pl.DataFrame,
    pois: pl.DataFrame,
    groups: dict[str, list[str]],
    radius_km: float = 2.0,
) -> pl.DataFrame:
    """
    For each unique postcode, count POIs within radius_km by category group.

    Uses spatial grid with vectorized distance calculations: POIs are bucketed
    into 0.05-degree cells, and for every postcode only the cells that can
    intersect the search circle are examined before the exact haversine
    distance is applied.

    Args:
        postcodes_df: Frame with ``postcode``, ``lat`` and ``lon`` columns.
        pois: Frame with ``lat``, ``lng`` and ``category`` columns.
        groups: Maps a group name to the POI categories counted under it.
        radius_km: Search radius in kilometres.

    Returns:
        A frame with a ``postcode`` column plus one ``<group>_<radius>km``
        count column per group, where ``<radius>`` is ``int(radius_km)``.
    """
    print(f"Counting POIs within {radius_km}km per postcode...")

    n_postcodes = len(postcodes_df)
    n_pois = len(pois)
    print(f" {n_postcodes:,} postcodes, {n_pois:,} POIs")

    # Build spatial grid for POIs (0.05 degree cells ~5.5km)
    grid_size = 0.05
    print(" Building POI spatial grid...")

    # Convert to numpy arrays
    poi_lats = pois["lat"].to_numpy()
    poi_lngs = pois["lng"].to_numpy()
    poi_cats = pois["category"].to_numpy()

    # Compute grid coordinates for all POIs
    poi_grid_lats = np.floor(poi_lats / grid_size).astype(np.int32)
    poi_grid_lngs = np.floor(poi_lngs / grid_size).astype(np.int32)

    # Bucket POI row numbers by grid cell, then freeze each bucket into an
    # array so it can be used for fancy indexing below.
    cell_members: dict[tuple[int, int], list[int]] = {}
    for i, key in enumerate(zip(poi_grid_lats.tolist(), poi_grid_lngs.tolist())):
        cell_members.setdefault(key, []).append(i)
    poi_grid = {
        key: np.array(members, dtype=np.int32)
        for key, members in cell_members.items()
    }

    print(f" POI grid has {len(poi_grid):,} occupied cells")

    # Pre-compute category masks
    category_masks = {}
    for group, categories in groups.items():
        mask = np.isin(poi_cats, categories)
        category_masks[group] = mask
        print(f" {group}: {mask.sum():,} POIs")

    # Extract postcode coordinates as numpy arrays
    pc_lats = postcodes_df["lat"].to_numpy()
    pc_lons = postcodes_df["lon"].to_numpy()
    pc_codes = postcodes_df["postcode"].to_list()

    # Initialize result arrays
    result_counts = {
        group: np.zeros(n_postcodes, dtype=np.int32) for group in groups
    }

    # How many cells to look at on each side of the postcode's own cell.
    # A fixed 3x3 neighbourhood is only enough while the radius fits inside
    # one cell; deriving the reach from the radius keeps larger radii exact.
    # 111.0 km per degree is slightly below the ~111.2 km implied by a
    # 6371 km sphere, so the spans err on the generous side; the exact
    # distance filter below removes any surplus candidates.
    km_per_degree = 111.0
    lat_span_deg = radius_km / km_per_degree
    lat_reach = max(1, int(np.ceil(lat_span_deg / grid_size)))
    lat_offsets = range(-lat_reach, lat_reach + 1)

    # Process in batches with progress
    batch_size = 50000
    n_batches = (n_postcodes + batch_size - 1) // batch_size

    print(f" Processing {n_postcodes:,} postcodes in {n_batches} batches...")

    for batch_idx in range(n_batches):
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, n_postcodes)

        if batch_idx % 5 == 0:
            print(
                f" Batch {batch_idx + 1}/{n_batches}: postcodes {start_idx:,} - {end_idx:,}"
            )

        # Process batch
        for i in range(start_idx, end_idx):
            pc_lat = pc_lats[i]
            pc_lon = pc_lons[i]

            # Grid cell containing the postcode
            grid_lat = int(np.floor(pc_lat / grid_size))
            grid_lng = int(np.floor(pc_lon / grid_size))

            # A degree of longitude shrinks with cos(latitude). Evaluate it
            # at the poleward edge of the search circle, clamped short of the
            # pole so the reach stays finite.
            edge_lat = min(abs(pc_lat) + lat_span_deg, 89.0)
            lng_span_deg = lat_span_deg / np.cos(np.radians(edge_lat))
            lng_reach = max(1, int(np.ceil(lng_span_deg / grid_size)))

            # Collect nearby POI indices
            nearby_indices = []
            for dlat in lat_offsets:
                for dlng in range(-lng_reach, lng_reach + 1):
                    cell_key = (grid_lat + dlat, grid_lng + dlng)
                    if cell_key in poi_grid:
                        nearby_indices.append(poi_grid[cell_key])

            if not nearby_indices:
                continue

            # Concatenate all nearby POI indices
            nearby = np.concatenate(nearby_indices)

            # Vectorized distance calculation for all nearby POIs
            distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lat, pc_lon)

            # Filter by radius
            within_mask = distances <= radius_km
            within_indices = nearby[within_mask]

            if len(within_indices) == 0:
                continue

            # Count by category group using pre-computed masks
            for group, cat_mask in category_masks.items():
                result_counts[group][i] = cat_mask[within_indices].sum()

    # Build result dataframe
    result_data = {"postcode": pc_codes}
    for group in groups:
        result_data[f"{group}_{int(radius_km)}km"] = result_counts[group]

    result = pl.DataFrame(result_data)
    print(" Completed POI counting")
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def count_pois_within_radius(
    properties: pl.DataFrame, pois: pl.DataFrame, radius_km: float = 2.0
) -> dict[str, pl.Series]:
    """
    Count POIs within radius for properties, optimized by deduplicating postcodes.

    Counting runs once per distinct postcode (using the coordinates of one of
    its rows) and the counts are then joined back onto every property row.

    Args:
        properties: Frame with ``postcode``, ``lat`` and ``lon`` columns.
        pois: Frame with ``lat``, ``lng`` and ``category`` columns.
        radius_km: Search radius in kilometres.

    Returns:
        dict of {column_name: count_series} aligned to properties dataframe.
        Column names are ``<group>_<radius>km`` with ``<radius>`` equal to
        ``int(radius_km)``; properties without a count get 0.
    """
    # Get unique postcodes with coordinates
    print("Deduplicating postcodes...")
    unique_postcodes = properties.select(["postcode", "lat", "lon"]).unique(
        subset=["postcode"]
    )

    print(
        f" {len(properties):,} properties → {len(unique_postcodes):,} unique postcodes"
    )

    # Count POIs per postcode. The category groups must be passed explicitly:
    # the third positional parameter of the helper is ``groups``, not the radius.
    # NOTE(review): POI_GROUPS is not defined or imported in the visible part
    # of this module — confirm where it is meant to come from.
    postcode_counts = _count_pois_per_postcode(
        unique_postcodes, pois, POI_GROUPS, radius_km
    )

    print(" Writing postcode counts to temp file...")
    with tempfile.NamedTemporaryFile(suffix=".parquet") as tmp:
        tmp_path = tmp.name
        postcode_counts.write_parquet(tmp_path)

        # Join using lazy evaluation
        print(" Joining counts back to properties (lazy)...")
        count_cols = [f"{group}_{int(radius_km)}km" for group in POI_GROUPS]

        # Convert properties to lazy frame, join, then collect
        result_lazy = (
            properties.lazy()
            .select("postcode")
            # The returned Series must line up row-for-row with `properties`,
            # so the left-hand order is requested explicitly rather than
            # relying on whatever order the engine happens to produce.
            .join(
                pl.scan_parquet(tmp_path),
                on="postcode",
                how="left",
                maintain_order="left",
            )
            .select(count_cols)
            .fill_null(0)
        )

        # Collect while the temp file still exists; it is deleted on exit.
        result_df = result_lazy.collect(engine="streaming")

    return {col: result_df[col] for col in count_cols}
|
||||||
46
pipeline/utils/test_fuzzy_join.py
Normal file
46
pipeline/utils/test_fuzzy_join.py
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
from pipeline.utils import fuzzy_join_on_postcode
|
||||||
|
|
||||||
|
# Manual snapshot script: prints how price-paid addresses in one postcode are
# matched to EPC addresses. It reads the local data files named below and
# contains no assertions.
POSTCODE = "E14 2DG"

# Price paid: unique addresses for this postcode
pp = (
    pl.scan_parquet("data/price-paid-complete.parquet")
    .filter(pl.col("postcode") == POSTCODE)
    .select("paon", "saon", "street", "postcode")
    .unique()
    .sort("saon")
    .with_columns(
        # Null parts (e.g. a missing flat number) are skipped rather than
        # turning the whole address null.
        pl.concat_str(
            [pl.col("saon"), pl.col("paon"), pl.col("street")],
            separator=" ",
            ignore_nulls=True,
        ).alias("pp_address"),
    )
)

# EPC: latest inspection per address for this postcode
epc = (
    pl.scan_csv("data/epc/certificates.csv")
    .select("ADDRESS", "POSTCODE", "INSPECTION_DATE")
    .filter(pl.col("POSTCODE").str.strip_chars() == POSTCODE)
    .sort("INSPECTION_DATE", descending=True)
    # keep="first" together with maintain_order=True is what makes the newest
    # certificate win; the default keeps an arbitrary row per address.
    .unique("ADDRESS", keep="first", maintain_order=True)
    .sort("ADDRESS")
)

result = fuzzy_join_on_postcode(
    left=pp,
    right=epc,
    left_address_col="pp_address",
    right_address_col="ADDRESS",
    left_postcode_col="postcode",
    right_postcode_col="POSTCODE",
).collect()

snapshot = result.select("pp_address", "ADDRESS").sort("pp_address")

print("Testing the matching between EPC and PP addresses")
# Negative row/column limits print the full table without truncation.
with pl.Config(tbl_rows=-1, tbl_cols=-1, fmt_str_lengths=80):
    print(snapshot)
|
||||||
147
pipeline/utils/test_haversine.py
Normal file
147
pipeline/utils/test_haversine.py
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
import numpy as np
|
||||||
|
import polars as pl
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from pipeline.utils.haversine import haversine_km, haversine_km_expr
|
||||||
|
|
||||||
|
|
||||||
|
class TestHaversineKm:
    """Checks for the NumPy implementation of the haversine distance."""

    def test_same_point(self):
        """A point is zero kilometres away from itself."""
        here_lat, here_lon = 51.5074, -0.1278
        dist = haversine_km(
            np.array([here_lat]), np.array([here_lon]), here_lat, here_lon
        )
        assert np.allclose(dist, 0.0, atol=1e-10)

    def test_known_distance_london_to_paris(self):
        """London to Paris is roughly 344 km."""
        origin_lat = np.array([51.5074])  # London
        origin_lon = np.array([-0.1278])
        dest_lat, dest_lon = 48.8566, 2.3522  # Paris

        dist = haversine_km(origin_lat, origin_lon, dest_lat, dest_lon)

        assert np.allclose(dist[0], 344, rtol=0.01)

    def test_known_distance_new_york_to_london(self):
        """New York to London is roughly 5570 km."""
        origin_lat = np.array([40.7128])  # New York
        origin_lon = np.array([-74.0060])
        dest_lat, dest_lon = 51.5074, -0.1278  # London

        dist = haversine_km(origin_lat, origin_lon, dest_lat, dest_lon)

        assert np.allclose(dist[0], 5570, rtol=0.01)

    def test_multiple_points(self):
        """Several origins are measured against one destination in a single call."""
        # London, Paris, NYC
        lats = np.array([51.5074, 48.8566, 40.7128])
        lons = np.array([-0.1278, 2.3522, -74.0060])
        edinburgh = (55.9533, -3.1883)

        dists = haversine_km(lats, lons, *edinburgh)

        # Every origin differs from the destination.
        assert np.all(dists > 0)
        # London is nearest to Edinburgh (~530 km), then Paris, then NYC.
        assert dists[0] < dists[1] < dists[2]
        assert np.allclose(dists[0], 530, rtol=0.02)

    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        start_lat = np.array([0.0])
        start_lon = np.array([0.0])
        end_lon = 1.0

        dist = haversine_km(start_lat, start_lon, 0.0, end_lon)

        assert np.allclose(dist[0], 111.2, rtol=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
class TestHaversineKmExpr:
    """Checks for the Polars-expression implementation of the haversine distance."""

    def test_same_point(self):
        """A point is zero kilometres away from itself."""
        frame = pl.DataFrame({"lat": [51.5074], "lon": [-0.1278]})
        dist_expr = haversine_km_expr("lat", "lon", 51.5074, -0.1278).alias("dist")
        result = frame.select(dist_expr)
        assert result["dist"][0] == pytest.approx(0.0, abs=1e-10)

    def test_known_distance_london_to_paris(self):
        """London to Paris is roughly 344 km."""
        frame = pl.DataFrame({"lat": [51.5074], "lon": [-0.1278]})
        dist_expr = haversine_km_expr("lat", "lon", 48.8566, 2.3522).alias("dist")
        result = frame.select(dist_expr)
        assert result["dist"][0] == pytest.approx(344, rel=0.01)

    def test_known_distance_new_york_to_london(self):
        """New York to London is roughly 5570 km."""
        frame = pl.DataFrame({"lat": [40.7128], "lon": [-74.0060]})
        dist_expr = haversine_km_expr("lat", "lon", 51.5074, -0.1278).alias("dist")
        result = frame.select(dist_expr)
        assert result["dist"][0] == pytest.approx(5570, rel=0.01)

    def test_multiple_points(self):
        """Several rows are measured against one destination in a single select."""
        # London, Paris, NYC
        frame = pl.DataFrame(
            {
                "lat": [51.5074, 48.8566, 40.7128],
                "lon": [-0.1278, 2.3522, -74.0060],
            }
        )
        # Destination: Edinburgh
        dist_expr = haversine_km_expr("lat", "lon", 55.9533, -3.1883).alias("dist")

        dists = frame.select(dist_expr)["dist"].to_numpy()

        # Every origin differs from the destination.
        assert np.all(dists > 0)
        # London is nearest to Edinburgh (~530 km), then Paris, then NYC.
        assert dists[0] < dists[1] < dists[2]
        assert dists[0] == pytest.approx(530, rel=0.02)

    def test_equator_points(self):
        """One degree of longitude along the equator is about 111 km."""
        frame = pl.DataFrame({"lat": [0.0], "lon": [0.0]})
        dist_expr = haversine_km_expr("lat", "lon", 0.0, 1.0).alias("dist")
        result = frame.select(dist_expr)
        assert result["dist"][0] == pytest.approx(111.2, rel=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
class TestHaversineConsistency:
    """Cross-checks the NumPy and Polars implementations against each other."""

    def test_numpy_and_polars_match(self):
        """The same inputs must yield the same distances from both implementations."""
        lats = np.array([51.5074, 48.8566, 40.7128, 55.9533, 52.5200])
        lons = np.array([-0.1278, 2.3522, -74.0060, -3.1883, 13.4050])
        rome_lat, rome_lon = 41.9028, 12.4964

        # Reference values from the NumPy function.
        numpy_dists = haversine_km(lats, lons, rome_lat, rome_lon)

        # Same computation expressed as a Polars expression.
        dist_expr = haversine_km_expr("lat", "lon", rome_lat, rome_lon).alias("dist")
        frame = pl.DataFrame({"lat": lats, "lon": lons})
        polars_dists = frame.select(dist_expr)["dist"].to_numpy()

        # Allow only floating-point noise between the two.
        assert np.allclose(numpy_dists, polars_dists, rtol=1e-10)
|
||||||
93
pipeline/utils/test_poi_counts.py
Normal file
93
pipeline/utils/test_poi_counts.py
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
import polars as pl
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from pipeline.utils.poi_counts import POI_GROUPS, count_pois_within_radius
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def pois():
    """Six POIs: five clustered in central London plus one far-away restaurant."""
    rows = [
        (51.5074, -0.1278, "Restaurant"),
        (51.5075, -0.1280, "Fast Food"),
        (51.5080, -0.1275, "Supermarket"),
        (51.5076, -0.1279, "Park"),
        (51.5073, -0.1277, "Station"),
        (51.60, -0.20, "Restaurant"),  # too far from any property
    ]
    lats, lngs, categories = (list(column) for column in zip(*rows))
    return pl.DataFrame({"lat": lats, "lng": lngs, "category": categories})
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def properties():
    """Three properties: two sharing a central-London postcode, one at a distant postcode."""
    rows = [
        ("EC1A 1BB", 51.5074, -0.1278),
        ("EC1A 1BB", 51.5074, -0.1278),
        ("ZZ99 9ZZ", 55.0, -3.0),
    ]
    postcodes, lats, lons = (list(column) for column in zip(*rows))
    return pl.DataFrame({"postcode": postcodes, "lat": lats, "lon": lons})
|
||||||
|
|
||||||
|
|
||||||
|
def test_counts_pois_within_radius(properties, pois):
    """Counts are produced per group, aligned to the property rows, and shared per postcode."""
    result = count_pois_within_radius(properties, pois, radius_km=2.0)

    expected_columns = {f"{g}_2km" for g in POI_GROUPS}
    assert set(result.keys()) == expected_columns

    # One value per property row (3 rows).
    for col, series in result.items():
        assert len(series) == 3, f"{col} has {len(series)} rows, expected 3"

    # Row 0 sits on top of the central-London cluster.
    expected_first_row = {
        "restaurants_2km": 2,  # Restaurant + Fast Food
        "groceries_2km": 1,  # Supermarket
        "parks_2km": 1,  # Park
        "public_transport_2km": 1,  # Station
    }
    for col, expected in expected_first_row.items():
        assert result[col][0] == expected

    # Row 1 has the same postcode as row 0, hence the same count.
    restaurants = result["restaurants_2km"]
    assert restaurants[1] == restaurants[0]

    # Row 2 (ZZ99 9ZZ) has no POI anywhere near it.
    for group in POI_GROUPS:
        assert result[f"{group}_2km"][2] == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_pois_returns_zeros(properties):
    """With an empty POI table every group column exists and is all zeros."""
    empty_schema = {"lat": pl.Float64, "lng": pl.Float64, "category": pl.String}
    empty_pois = pl.DataFrame(
        {name: pl.Series([], dtype=dtype) for name, dtype in empty_schema.items()}
    )

    result = count_pois_within_radius(properties, empty_pois, radius_km=2.0)

    for group in POI_GROUPS:
        col = f"{group}_2km"
        assert col in result
        assert result[col].to_list() == [0, 0, 0]
|
||||||
|
|
||||||
|
|
||||||
|
def test_custom_radius(pois):
    """A very small radius keeps only POIs at (or right next to) the property."""
    single_property = pl.DataFrame(
        {
            "postcode": ["EC1A 1BB"],
            "lat": [51.5074],
            "lon": [-0.1278],
        }
    )

    # 0.01 km = 10 m; int(0.01) == 0, so the columns are suffixed "_0km".
    result = count_pois_within_radius(single_property, pois, radius_km=0.01)

    # The Restaurant at (51.5074, -0.1278) coincides with the property.
    assert result["restaurants_0km"][0] >= 1

    # Summed over all groups, at most two POIs may have been counted.
    total = 0
    for g in POI_GROUPS:
        total += result[f"{g}_0km"][0]
    assert total <= 2
|
||||||
|
|
@ -6,11 +6,9 @@ readme = "README.md"
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.12"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"attrs>=22.2.0",
|
"attrs>=22.2.0",
|
||||||
"httpx>=0.28.1",
|
"httpx[socks]>=0.28.1",
|
||||||
"ipywidgets>=8.0.0",
|
"ipywidgets>=8.0.0",
|
||||||
"journey-client",
|
|
||||||
"jupyter>=1.0.0",
|
"jupyter>=1.0.0",
|
||||||
"nest-asyncio>=1.6.0",
|
|
||||||
"numpy>=1.26.0",
|
"numpy>=1.26.0",
|
||||||
"pandas>=2.0.0",
|
"pandas>=2.0.0",
|
||||||
"plotly>=6.5.2",
|
"plotly>=6.5.2",
|
||||||
|
|
@ -18,17 +16,31 @@ dependencies = [
|
||||||
"pyarrow>=15.0.0",
|
"pyarrow>=15.0.0",
|
||||||
"python-dateutil>=2.8.0",
|
"python-dateutil>=2.8.0",
|
||||||
"tqdm>=4.67.1",
|
"tqdm>=4.67.1",
|
||||||
"fastapi[standard]>=0.115.0",
|
|
||||||
"uvicorn>=0.34.0",
|
|
||||||
"h3>=3.7.0",
|
|
||||||
"overturemaps>=0.18.0",
|
|
||||||
"fastexcel>=0.19.0",
|
"fastexcel>=0.19.0",
|
||||||
"scipy>=1.17.0",
|
"scipy>=1.17.0",
|
||||||
"matplotlib>=3.10.8",
|
"matplotlib>=3.10.8",
|
||||||
|
"osmium>=4.0.0",
|
||||||
|
"matplotlib>=3.10.8",
|
||||||
|
"thefuzz>=0.22.1",
|
||||||
|
"scipy>=1.17.0",
|
||||||
|
"shapely>=2.0.0",
|
||||||
|
"rasterio>=1.5.0",
|
||||||
|
"pyproj>=3.7.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[dependency-groups]
|
[tool.uv]
|
||||||
dev = ["ruff>=0.8.0"]
|
environments = ["sys_platform == 'linux' and python_version < '3.14'"]
|
||||||
|
|
||||||
[tool.uv.sources]
|
[dependency-groups]
|
||||||
journey-client = { path = "./tfl_journey_client" }
|
dev = [
|
||||||
|
"deptry>=0.22.0",
|
||||||
|
"pytest>=9.0.2",
|
||||||
|
"ruff>=0.8.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.deptry.per_rule_ignores]
|
||||||
|
# pyarrow/fastexcel: runtime backends for polars parquet/Excel I/O
|
||||||
|
# jupyter/ipywidgets/pandas: needed to run analysis notebooks
|
||||||
|
DEP002 = ["pyarrow", "fastexcel", "jupyter", "ipywidgets", "pandas"]
|
||||||
|
# pytest is a dev dependency, not a missing one
|
||||||
|
DEP004 = ["pytest"]
|
||||||
|
|
|
||||||
2962
server-rs/Cargo.lock
generated
Normal file
2962
server-rs/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
33
server-rs/Cargo.toml
Normal file
33
server-rs/Cargo.toml
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
[package]
|
||||||
|
name = "property-map-server"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1"
|
||||||
|
clap = { version = "4", features = ["derive"] }
|
||||||
|
axum = "0.8"
|
||||||
|
tower-http = { version = "0.6", features = ["cors", "fs", "compression-gzip", "compression-zstd", "trace"] }
|
||||||
|
tokio = { version = "1", features = ["full"] }
|
||||||
|
polars = { version = "0.46", features = ["parquet", "lazy", "dtype-struct", "dtype-u8", "dtype-u16", "dtype-i8", "dtype-i16"] }
|
||||||
|
h3o = "0.7"
|
||||||
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
serde_json = "1"
|
||||||
|
rayon = "1"
|
||||||
|
rustc-hash = "2"
|
||||||
|
tracing = "0.1"
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||||
|
|
||||||
|
[lints.clippy]
|
||||||
|
min_ident_chars = "warn"
|
||||||
|
|
||||||
|
[profile.dev]
|
||||||
|
opt-level = 1
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
opt-level = 3
|
||||||
|
lto = "thin"
|
||||||
|
|
||||||
|
[profile.production]
|
||||||
|
inherits = "release"
|
||||||
|
lto = true
|
||||||
1
server-rs/clippy.toml
Normal file
1
server-rs/clippy.toml
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
allowed-idents-below-min-chars = ["i", "j", "k", "_"]
|
||||||
8
server-rs/rust-toolchain.toml
Normal file
8
server-rs/rust-toolchain.toml
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
[toolchain]
|
||||||
|
channel = "stable"
|
||||||
|
targets = [
|
||||||
|
"x86_64-unknown-linux-gnu",
|
||||||
|
"x86_64-unknown-linux-musl",
|
||||||
|
"aarch64-unknown-linux-gnu",
|
||||||
|
]
|
||||||
|
profile = "default"
|
||||||
28
server-rs/src/consts.rs
Normal file
28
server-rs/src/consts.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
pub const HISTOGRAM_BINS: usize = 100;
|
||||||
|
|
||||||
|
pub const H3_PRECOMPUTE_MIN: u8 = 4;
|
||||||
|
pub const H3_PRECOMPUTE_MAX: u8 = 12;
|
||||||
|
|
||||||
|
pub const SERVER_ADDRESS: &str = "0.0.0.0:8001";
|
||||||
|
|
||||||
|
pub const BOUNDS_QUANTIZATION: f64 = 0.01;
|
||||||
|
pub const BOUNDS_BUFFER_PERCENT: f64 = 0.1;
|
||||||
|
pub const POSTCODE_MIN_RESOLUTION: u8 = 11;
|
||||||
|
pub const MAX_POIS_PER_REQUEST: usize = 2500;
|
||||||
|
pub const DEFAULT_PROPERTIES_LIMIT: usize = 100;
|
||||||
|
pub const MAX_PROPERTIES_LIMIT: usize = 500;
|
||||||
|
pub const ENUM_NULL: u8 = 255;
|
||||||
|
|
||||||
|
/// Canonical display order for POI category groups.
|
||||||
|
/// The server will panic at startup if the data contains groups not in this list or vice versa.
|
||||||
|
pub const POI_GROUP_ORDER: &[&str] = &[
|
||||||
|
"Public Transport",
|
||||||
|
"Amenity",
|
||||||
|
"Building",
|
||||||
|
"Craft",
|
||||||
|
"Healthcare",
|
||||||
|
"Leisure",
|
||||||
|
"Office",
|
||||||
|
"Shop",
|
||||||
|
"Tourism",
|
||||||
|
];
|
||||||
676
server-rs/src/features.rs
Normal file
676
server-rs/src/features.rs
Normal file
|
|
@ -0,0 +1,676 @@
|
||||||
|
//! Static feature configuration. Every numeric and enum column in wide.parquet
|
||||||
|
//! must be declared here. Unknown columns cause a startup panic.
|
||||||
|
|
||||||
|
/// How the slider range (min/max) of a numeric feature is determined.
// NOTE(review): `Percentile` values in this file look like a 0-100 scale
// (e.g. 2.0 / 98.0) - confirm against the code that computes them.
pub enum Bounds {
|
||||||
|
/// Fixed min/max values for the slider
|
||||||
|
Fixed { min: f64, max: f64 },
|
||||||
|
/// Compute percentile from data at startup
|
||||||
|
Percentile { low: f64, high: f64 },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Static configuration for a single numeric feature: the parquet column it
/// maps to, how its slider range is derived, and the texts shown for it.
pub struct FeatureConfig {
|
||||||
|
/// Must match parquet column name exactly (also used as display label)
|
||||||
|
pub name: &'static str,
|
||||||
|
/// How the slider's min/max are obtained (fixed values or percentiles).
pub bounds: Bounds,
|
||||||
|
/// Slider step size. Controls the granularity of the range slider in the UI.
|
||||||
|
pub step: f64,
|
||||||
|
/// Short one-line description shown in the filter sidebar
|
||||||
|
pub description: &'static str,
|
||||||
|
/// Longer description explaining methodology, data source, and caveats
|
||||||
|
pub detail: &'static str,
|
||||||
|
/// Data source slug for linking to /data-sources#<slug>
|
||||||
|
pub source: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named group of numeric features (for example "Property" or "Transport").
pub struct FeatureGroup {
|
||||||
|
/// Group name.
pub name: &'static str,
|
||||||
|
/// Numeric features belonging to this group, in declaration order.
pub features: &'static [FeatureConfig],
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Static configuration for a single enum (categorical) feature: its name,
/// an optional presentation order for its values, and the texts shown for it.
pub struct EnumFeatureConfig {
|
||||||
|
/// Feature name.
// NOTE(review): presumably must match the parquet column name exactly, as
// for `FeatureConfig::name` - confirm.
pub name: &'static str,
|
||||||
|
/// If set, values are presented in this order instead of alphabetical.
|
||||||
|
/// Values not listed are appended alphabetically after the ordered ones.
|
||||||
|
pub order: Option<&'static [&'static str]>,
|
||||||
|
/// Short one-line description shown in the filter sidebar
|
||||||
|
pub description: &'static str,
|
||||||
|
/// Longer description explaining methodology, data source, and caveats
|
||||||
|
pub detail: &'static str,
|
||||||
|
/// Data source slug for linking to /data-sources#<slug>
|
||||||
|
pub source: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named group of enum (categorical) features.
pub struct EnumFeatureGroup {
|
||||||
|
/// Group name.
pub name: &'static str,
|
||||||
|
/// Enum features belonging to this group, in declaration order.
pub features: &'static [EnumFeatureConfig],
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Columns in parquet that are neither numeric features nor enum features.
|
||||||
|
/// These are silently skipped during schema validation.
|
||||||
|
pub const IGNORED_COLUMNS: &[&str] = &[
|
||||||
|
"lat",
|
||||||
|
"lon",
|
||||||
|
"Address per Property Register",
|
||||||
|
"Address per EPC",
|
||||||
|
"Postcode",
|
||||||
|
"historical_prices",
|
||||||
|
"Is construction date approximate",
|
||||||
|
];
|
||||||
|
|
||||||
|
pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Property",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Last known price",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 2_000_000.0,
|
||||||
|
},
|
||||||
|
step: 10000.0,
|
||||||
|
description: "Most recent sale price from the Land Registry",
|
||||||
|
detail: "The last recorded sale price for this property from HM Land Registry Price Paid data. Covers residential sales in England and Wales. May be years old if the property hasn't sold recently.",
|
||||||
|
source: "price-paid",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Price per sqm",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 0.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 100.0,
|
||||||
|
description: "Sale price divided by total floor area",
|
||||||
|
detail: "Calculated by dividing the last known sale price by the total floor area from the EPC certificate. Useful for comparing value across different-sized properties. Only available where both price and floor area data exist.",
|
||||||
|
source: "price-paid",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Total floor area (sqm)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 0.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Internal floor area from the EPC survey",
|
||||||
|
detail: "Total useful floor area in square metres as measured during the Energy Performance Certificate assessment. Includes all habitable rooms but excludes garages, outbuildings, and external areas.",
|
||||||
|
source: "epc",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Number of bedrooms & living rooms",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 1.0,
|
||||||
|
max: 10.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Count of habitable rooms from the EPC survey",
|
||||||
|
detail: "Total number of habitable rooms (bedrooms plus living rooms) as recorded in the Energy Performance Certificate. Kitchens and bathrooms are typically excluded unless they are large enough to count as habitable rooms.",
|
||||||
|
source: "epc",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Approximate construction age",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 2026.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Estimated year of construction from the EPC",
|
||||||
|
detail: "The approximate year of construction as recorded in the Energy Performance Certificate. Derived from the construction age band (e.g. '1930-1949') by taking the midpoint. May be approximate, especially for older buildings.",
|
||||||
|
source: "epc",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Transport",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "public_transport_easy_minutes",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 180.0,
|
||||||
|
},
|
||||||
|
step: 2.0,
|
||||||
|
description: "Quickest public transport journey to central London (easy route)",
|
||||||
|
detail: "Journey time in minutes by public transport to central London destinations, using TfL's Journey Planner API. The 'easy' route minimises changes and walking. Calculated for weekday morning commute times.",
|
||||||
|
source: "tfl-journey-times",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "public_transport_quick_minutes",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 180.0,
|
||||||
|
},
|
||||||
|
step: 2.0,
|
||||||
|
description: "Fastest public transport journey to central London",
|
||||||
|
detail: "Journey time in minutes by public transport to central London destinations, using TfL's Journey Planner API. The 'quick' route optimises for shortest total time regardless of changes. Calculated for weekday morning commute times.",
|
||||||
|
source: "tfl-journey-times",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "cycling_minutes",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 180.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Cycling time to central London via TfL routing",
|
||||||
|
detail: "Cycling journey time in minutes to central London destinations, as calculated by the TfL Journey Planner API. Uses TfL's default cycling speed and route preferences.",
|
||||||
|
source: "tfl-journey-times",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Public transport within 2km",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 5.0,
|
||||||
|
high: 95.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Number of public transport stops within 2km",
|
||||||
|
detail: "Count of bus stops, rail stations, tube stations, tram stops, and other public transport access points within a 2km radius of the property's postcode. Derived from the NaPTAN (National Public Transport Access Nodes) dataset.",
|
||||||
|
source: "naptan",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Education",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Education, Skills and Training Score",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "IoD education deprivation score for the local area",
|
||||||
|
detail: "From the English Indices of Deprivation. Measures deprivation in education, skills and training in the local area (LSOA). Higher scores indicate greater deprivation. Combines children/young people sub-domain (school attainment, entry to higher education) and adult skills sub-domain (adult qualifications, English language proficiency).",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Good+ primary schools within 5km",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 30.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Primary schools rated Good or Outstanding by Ofsted nearby",
|
||||||
|
detail: "Number of state-funded primary schools within 5km that have a current Ofsted rating of Good or Outstanding. Based on the latest inspection outcomes dataset. Schools that have not yet been inspected are excluded.",
|
||||||
|
source: "ofsted",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Good+ secondary schools within 5km",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 15.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Secondary schools rated Good or Outstanding by Ofsted nearby",
|
||||||
|
detail: "Number of state-funded secondary schools within 5km that have a current Ofsted rating of Good or Outstanding. Based on the latest inspection outcomes dataset. Schools that have not yet been inspected are excluded.",
|
||||||
|
source: "ofsted",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Deprivation",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Index of Multiple Deprivation (IMD) Score",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "Overall deprivation score combining all domains",
|
||||||
|
detail: "The Index of Multiple Deprivation is the official measure of relative deprivation in England. It combines seven weighted domains: Income (22.5%), Employment (22.5%), Education (13.5%), Health (13.5%), Crime (9.3%), Barriers to Housing & Services (9.3%), and Living Environment (9.3%). Higher scores indicate greater deprivation. Measured at LSOA level (~1,500 people).",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Income Score (rate)",
|
||||||
|
bounds: Bounds::Fixed { min: 0.0, max: 0.6 },
|
||||||
|
step: 0.01,
|
||||||
|
description: "Proportion of the population experiencing income deprivation",
|
||||||
|
detail: "From the English Indices of Deprivation. The proportion of the local population experiencing deprivation relating to low income. Includes people on Income Support, income-based Jobseeker's Allowance, income-based Employment and Support Allowance, Pension Credit, Working Tax Credit and Child Tax Credit, Universal Credit, and asylum seekers.",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Employment Score (rate)",
|
||||||
|
bounds: Bounds::Fixed { min: 0.0, max: 0.4 },
|
||||||
|
step: 0.01,
|
||||||
|
description: "Proportion of the working-age population involuntarily excluded from work",
|
||||||
|
detail: "From the English Indices of Deprivation. The proportion of the working-age population involuntarily excluded from the labour market. Includes claimants of Jobseeker's Allowance, Employment and Support Allowance, Incapacity Benefit, Severe Disablement Allowance, Carer's Allowance, and relevant Universal Credit claimants.",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Health Deprivation and Disability Score",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "Risk of premature death and quality of life impairment",
|
||||||
|
detail: "From the English Indices of Deprivation. Measures the risk of premature death and impairment of quality of life through poor physical or mental health. Derived from years of potential life lost, comparative illness and disability ratio, acute morbidity, and mood and anxiety disorders.",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Crime Score",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "IoD crime deprivation score measuring personal risk",
|
||||||
|
detail: "From the English Indices of Deprivation. Measures the risk of personal and material victimisation at local level. Derived from recorded rates of violence, burglary, theft, and criminal damage. Higher scores indicate higher crime-related deprivation.",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Living Environment Score",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "Quality of the local indoor and outdoor environment",
|
||||||
|
detail: "From the English Indices of Deprivation. Measures deprivation in the quality of the local environment. Combines the Indoors sub-domain (housing quality, central heating, housing conditions) and Outdoors sub-domain (air quality, road traffic accidents). Higher scores indicate poorer living environments.",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Indoors Sub-domain Score",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "Housing quality and conditions in the local area",
|
||||||
|
detail: "From the English Indices of Deprivation, Living Environment domain. Measures the quality of housing stock: houses without central heating, housing in poor condition, and houses failing Decent Homes standards. Higher scores indicate worse housing conditions.",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Outdoors Sub-domain Score",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "Air quality and road safety in the local area",
|
||||||
|
detail: "From the English Indices of Deprivation, Living Environment domain. Measures the outdoor living environment quality through air quality indicators and road traffic accident casualties involving pedestrians and cyclists. Higher scores indicate poorer outdoor environments.",
|
||||||
|
source: "iod",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Crime",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Anti-social behaviour (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly anti-social behaviour incidents in the area",
|
||||||
|
detail: "Average number of anti-social behaviour incidents per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes nuisance, environmental, and personal anti-social behaviour.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Violence and sexual offences (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly violent and sexual offences in the area",
|
||||||
|
detail: "Average number of violence and sexual offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes assault, harassment, and sexual offences.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Criminal damage and arson (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly criminal damage and arson in the area",
|
||||||
|
detail: "Average number of criminal damage and arson incidents per year in the LSOA, from police.uk street-level crime data (2023-2025).",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Burglary (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly burglary offences in the area",
|
||||||
|
detail: "Average number of burglary offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes residential and commercial burglary.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Vehicle crime (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly vehicle crime in the area",
|
||||||
|
detail: "Average number of vehicle crime incidents per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes theft of and from vehicles.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Robbery (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly robbery offences in the area",
|
||||||
|
detail: "Average number of robbery offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Robbery involves theft with force or threat of force.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Other theft (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly other theft offences in the area",
|
||||||
|
detail: "Average number of 'other theft' offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes theft not classified under burglary, vehicle crime, shoplifting, or bicycle theft.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Shoplifting (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly shoplifting offences in the area",
|
||||||
|
detail: "Average number of shoplifting offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Drugs (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly drug offences in the area",
|
||||||
|
detail: "Average number of drug offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes possession and trafficking offences.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Possession of weapons (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly weapons possession offences in the area",
|
||||||
|
detail: "Average number of possession of weapons offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Public order (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly public order offences in the area",
|
||||||
|
detail: "Average number of public order offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes causing fear, alarm, or distress.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Bicycle theft (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly bicycle theft in the area",
|
||||||
|
detail: "Average number of bicycle theft offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Theft from the person (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly theft from the person in the area",
|
||||||
|
detail: "Average number of theft from the person offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes pickpocketing and bag snatching without force.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Other crime (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Average yearly other crime in the area",
|
||||||
|
detail: "Average number of other crime offences per year in the LSOA, from police.uk street-level crime data (2023-2025). A catch-all category for offences not classified elsewhere.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Serious crime (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Aggregate of serious crime categories per year",
|
||||||
|
detail: "Sum of violence, robbery, burglary, and weapons possession per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single serious crime metric.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Minor crime (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Aggregate of minor crime categories per year",
|
||||||
|
detail: "Sum of anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single minor crime metric.",
|
||||||
|
source: "crime",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Demographics",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "% White",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 100.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Percentage of population identifying as White",
|
||||||
|
detail: "From the 2021 Census. Percentage of the local authority population identifying as White (English, Welsh, Scottish, Northern Irish, British, Irish, Gypsy or Irish Traveller, Roma, or any other White background).",
|
||||||
|
source: "ethnicity",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "% Asian",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 100.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Percentage of population identifying as Asian",
|
||||||
|
detail: "From the 2021 Census. Percentage of the local authority population identifying as Asian or Asian British (Indian, Pakistani, Bangladeshi, Chinese, or any other Asian background).",
|
||||||
|
source: "ethnicity",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "% Black",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 100.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Percentage of population identifying as Black",
|
||||||
|
detail: "From the 2021 Census. Percentage of the local authority population identifying as Black, Black British, Caribbean, or African.",
|
||||||
|
source: "ethnicity",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "% Mixed",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 100.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Percentage of population identifying as Mixed or Multiple ethnic groups",
|
||||||
|
detail: "From the 2021 Census. Percentage of the local authority population identifying as Mixed or Multiple ethnic groups (White and Black Caribbean, White and Black African, White and Asian, or any other Mixed or Multiple background).",
|
||||||
|
source: "ethnicity",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "% Other",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 0.0,
|
||||||
|
max: 100.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Percentage of population identifying as Other ethnic group",
|
||||||
|
detail: "From the 2021 Census. Percentage of the local authority population identifying as Other ethnic group (Arab or any other ethnic group not covered by the main categories).",
|
||||||
|
source: "ethnicity",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Amenities",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Restaurants within 2km",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 5.0,
|
||||||
|
high: 95.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Number of restaurants and cafes within 2km",
|
||||||
|
detail: "Count of restaurants, cafes, and food establishments within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data using haversine distance calculation with a 0.05° spatial grid for candidate reduction.",
|
||||||
|
source: "osm-pois",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Groceries within 2km",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 5.0,
|
||||||
|
high: 95.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Number of grocery shops and supermarkets within 2km",
|
||||||
|
detail: "Count of supermarkets, convenience stores, and other grocery shops within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
|
||||||
|
source: "osm-pois",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Parks within 2km",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 5.0,
|
||||||
|
high: 95.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Number of parks and green spaces within 2km",
|
||||||
|
detail: "Count of parks, gardens, nature reserves, and other green spaces within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
|
||||||
|
source: "osm-pois",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
FeatureGroup {
|
||||||
|
name: "Environment",
|
||||||
|
features: &[
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Noise (dB)",
|
||||||
|
bounds: Bounds::Fixed {
|
||||||
|
min: 50.0,
|
||||||
|
max: 80.0,
|
||||||
|
},
|
||||||
|
step: 1.0,
|
||||||
|
description: "Road noise level at the postcode in decibels (Lden)",
|
||||||
|
detail: "Road noise level in decibels (Lden — day-evening-night 24-hour weighted average) from Defra's Strategic Noise Mapping Round 4 (2022). Modelled at 4m above ground on a 10m grid. Sampled at postcode centroids via WCS GeoTIFF tiles. Values above ~55 dB are generally considered noticeable; above ~70 dB can affect health.",
|
||||||
|
source: "noise",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Max available download speed (Mbps)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 5.0,
|
||||||
|
high: 95.0,
|
||||||
|
},
|
||||||
|
step: 10.0,
|
||||||
|
description: "Maximum broadband download speed available at the postcode",
|
||||||
|
detail: "Maximum available fixed broadband download speed in Megabits per second, from Ofcom's Connected Nations 2025 report. Measured at Output Area level and represents the maximum speed available from any provider, not actual achieved speeds.",
|
||||||
|
source: "broadband",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[EnumFeatureGroup {
|
||||||
|
name: "Property",
|
||||||
|
features: &[
|
||||||
|
EnumFeatureConfig {
|
||||||
|
name: "Leashold/Freehold",
|
||||||
|
order: Some(&["Freehold", "Leasehold"]),
|
||||||
|
description: "Whether the property is leasehold or freehold",
|
||||||
|
detail: "From HM Land Registry Price Paid data. Freehold means you own the building and the land it stands on. Leasehold means you own the building but not the land — you have a lease from the freeholder for a set number of years.",
|
||||||
|
source: "price-paid",
|
||||||
|
},
|
||||||
|
EnumFeatureConfig {
|
||||||
|
name: "Current energy rating",
|
||||||
|
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
|
||||||
|
description: "Current EPC energy efficiency rating (A-G)",
|
||||||
|
detail: "The current energy efficiency rating from the Energy Performance Certificate, graded A (most efficient) to G (least efficient). Based on the energy costs per square metre of floor area for heating, hot water, lighting, and ventilation.",
|
||||||
|
source: "epc",
|
||||||
|
},
|
||||||
|
EnumFeatureConfig {
|
||||||
|
name: "Potential energy rating",
|
||||||
|
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
|
||||||
|
description: "Achievable EPC rating after recommended improvements",
|
||||||
|
detail: "The potential energy efficiency rating that could be achieved if all cost-effective improvements recommended in the EPC were carried out. Graded A (most efficient) to G (least efficient).",
|
||||||
|
source: "epc",
|
||||||
|
},
|
||||||
|
EnumFeatureConfig {
|
||||||
|
name: "Property type",
|
||||||
|
order: Some(&["Detached", "Semi-Detached", "Terraced", "Flat"]),
|
||||||
|
description: "Type of property: detached, semi-detached, terraced, or flat",
|
||||||
|
detail: "From HM Land Registry Price Paid data. The broad property type classification: Detached, Semi-Detached, Terraced, or Flat/Maisonette.",
|
||||||
|
source: "price-paid",
|
||||||
|
},
|
||||||
|
EnumFeatureConfig {
|
||||||
|
name: "Property type/built form",
|
||||||
|
order: None,
|
||||||
|
description: "Detailed property type and built form from the EPC",
|
||||||
|
detail: "A more detailed classification from the Energy Performance Certificate combining property type and built form. Examples include 'Semi-Detached House', 'Mid-Terrace House', 'Ground-Floor Flat', 'Detached Bungalow', etc.",
|
||||||
|
source: "epc",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}];
|
||||||
|
|
||||||
|
/// Flat ordered list of all numeric feature names (follows group order).
|
||||||
|
pub fn all_numeric_feature_names() -> Vec<&'static str> {
|
||||||
|
FEATURE_GROUPS
|
||||||
|
.iter()
|
||||||
|
.flat_map(|group| group.features.iter().map(|feature| feature.name))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Flat ordered list of all enum feature names (follows group order).
|
||||||
|
pub fn all_enum_feature_names() -> Vec<&'static str> {
|
||||||
|
ENUM_FEATURE_GROUPS
|
||||||
|
.iter()
|
||||||
|
.flat_map(|group| group.features.iter().map(|feature| feature.name))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Look up the configured value order for an enum feature by name.
|
||||||
|
pub fn order_for(name: &str) -> Option<&'static [&'static str]> {
|
||||||
|
ENUM_FEATURE_GROUPS
|
||||||
|
.iter()
|
||||||
|
.flat_map(|group| group.features.iter())
|
||||||
|
.find(|feature| feature.name == name)
|
||||||
|
.and_then(|feature| feature.order)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Look up the Bounds config for a numeric feature by name.
|
||||||
|
pub fn bounds_for(name: &str) -> Option<&'static Bounds> {
|
||||||
|
FEATURE_GROUPS
|
||||||
|
.iter()
|
||||||
|
.flat_map(|group| group.features.iter())
|
||||||
|
.find(|feature| feature.name == name)
|
||||||
|
.map(|feature| &feature.bounds)
|
||||||
|
}
|
||||||
86
server-rs/src/filter.rs
Normal file
86
server-rs/src/filter.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
use crate::consts::ENUM_NULL;
|
||||||
|
use crate::data::EnumFeatureData;
|
||||||
|
|
||||||
|
/// A numeric range filter resolved against the loaded feature columns.
pub struct ParsedFilter {
    /// Position of the feature in the `feature_names` slice given to `parse_filters`.
    pub feat_idx: usize,
    /// Lower bound; `row_passes_filters` accepts values `>=` this.
    pub min: f64,
    /// Upper bound; `row_passes_filters` accepts values `<=` this.
    pub max: f64,
}
|
||||||
|
|
||||||
|
/// An enum membership filter resolved against the loaded enum columns.
pub struct ParsedEnumFilter {
    /// Position of the feature in the `enum_features` slice given to `parse_filters`.
    pub enum_idx: usize,
    /// Accepted values, stored as their positions in that feature's `values`
    /// list (narrowed to `u8`). Requested values that are not in the list are dropped.
    pub allowed: Vec<u8>,
}
|
||||||
|
|
||||||
|
/// Parse comma-separated filter string into numeric and enum filters.
|
||||||
|
/// Numeric format: `name:min:max`
|
||||||
|
/// Enum format: `name:val1|val2|val3` (pipe-separated values)
|
||||||
|
pub fn parse_filters(
|
||||||
|
filter_str: Option<&str>,
|
||||||
|
feature_names: &[String],
|
||||||
|
enum_features: &[EnumFeatureData],
|
||||||
|
) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
|
||||||
|
let mut numeric = Vec::new();
|
||||||
|
let mut enums = Vec::new();
|
||||||
|
|
||||||
|
let input = match filter_str.filter(|text| !text.is_empty()) {
|
||||||
|
Some(text) => text,
|
||||||
|
None => return (numeric, enums),
|
||||||
|
};
|
||||||
|
|
||||||
|
for entry in input.split(',') {
|
||||||
|
let parts: Vec<&str> = entry.splitn(2, ':').collect();
|
||||||
|
if parts.len() != 2 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let name = parts[0].trim();
|
||||||
|
let rest = parts[1].trim();
|
||||||
|
|
||||||
|
if let Some(enum_idx) = enum_features.iter().position(|enum_feat| enum_feat.name == name) {
|
||||||
|
let enum_feat = &enum_features[enum_idx];
|
||||||
|
let allowed: Vec<u8> = rest
|
||||||
|
.split('|')
|
||||||
|
.filter_map(|value| {
|
||||||
|
let value = value.trim();
|
||||||
|
enum_feat.values.iter().position(|existing| existing == value).map(|position| position as u8)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
enums.push(ParsedEnumFilter { enum_idx, allowed });
|
||||||
|
} else {
|
||||||
|
let num_parts: Vec<&str> = rest.splitn(2, ':').collect();
|
||||||
|
if num_parts.len() != 2 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let min = match num_parts[0].trim().parse::<f64>() {
|
||||||
|
Ok(value) => value,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
let max = match num_parts[1].trim().parse::<f64>() {
|
||||||
|
Ok(value) => value,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) {
|
||||||
|
numeric.push(ParsedFilter { feat_idx, min, max });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(numeric, enums)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn row_passes_filters(
|
||||||
|
row: usize,
|
||||||
|
filters: &[ParsedFilter],
|
||||||
|
enum_filters: &[ParsedEnumFilter],
|
||||||
|
feature_data: &[f64],
|
||||||
|
num_features: usize,
|
||||||
|
enum_features: &[EnumFeatureData],
|
||||||
|
) -> bool {
|
||||||
|
filters.iter().all(|filter| {
|
||||||
|
let value = feature_data[row * num_features + filter.feat_idx];
|
||||||
|
value.is_finite() && value >= filter.min && value <= filter.max
|
||||||
|
}) && enum_filters.iter().all(|enum_filter| {
|
||||||
|
let value = enum_features[enum_filter.enum_idx].data[row];
|
||||||
|
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
||||||
|
})
|
||||||
|
}
|
||||||
147
server-rs/src/grid_index.rs
Normal file
147
server-rs/src/grid_index.rs
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
/// Grid-based spatial index for fast rectangle queries over property rows.
|
||||||
|
///
|
||||||
|
/// Divides the UK bounding box into cells of ~0.01 degrees (~1km),
|
||||||
|
/// each storing indices of rows whose lat/lon falls within that cell.
|
||||||
|
pub struct GridIndex {
|
||||||
|
min_lat: f64,
|
||||||
|
min_lon: f64,
|
||||||
|
cell_size: f64,
|
||||||
|
cols: usize,
|
||||||
|
rows: usize,
|
||||||
|
/// cells[row * cols + col] = vec of row indices
|
||||||
|
cells: Vec<Vec<u32>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GridIndex {
|
||||||
|
pub fn build(lat: &[f64], lon: &[f64], cell_size: f64) -> Self {
|
||||||
|
let mut min_lat = f64::INFINITY;
|
||||||
|
let mut max_lat = f64::NEG_INFINITY;
|
||||||
|
let mut min_lon = f64::INFINITY;
|
||||||
|
let mut max_lon = f64::NEG_INFINITY;
|
||||||
|
|
||||||
|
for index in 0..lat.len() {
|
||||||
|
if lat[index] < min_lat {
|
||||||
|
min_lat = lat[index];
|
||||||
|
}
|
||||||
|
if lat[index] > max_lat {
|
||||||
|
max_lat = lat[index];
|
||||||
|
}
|
||||||
|
if lon[index] < min_lon {
|
||||||
|
min_lon = lon[index];
|
||||||
|
}
|
||||||
|
if lon[index] > max_lon {
|
||||||
|
max_lon = lon[index];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
min_lat -= cell_size;
|
||||||
|
min_lon -= cell_size;
|
||||||
|
max_lat += cell_size;
|
||||||
|
max_lon += cell_size;
|
||||||
|
|
||||||
|
let rows = ((max_lat - min_lat) / cell_size).ceil() as usize + 1;
|
||||||
|
let cols = ((max_lon - min_lon) / cell_size).ceil() as usize + 1;
|
||||||
|
|
||||||
|
tracing::debug!(
|
||||||
|
rows_grid = rows,
|
||||||
|
cols_grid = cols,
|
||||||
|
total_cells = rows * cols,
|
||||||
|
cell_size,
|
||||||
|
"Building grid index"
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut cells: Vec<Vec<u32>> = vec![Vec::new(); rows * cols];
|
||||||
|
|
||||||
|
for index in 0..lat.len() {
|
||||||
|
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
|
||||||
|
let grid_col = ((lon[index] - min_lon) / cell_size) as usize;
|
||||||
|
let cell_index = grid_row * cols + grid_col;
|
||||||
|
cells[cell_index].push(index as u32);
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::debug!("Grid index built");
|
||||||
|
|
||||||
|
GridIndex {
|
||||||
|
min_lat,
|
||||||
|
min_lon,
|
||||||
|
cell_size,
|
||||||
|
cols,
|
||||||
|
rows,
|
||||||
|
cells,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query(&self, south: f64, west: f64, north: f64, east: f64) -> Vec<u32> {
|
||||||
|
let Some((row_min, row_max, col_min, col_max)) =
|
||||||
|
self.clamp_bounds(south, west, north, east)
|
||||||
|
else {
|
||||||
|
return Vec::new();
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
for row in row_min..=row_max {
|
||||||
|
let row_start = row * self.cols;
|
||||||
|
for col in col_min..=col_max {
|
||||||
|
result.extend_from_slice(&self.cells[row_start + col]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn for_each_in_bounds(
|
||||||
|
&self,
|
||||||
|
south: f64,
|
||||||
|
west: f64,
|
||||||
|
north: f64,
|
||||||
|
east: f64,
|
||||||
|
mut callback: impl FnMut(u32),
|
||||||
|
) {
|
||||||
|
let Some((row_min, row_max, col_min, col_max)) =
|
||||||
|
self.clamp_bounds(south, west, north, east)
|
||||||
|
else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
for row in row_min..=row_max {
|
||||||
|
let row_start = row * self.cols;
|
||||||
|
for col in col_min..=col_max {
|
||||||
|
for &row_idx in &self.cells[row_start + col] {
|
||||||
|
callback(row_idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clamp_bounds(
|
||||||
|
&self,
|
||||||
|
south: f64,
|
||||||
|
west: f64,
|
||||||
|
north: f64,
|
||||||
|
east: f64,
|
||||||
|
) -> Option<(usize, usize, usize, usize)> {
|
||||||
|
let row_min_raw = ((south - self.min_lat) / self.cell_size) as isize;
|
||||||
|
let row_max_raw = ((north - self.min_lat) / self.cell_size) as isize;
|
||||||
|
let col_min_raw = ((west - self.min_lon) / self.cell_size) as isize;
|
||||||
|
let col_max_raw = ((east - self.min_lon) / self.cell_size) as isize;
|
||||||
|
|
||||||
|
let row_min = row_min_raw.max(0) as usize;
|
||||||
|
let row_max_clamped = row_max_raw.min(self.rows as isize - 1);
|
||||||
|
let col_min = col_min_raw.max(0) as usize;
|
||||||
|
let col_max_clamped = col_max_raw.min(self.cols as isize - 1);
|
||||||
|
|
||||||
|
if row_max_clamped < 0 || col_max_clamped < 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let row_max = row_max_clamped as usize;
|
||||||
|
let col_max = col_max_clamped as usize;
|
||||||
|
|
||||||
|
if row_min > row_max || col_min > col_max {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some((row_min, row_max, col_min, col_max))
|
||||||
|
}
|
||||||
|
}
|
||||||
242
server-rs/src/main.rs
Normal file
242
server-rs/src/main.rs
Normal file
|
|
@ -0,0 +1,242 @@
|
||||||
|
mod consts;
|
||||||
|
mod data;
|
||||||
|
mod features;
|
||||||
|
mod filter;
|
||||||
|
mod grid_index;
|
||||||
|
mod routes;
|
||||||
|
mod state;
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests;
|
||||||
|
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use anyhow::{bail, Context};
|
||||||
|
use axum::routing::get;
|
||||||
|
use axum::Router;
|
||||||
|
use clap::Parser;
|
||||||
|
use tower_http::compression::CompressionLayer;
|
||||||
|
use tower_http::cors::{Any, CorsLayer};
|
||||||
|
use tower_http::services::ServeDir;
|
||||||
|
use tower_http::trace::TraceLayer;
|
||||||
|
use tracing::info;
|
||||||
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
|
use state::AppState;
|
||||||
|
|
||||||
|
// Command-line arguments for the server binary, parsed by clap.
// NOTE: the `///` comments on the fields are picked up by clap as help text,
// so they are part of the program's user-facing output.
#[derive(Parser)]
#[command(name = "narrowit", about = "Narrowit property map server")]
struct Cli {
    /// Path to the wide property parquet file
    #[arg(long)]
    data: PathBuf,

    /// Path to the POI parquet file
    #[arg(long)]
    pois: PathBuf,

    /// Path to the frontend dist directory
    // Optional: when omitted, `main` looks for `dist` next to the executable
    // and then falls back to `frontend/dist`.
    #[arg(long)]
    dist: Option<PathBuf>,
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(
|
||||||
|
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
|
||||||
|
)
|
||||||
|
.with_ansi(true)
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let cli = Cli::parse();
|
||||||
|
|
||||||
|
let parquet_path = &cli.data;
|
||||||
|
if !parquet_path.exists() {
|
||||||
|
bail!(
|
||||||
|
"Property parquet file not found: {}",
|
||||||
|
parquet_path.display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Loading property data from {}", parquet_path.display());
|
||||||
|
let property_data = data::PropertyData::load(parquet_path)?;
|
||||||
|
info!(
|
||||||
|
rows = property_data.lat.len(),
|
||||||
|
features = property_data.num_features,
|
||||||
|
enums = property_data.enum_features.len(),
|
||||||
|
"Property data loaded"
|
||||||
|
);
|
||||||
|
|
||||||
|
info!("Building spatial grid index (0.01° cells)");
|
||||||
|
let grid = grid_index::GridIndex::build(&property_data.lat, &property_data.lon, 0.01);
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"Precomputing H3 cells for resolutions {}-{}",
|
||||||
|
consts::H3_PRECOMPUTE_MIN,
|
||||||
|
consts::H3_PRECOMPUTE_MAX
|
||||||
|
);
|
||||||
|
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon)?;
|
||||||
|
|
||||||
|
let poi_path = cli.pois;
|
||||||
|
|
||||||
|
if !poi_path.exists() {
|
||||||
|
bail!("POI parquet file not found: {}", poi_path.display());
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("Loading POI data from {}", poi_path.display());
|
||||||
|
let poi_data = data::POIData::load(&poi_path)?;
|
||||||
|
info!(pois = poi_data.lat.len(), "POI data loaded");
|
||||||
|
|
||||||
|
info!("Building POI spatial grid index");
|
||||||
|
let poi_grid = grid_index::GridIndex::build(&poi_data.lat, &poi_data.lng, 0.01);
|
||||||
|
|
||||||
|
let min_keys: Vec<String> = property_data
|
||||||
|
.feature_names
|
||||||
|
.iter()
|
||||||
|
.map(|name| format!("min_{}", name))
|
||||||
|
.collect();
|
||||||
|
let max_keys: Vec<String> = property_data
|
||||||
|
.feature_names
|
||||||
|
.iter()
|
||||||
|
.map(|name| format!("max_{}", name))
|
||||||
|
.collect();
|
||||||
|
let enum_min_keys: Vec<String> = property_data
|
||||||
|
.enum_features
|
||||||
|
.iter()
|
||||||
|
.map(|enum_feature| format!("min_{}", enum_feature.name))
|
||||||
|
.collect();
|
||||||
|
let enum_max_keys: Vec<String> = property_data
|
||||||
|
.enum_features
|
||||||
|
.iter()
|
||||||
|
.map(|enum_feature| format!("max_{}", enum_feature.name))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Precompute POI category groups
|
||||||
|
let poi_category_groups = {
|
||||||
|
let mut group_cats: std::collections::HashMap<String, std::collections::HashSet<String>> =
|
||||||
|
std::collections::HashMap::new();
|
||||||
|
for (category, group) in poi_data.category.iter().zip(poi_data.group.iter()) {
|
||||||
|
group_cats
|
||||||
|
.entry(group.clone())
|
||||||
|
.or_default()
|
||||||
|
.insert(category.clone());
|
||||||
|
}
|
||||||
|
// Validate that data groups match the hardcoded order exactly
|
||||||
|
let expected: std::collections::HashSet<&str> =
|
||||||
|
consts::POI_GROUP_ORDER.iter().copied().collect();
|
||||||
|
let actual: std::collections::HashSet<&str> =
|
||||||
|
group_cats.keys().map(|key| key.as_str()).collect();
|
||||||
|
let missing_from_data: Vec<&&str> = expected.difference(&actual).collect();
|
||||||
|
let missing_from_order: Vec<&&str> = actual.difference(&expected).collect();
|
||||||
|
if !missing_from_data.is_empty() || !missing_from_order.is_empty() {
|
||||||
|
bail!(
|
||||||
|
"POI group mismatch!\n In POI_GROUP_ORDER but not in data: {:?}\n In data but not in POI_GROUP_ORDER: {:?}",
|
||||||
|
missing_from_data, missing_from_order
|
||||||
|
);
|
||||||
|
}
|
||||||
|
consts::POI_GROUP_ORDER.iter().map(|group_name| group_name.to_string()).collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.map(|name| {
|
||||||
|
let mut categories: Vec<String> =
|
||||||
|
group_cats.remove(&name).context("POI group validated but missing from map")?.into_iter().collect();
|
||||||
|
categories.sort();
|
||||||
|
Ok(state::POICategoryGroup { name, categories })
|
||||||
|
})
|
||||||
|
.collect::<anyhow::Result<Vec<_>>>()?
|
||||||
|
};
|
||||||
|
|
||||||
|
// Precompute enum name → index map
|
||||||
|
let enum_name_to_idx: rustc_hash::FxHashMap<String, usize> = property_data
|
||||||
|
.enum_features
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(index, enum_feature)| (enum_feature.name.clone(), index))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let state = Arc::new(AppState {
|
||||||
|
data: property_data,
|
||||||
|
grid,
|
||||||
|
h3_cells,
|
||||||
|
poi_data,
|
||||||
|
poi_grid,
|
||||||
|
min_keys,
|
||||||
|
max_keys,
|
||||||
|
enum_min_keys,
|
||||||
|
enum_max_keys,
|
||||||
|
poi_category_groups,
|
||||||
|
enum_name_to_idx,
|
||||||
|
});
|
||||||
|
|
||||||
|
let cors = CorsLayer::new()
|
||||||
|
.allow_origin(Any)
|
||||||
|
.allow_methods(Any)
|
||||||
|
.allow_headers(Any);
|
||||||
|
|
||||||
|
let state_features = state.clone();
|
||||||
|
let state_hexagons = state.clone();
|
||||||
|
let state_pois = state.clone();
|
||||||
|
let state_poi_categories = state.clone();
|
||||||
|
let state_hexagon_properties = state.clone();
|
||||||
|
let state_hexagon_stats = state.clone();
|
||||||
|
|
||||||
|
let api = Router::new()
|
||||||
|
.route(
|
||||||
|
"/api/features",
|
||||||
|
get(move || routes::get_features(state_features.clone())),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/api/hexagons",
|
||||||
|
get(move |query| routes::get_hexagons(state_hexagons.clone(), query)),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/api/pois",
|
||||||
|
get(move |query| routes::get_pois(state_pois.clone(), query)),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/api/poi-categories",
|
||||||
|
get(move || routes::get_poi_categories(state_poi_categories.clone())),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/api/hexagon-properties",
|
||||||
|
get(move |query| {
|
||||||
|
routes::get_hexagon_properties(state_hexagon_properties.clone(), query)
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.route(
|
||||||
|
"/api/hexagon-stats",
|
||||||
|
get(move |query| routes::get_hexagon_stats(state_hexagon_stats.clone(), query)),
|
||||||
|
);
|
||||||
|
|
||||||
|
let frontend_dist = cli.dist.unwrap_or_else(|| {
|
||||||
|
// Check next to the binary first, then fall back to working directory
|
||||||
|
if let Ok(executable) = std::env::current_exe() {
|
||||||
|
let executable_dir = executable.parent().unwrap_or_else(|| std::path::Path::new("."));
|
||||||
|
let dist_next_to_binary = executable_dir.join("dist");
|
||||||
|
if dist_next_to_binary.exists() {
|
||||||
|
return dist_next_to_binary;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PathBuf::from("frontend/dist")
|
||||||
|
});
|
||||||
|
let app = if frontend_dist.exists() {
|
||||||
|
api.fallback_service(ServeDir::new(frontend_dist))
|
||||||
|
} else {
|
||||||
|
api
|
||||||
|
};
|
||||||
|
|
||||||
|
let app = app
|
||||||
|
.layer(cors)
|
||||||
|
.layer(CompressionLayer::new().zstd(true).gzip(true))
|
||||||
|
.layer(TraceLayer::new_for_http());
|
||||||
|
|
||||||
|
let addr = consts::SERVER_ADDRESS;
|
||||||
|
let listener = tokio::net::TcpListener::bind(addr)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("Failed to bind to {addr}"))?;
|
||||||
|
info!("Server listening on {}", addr);
|
||||||
|
axum::serve(listener, app)
|
||||||
|
.await
|
||||||
|
.context("Server error")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
136
server-rs/src/routes/features.rs
Normal file
136
server-rs/src/routes/features.rs
Normal file
|
|
@ -0,0 +1,136 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::response::Json;
|
||||||
|
use serde::Serialize;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::data::Histogram;
|
||||||
|
use crate::features::{ENUM_FEATURE_GROUPS, FEATURE_GROUPS};
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
/// Description of one filterable feature as returned by `GET /api/features`.
///
/// Serialized as an internally tagged object: the `type` field is `"numeric"`
/// or `"enum"` and the variant's fields sit alongside it.
#[derive(Serialize)]
#[serde(tag = "type")]
pub enum FeatureInfo {
    /// A numeric feature with a slider range and a histogram.
    #[serde(rename = "numeric")]
    Numeric {
        name: String,
        // `min` / `max` are filled from the feature's `slider_min` / `slider_max` stats.
        min: f64,
        max: f64,
        // Filled from the feature's configured `step`.
        step: f64,
        histogram: Histogram,
        description: &'static str,
        detail: &'static str,
        source: &'static str,
    },
    /// A categorical feature with its list of possible values.
    #[serde(rename = "enum")]
    Enum {
        name: String,
        // Copied from the loaded enum feature's `values` list.
        values: Vec<String>,
        description: &'static str,
        detail: &'static str,
        source: &'static str,
    },
}
|
||||||
|
|
||||||
|
/// One named group of features in the `GET /api/features` response.
#[derive(Serialize)]
pub struct FeatureGroupResponse {
    /// Group name as declared in `FEATURE_GROUPS` / `ENUM_FEATURE_GROUPS`.
    name: String,
    /// Features of the group: numeric ones first, then enum ones.
    features: Vec<FeatureInfo>,
}
|
||||||
|
|
||||||
|
/// Top-level body of the `GET /api/features` response.
#[derive(Serialize)]
pub struct FeaturesResponse {
    /// Non-empty feature groups, in declaration order.
    groups: Vec<FeatureGroupResponse>,
}
|
||||||
|
|
||||||
|
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
|
||||||
|
// Collect all group names in order, merging numeric and enum groups with the same name
|
||||||
|
let mut group_names: Vec<&str> = Vec::new();
|
||||||
|
for feature_group in FEATURE_GROUPS {
|
||||||
|
if !group_names.contains(&feature_group.name) {
|
||||||
|
group_names.push(feature_group.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for enum_group in ENUM_FEATURE_GROUPS {
|
||||||
|
if !group_names.contains(&enum_group.name) {
|
||||||
|
group_names.push(enum_group.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut groups: Vec<FeatureGroupResponse> = Vec::new();
|
||||||
|
|
||||||
|
for &group_name in &group_names {
|
||||||
|
let mut features: Vec<FeatureInfo> = Vec::new();
|
||||||
|
|
||||||
|
// Add numeric features for this group
|
||||||
|
for feature_group in FEATURE_GROUPS {
|
||||||
|
if feature_group.name == group_name {
|
||||||
|
for feature_config in feature_group.features {
|
||||||
|
if let Some(feat_idx) =
|
||||||
|
state.data.feature_names.iter().position(|feat_name| feat_name == feature_config.name)
|
||||||
|
{
|
||||||
|
let stats = &state.data.feature_stats[feat_idx];
|
||||||
|
features.push(FeatureInfo::Numeric {
|
||||||
|
name: feature_config.name.to_string(),
|
||||||
|
min: stats.slider_min,
|
||||||
|
max: stats.slider_max,
|
||||||
|
step: feature_config.step,
|
||||||
|
histogram: stats.histogram.clone(),
|
||||||
|
description: feature_config.description,
|
||||||
|
detail: feature_config.detail,
|
||||||
|
source: feature_config.source,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add enum features for this group
|
||||||
|
for enum_group in ENUM_FEATURE_GROUPS {
|
||||||
|
if enum_group.name == group_name {
|
||||||
|
for enum_config in enum_group.features {
|
||||||
|
if let Some(enum_feature) = state
|
||||||
|
.data
|
||||||
|
.enum_features
|
||||||
|
.iter()
|
||||||
|
.find(|enum_feat| enum_feat.name == enum_config.name)
|
||||||
|
{
|
||||||
|
features.push(FeatureInfo::Enum {
|
||||||
|
name: enum_config.name.to_string(),
|
||||||
|
values: enum_feature.values.clone(),
|
||||||
|
description: enum_config.description,
|
||||||
|
detail: enum_config.detail,
|
||||||
|
source: enum_config.source,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !features.is_empty() {
|
||||||
|
groups.push(FeatureGroupResponse {
|
||||||
|
name: group_name.to_string(),
|
||||||
|
features,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let num_numeric: usize = groups
|
||||||
|
.iter()
|
||||||
|
.flat_map(|group| &group.features)
|
||||||
|
.filter(|feature| matches!(feature, FeatureInfo::Numeric { .. }))
|
||||||
|
.count();
|
||||||
|
let num_enum: usize = groups
|
||||||
|
.iter()
|
||||||
|
.flat_map(|group| &group.features)
|
||||||
|
.filter(|feature| matches!(feature, FeatureInfo::Enum { .. }))
|
||||||
|
.count();
|
||||||
|
|
||||||
|
info!(
|
||||||
|
numeric = num_numeric,
|
||||||
|
enums = num_enum,
|
||||||
|
groups = groups.len(),
|
||||||
|
"GET /api/features"
|
||||||
|
);
|
||||||
|
Json(FeaturesResponse { groups })
|
||||||
|
}
|
||||||
251
server-rs/src/routes/hexagon_stats.rs
Normal file
251
server-rs/src/routes/hexagon_stats.rs
Normal file
|
|
@ -0,0 +1,251 @@
|
||||||
|
use std::fmt::Write;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::extract::Query;
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::response::IntoResponse;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use crate::consts::{ENUM_NULL, HISTOGRAM_BINS};
|
||||||
|
use crate::filter::{parse_filters, row_passes_filters};
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
use super::parse::h3_cell_bounds;
|
||||||
|
|
||||||
|
/// Query-string parameters of `GET /api/hexagon-stats`.
#[derive(Deserialize)]
pub struct HexagonStatsParams {
    /// H3 cell index in string form; parsed with `h3o::CellIndex::from_str`.
    pub h3: String,
    /// H3 resolution; must select a non-empty entry of the precomputed `h3_cells`.
    pub resolution: u8,
    /// Optional filter expression in the format accepted by `parse_filters`.
    pub filters: Option<String>,
}
|
||||||
|
|
||||||
|
pub async fn get_hexagon_stats(
|
||||||
|
state: Arc<AppState>,
|
||||||
|
Query(params): Query<HexagonStatsParams>,
|
||||||
|
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||||
|
let cell = h3o::CellIndex::from_str(¶ms.h3).map_err(|error| {
|
||||||
|
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
|
||||||
|
(StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error))
|
||||||
|
})?;
|
||||||
|
let cell_u64: u64 = cell.into();
|
||||||
|
|
||||||
|
let resolution = params.resolution as usize;
|
||||||
|
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
|
||||||
|
warn!(
|
||||||
|
resolution,
|
||||||
|
"Invalid or non-precomputed resolution for hexagon-stats"
|
||||||
|
);
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"Invalid or non-precomputed resolution".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let h3_str = params.h3.clone();
|
||||||
|
let filters_str = params.filters.clone();
|
||||||
|
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||||
|
params.filters.as_deref(),
|
||||||
|
&state.data.feature_names,
|
||||||
|
&state.data.enum_features,
|
||||||
|
);
|
||||||
|
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||||
|
|
||||||
|
let result = tokio::task::spawn_blocking(move || {
|
||||||
|
let start_time = std::time::Instant::now();
|
||||||
|
let h3_data = &state.h3_cells[resolution];
|
||||||
|
let num_features = state.data.num_features;
|
||||||
|
let feature_data = &state.data.feature_data;
|
||||||
|
let enum_features = &state.data.enum_features;
|
||||||
|
|
||||||
|
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||||
|
|
||||||
|
// Collect matching rows
|
||||||
|
let mut matching_rows: Vec<usize> = Vec::new();
|
||||||
|
state
|
||||||
|
.grid
|
||||||
|
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||||
|
let row = row_idx as usize;
|
||||||
|
if h3_data[row] == cell_u64
|
||||||
|
&& row_passes_filters(
|
||||||
|
row,
|
||||||
|
&parsed_filters,
|
||||||
|
&parsed_enum_filters,
|
||||||
|
feature_data,
|
||||||
|
num_features,
|
||||||
|
enum_features,
|
||||||
|
)
|
||||||
|
{
|
||||||
|
matching_rows.push(row);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let total_count = matching_rows.len();
|
||||||
|
|
||||||
|
// Build JSON directly via string buffer
|
||||||
|
let mut output = String::with_capacity(4096);
|
||||||
|
output.push_str("{\"count\":");
|
||||||
|
write!(output, "{}", total_count).unwrap();
|
||||||
|
|
||||||
|
// Numeric features: compute count, min, max, sum, histogram using global bin edges
|
||||||
|
output.push_str(",\"numeric_features\":[");
|
||||||
|
let mut first_numeric = true;
|
||||||
|
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
|
||||||
|
let global_stats = &state.data.feature_stats[feature_index];
|
||||||
|
let histogram_min = global_stats.histogram.min;
|
||||||
|
let histogram_max = global_stats.histogram.max;
|
||||||
|
let bin_width = global_stats.histogram.bin_width;
|
||||||
|
|
||||||
|
let mut count = 0usize;
|
||||||
|
let mut min_value = f64::INFINITY;
|
||||||
|
let mut max_value = f64::NEG_INFINITY;
|
||||||
|
let mut sum = 0.0f64;
|
||||||
|
let mut bins = vec![0u64; HISTOGRAM_BINS];
|
||||||
|
|
||||||
|
for &row in &matching_rows {
|
||||||
|
let value = feature_data[row * num_features + feature_index];
|
||||||
|
if value.is_finite() {
|
||||||
|
count += 1;
|
||||||
|
if value < min_value {
|
||||||
|
min_value = value;
|
||||||
|
}
|
||||||
|
if value > max_value {
|
||||||
|
max_value = value;
|
||||||
|
}
|
||||||
|
sum += value;
|
||||||
|
|
||||||
|
// Bin into histogram using global edges
|
||||||
|
if bin_width > 0.0 {
|
||||||
|
let bin_index =
|
||||||
|
((value - histogram_min) / bin_width).floor() as isize;
|
||||||
|
let clamped_index = bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
|
||||||
|
bins[clamped_index] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if count == 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !first_numeric {
|
||||||
|
output.push(',');
|
||||||
|
}
|
||||||
|
first_numeric = false;
|
||||||
|
|
||||||
|
let mean = sum / count as f64;
|
||||||
|
output.push_str("{\"name\":");
|
||||||
|
write_json_string(&mut output, feature_name);
|
||||||
|
write!(output, ",\"count\":{}", count).unwrap();
|
||||||
|
write!(output, ",\"min\":{}", format_f64(min_value)).unwrap();
|
||||||
|
write!(output, ",\"max\":{}", format_f64(max_value)).unwrap();
|
||||||
|
write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
|
||||||
|
output.push_str(",\"histogram\":{\"min\":");
|
||||||
|
write!(output, "{}", format_f64(histogram_min)).unwrap();
|
||||||
|
output.push_str(",\"max\":");
|
||||||
|
write!(output, "{}", format_f64(histogram_max)).unwrap();
|
||||||
|
output.push_str(",\"bin_width\":");
|
||||||
|
write!(output, "{}", format_f64(bin_width)).unwrap();
|
||||||
|
output.push_str(",\"counts\":[");
|
||||||
|
for (bin_index, &bin_count) in bins.iter().enumerate() {
|
||||||
|
if bin_index > 0 {
|
||||||
|
output.push(',');
|
||||||
|
}
|
||||||
|
write!(output, "{}", bin_count).unwrap();
|
||||||
|
}
|
||||||
|
output.push_str("]}}")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enum features: count per value
|
||||||
|
output.push_str("],\"enum_features\":[");
|
||||||
|
let mut first_enum = true;
|
||||||
|
for enum_feature in enum_features {
|
||||||
|
let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
|
||||||
|
Some(&index) => index,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
let enum_data = &state.data.enum_features[enum_index];
|
||||||
|
|
||||||
|
let mut value_counts = vec![0u64; enum_data.values.len()];
|
||||||
|
for &row in &matching_rows {
|
||||||
|
let value = enum_data.data[row];
|
||||||
|
if value != ENUM_NULL && (value as usize) < value_counts.len() {
|
||||||
|
value_counts[value as usize] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only include if there are any non-zero counts
|
||||||
|
let has_values = value_counts.iter().any(|&count| count > 0);
|
||||||
|
if !has_values {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !first_enum {
|
||||||
|
output.push(',');
|
||||||
|
}
|
||||||
|
first_enum = false;
|
||||||
|
|
||||||
|
output.push_str("{\"name\":");
|
||||||
|
write_json_string(&mut output, &enum_feature.name);
|
||||||
|
output.push_str(",\"counts\":{");
|
||||||
|
let mut first_value = true;
|
||||||
|
for (value_index, &count) in value_counts.iter().enumerate() {
|
||||||
|
if count == 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if !first_value {
|
||||||
|
output.push(',');
|
||||||
|
}
|
||||||
|
first_value = false;
|
||||||
|
write_json_string(&mut output, &enum_data.values[value_index]);
|
||||||
|
write!(output, ":{}", count).unwrap();
|
||||||
|
}
|
||||||
|
output.push_str("}}");
|
||||||
|
}
|
||||||
|
output.push_str("]}");
|
||||||
|
|
||||||
|
let elapsed = start_time.elapsed();
|
||||||
|
info!(
|
||||||
|
h3 = %h3_str,
|
||||||
|
resolution,
|
||||||
|
total_count,
|
||||||
|
filters = num_filters,
|
||||||
|
filters_raw = filters_str.as_deref().unwrap_or("-"),
|
||||||
|
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
||||||
|
"GET /api/hexagon-stats"
|
||||||
|
);
|
||||||
|
|
||||||
|
output
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
[(axum::http::header::CONTENT_TYPE, "application/json")],
|
||||||
|
result,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append `value` to `output` as a complete, quoted JSON string literal.
///
/// Quotes, backslashes, newlines, carriage returns and tabs use their short
/// escape forms. Every other character below U+0020 is written as a
/// `\u00XX` escape, because JSON does not allow raw control characters
/// inside a string literal. All remaining characters are copied verbatim.
fn write_json_string(output: &mut String, value: &str) {
    output.push('"');
    for character in value.chars() {
        match character {
            '"' => output.push_str("\\\""),
            '\\' => output.push_str("\\\\"),
            '\n' => output.push_str("\\n"),
            '\r' => output.push_str("\\r"),
            '\t' => output.push_str("\\t"),
            // Remaining C0 controls have no short form; emit the generic escape.
            control if control < '\u{20}' => {
                output.push_str(&format!("\\u{:04x}", control as u32));
            }
            other => output.push(other),
        }
    }
    output.push('"');
}
|
||||||
|
|
||||||
|
/// Render `value` as text, keeping one decimal place on whole numbers.
///
/// Values with no fractional part and magnitude below 1e15 are printed with
/// exactly one decimal (`3` becomes `3.0`); everything else falls back to the
/// default `Display` output for `f64`.
fn format_f64(value: f64) -> String {
    let is_whole = value.fract() == 0.0;
    if is_whole && value.abs() < 1e15 {
        return format!("{value:.1}");
    }
    value.to_string()
}
|
||||||
375
server-rs/src/routes/hexagons.rs
Normal file
375
server-rs/src/routes/hexagons.rs
Normal file
|
|
@ -0,0 +1,375 @@
|
||||||
|
use std::fmt::{self, Write};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::extract::Query;
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::response::IntoResponse;
|
||||||
|
use rustc_hash::FxHashMap;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use crate::consts::{
|
||||||
|
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN,
|
||||||
|
POSTCODE_MIN_RESOLUTION,
|
||||||
|
};
|
||||||
|
use crate::filter::parse_filters;
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
use super::parse::parse_bounds;
|
||||||
|
|
||||||
|
/// Byte count whose `Display` output is a short human-readable size
/// using decimal (power-of-1000) units.
struct HumanBytes(usize);

impl fmt::Display for HumanBytes {
    /// Print as `MB` from one million bytes up, `KB` from one thousand up,
    /// and as a plain byte count otherwise.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            size if size >= 1_000_000 => {
                write!(formatter, "{:.1} MB", size as f64 / 1_000_000.0)
            }
            size if size >= 1_000 => write!(formatter, "{:.1} KB", size as f64 / 1_000.0),
            size => write!(formatter, "{} B", size),
        }
    }
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct HexagonParams {
|
||||||
|
resolution: u8,
|
||||||
|
bounds: Option<String>,
|
||||||
|
/// Comma-separated filters: `name:min:max,...`
|
||||||
|
/// Rows must have non-NaN values within [min,max] for each filter.
|
||||||
|
filters: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-cell accumulator for aggregating features
|
||||||
|
struct CellAgg {
|
||||||
|
count: u32,
|
||||||
|
mins: Vec<f64>,
|
||||||
|
maxs: Vec<f64>,
|
||||||
|
/// Min/max ordinal indices for enum features (255 = no data yet)
|
||||||
|
enum_mins: Vec<u8>,
|
||||||
|
enum_maxs: Vec<u8>,
|
||||||
|
/// Most common postcode in this cell (only tracked at high resolutions)
|
||||||
|
postcode: Option<String>,
|
||||||
|
postcode_count: u32,
|
||||||
|
lat_sum: f64,
|
||||||
|
lon_sum: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CellAgg {
|
||||||
|
fn new(num_features: usize, num_enums: usize) -> Self {
|
||||||
|
CellAgg {
|
||||||
|
count: 0,
|
||||||
|
mins: vec![f64::INFINITY; num_features],
|
||||||
|
maxs: vec![f64::NEG_INFINITY; num_features],
|
||||||
|
enum_mins: vec![ENUM_NULL; num_enums],
|
||||||
|
enum_maxs: vec![0; num_enums],
|
||||||
|
postcode: None,
|
||||||
|
postcode_count: 0,
|
||||||
|
lat_sum: 0.0,
|
||||||
|
lon_sum: 0.0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a row using row-major feature_data layout.
|
||||||
|
/// feature_data[row * num_features + feat_idx] — all features for one row
|
||||||
|
/// are contiguous, so this reads a single cache line per ~8 features.
|
||||||
|
#[inline]
|
||||||
|
fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) {
|
||||||
|
self.count += 1;
|
||||||
|
let base = row * num_features;
|
||||||
|
let row_slice = &feature_data[base..base + num_features];
|
||||||
|
for (feat_index, &value) in row_slice.iter().enumerate() {
|
||||||
|
if value.is_finite() {
|
||||||
|
if value < self.mins[feat_index] {
|
||||||
|
self.mins[feat_index] = value;
|
||||||
|
}
|
||||||
|
if value > self.maxs[feat_index] {
|
||||||
|
self.maxs[feat_index] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Track min/max ordinal index for each enum feature in this cell.
|
||||||
|
#[inline]
|
||||||
|
fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) {
|
||||||
|
for (enum_index, enum_feature) in enum_features.iter().enumerate() {
|
||||||
|
let value = enum_feature.data[row];
|
||||||
|
if value != ENUM_NULL {
|
||||||
|
if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] {
|
||||||
|
self.enum_mins[enum_index] = value;
|
||||||
|
}
|
||||||
|
if value > self.enum_maxs[enum_index] {
|
||||||
|
self.enum_maxs[enum_index] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Track postcode and centroid for high-resolution cells.
|
||||||
|
/// Uses simple "first seen" approach — at res 11/12, most rows in a cell share a postcode.
|
||||||
|
#[inline]
|
||||||
|
fn add_postcode(&mut self, postcode: &str, lat: f64, lon: f64) {
|
||||||
|
self.lat_sum += lat;
|
||||||
|
self.lon_sum += lon;
|
||||||
|
if postcode.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if self.postcode.is_none() {
|
||||||
|
self.postcode = Some(postcode.to_string());
|
||||||
|
self.postcode_count = 1;
|
||||||
|
} else if self.postcode.as_deref() == Some(postcode) {
|
||||||
|
self.postcode_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append `text` to `buf` with JSON string escaping applied.
///
/// No surrounding quotes are written; the caller supplies them. Characters
/// below U+0020 without a short escape are written as `\u00XX`.
pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
    for symbol in text.chars() {
        let short_escape = match symbol {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            buf.push_str(escape);
        } else if symbol < '\x20' {
            let _ = write!(buf, "\\u{:04x}", symbol as u32);
        } else {
            buf.push(symbol);
        }
    }
}
|
||||||
|
|
||||||
|
/// Write the hexagons JSON response directly to a String buffer,
|
||||||
|
/// avoiding serde_json::Value allocations entirely.
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
fn write_hexagons_json(
|
||||||
|
buf: &mut String,
|
||||||
|
groups: &FxHashMap<u64, CellAgg>,
|
||||||
|
min_keys: &[String],
|
||||||
|
max_keys: &[String],
|
||||||
|
num_features: usize,
|
||||||
|
enum_min_keys: &[String],
|
||||||
|
enum_max_keys: &[String],
|
||||||
|
num_enums: usize,
|
||||||
|
include_postcode: bool,
|
||||||
|
) {
|
||||||
|
buf.push_str("{\"features\":[");
|
||||||
|
let mut first = true;
|
||||||
|
for (&cell_id, aggregation) in groups {
|
||||||
|
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
if !first {
|
||||||
|
buf.push(',');
|
||||||
|
}
|
||||||
|
first = false;
|
||||||
|
|
||||||
|
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
|
||||||
|
|
||||||
|
for feat_index in 0..num_features {
|
||||||
|
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
|
||||||
|
let _ = write!(
|
||||||
|
buf,
|
||||||
|
",\"{}\":{},\"{}\":{}",
|
||||||
|
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for enum_index in 0..num_enums {
|
||||||
|
if aggregation.enum_mins[enum_index] != ENUM_NULL {
|
||||||
|
let _ = write!(
|
||||||
|
buf,
|
||||||
|
",\"{}\":{},\"{}\":{}",
|
||||||
|
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
|
||||||
|
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if include_postcode {
|
||||||
|
if let Some(ref postcode) = aggregation.postcode {
|
||||||
|
let total = aggregation.count as f64;
|
||||||
|
let centroid_lat = aggregation.lat_sum / total;
|
||||||
|
let centroid_lon = aggregation.lon_sum / total;
|
||||||
|
if centroid_lat.is_finite() && centroid_lon.is_finite() {
|
||||||
|
buf.push_str(",\"postcode\":\"");
|
||||||
|
write_json_escaped(buf, postcode);
|
||||||
|
let _ = write!(buf, "\",\"lat\":{},\"lon\":{}", centroid_lat, centroid_lon);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.push('}');
|
||||||
|
}
|
||||||
|
buf.push_str("]}");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_hexagons(
|
||||||
|
state: Arc<AppState>,
|
||||||
|
Query(params): Query<HexagonParams>,
|
||||||
|
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||||
|
let resolution = params.resolution;
|
||||||
|
if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX {
|
||||||
|
warn!(
|
||||||
|
resolution,
|
||||||
|
"Resolution out of range [{}, {}]", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
|
||||||
|
);
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
format!(
|
||||||
|
"resolution must be between {} and {}",
|
||||||
|
H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
|
||||||
|
),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let bounds_str = params.bounds.ok_or((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"bounds parameter is required".into(),
|
||||||
|
))?;
|
||||||
|
|
||||||
|
let (mut south, mut west, mut north, mut east) = parse_bounds(&bounds_str)?;
|
||||||
|
|
||||||
|
let lat_range = north - south;
|
||||||
|
let lng_range = east - west;
|
||||||
|
south -= lat_range * BOUNDS_BUFFER_PERCENT;
|
||||||
|
north += lat_range * BOUNDS_BUFFER_PERCENT;
|
||||||
|
west -= lng_range * BOUNDS_BUFFER_PERCENT;
|
||||||
|
east += lng_range * BOUNDS_BUFFER_PERCENT;
|
||||||
|
|
||||||
|
south = (south / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
|
||||||
|
west = (west / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
|
||||||
|
north = (north / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
|
||||||
|
east = (east / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
|
||||||
|
|
||||||
|
let filters_str = params.filters.clone();
|
||||||
|
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||||
|
params.filters.as_deref(),
|
||||||
|
&state.data.feature_names,
|
||||||
|
&state.data.enum_features,
|
||||||
|
);
|
||||||
|
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||||
|
|
||||||
|
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
|
||||||
|
let t0 = std::time::Instant::now();
|
||||||
|
|
||||||
|
let num_features = state.data.num_features;
|
||||||
|
let num_enums = state.data.enum_features.len();
|
||||||
|
let feature_data = &state.data.feature_data;
|
||||||
|
|
||||||
|
let min_keys = &state.min_keys;
|
||||||
|
let max_keys = &state.max_keys;
|
||||||
|
let enum_min_keys = &state.enum_min_keys;
|
||||||
|
let enum_max_keys = &state.enum_max_keys;
|
||||||
|
|
||||||
|
let h3_cells_for_res: Option<&[u64]> = state
|
||||||
|
.h3_cells
|
||||||
|
.get(resolution as usize)
|
||||||
|
.filter(|cells| !cells.is_empty())
|
||||||
|
.map(|cells| cells.as_slice());
|
||||||
|
|
||||||
|
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
||||||
|
|
||||||
|
let enum_features = &state.data.enum_features;
|
||||||
|
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
|
||||||
|
|
||||||
|
// Row-level filter check: numeric must be non-NaN and within [min, max],
|
||||||
|
// enum must have value index in the allowed set
|
||||||
|
let row_passes = |row: usize| -> bool {
|
||||||
|
parsed_filters.iter().all(|filter| {
|
||||||
|
let value = feature_data[row * num_features + filter.feat_idx];
|
||||||
|
value.is_finite() && value >= filter.min && value <= filter.max
|
||||||
|
}) && parsed_enum_filters.iter().all(|enum_filter| {
|
||||||
|
let value = enum_features[enum_filter.enum_idx].data[row];
|
||||||
|
value != ENUM_NULL && enum_filter.allowed.contains(&value)
|
||||||
|
})
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(precomputed) = h3_cells_for_res {
|
||||||
|
state
|
||||||
|
.grid
|
||||||
|
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||||
|
let row = row_idx as usize;
|
||||||
|
if !row_passes(row) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let cell_id = precomputed[row];
|
||||||
|
let aggregation = groups
|
||||||
|
.entry(cell_id)
|
||||||
|
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
||||||
|
aggregation.add_row(feature_data, row, num_features);
|
||||||
|
aggregation.add_enums(enum_features, row);
|
||||||
|
if include_postcode {
|
||||||
|
aggregation.add_postcode(
|
||||||
|
&state.data.postcode[row],
|
||||||
|
state.data.lat[row],
|
||||||
|
state.data.lon[row],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
let h3_res = h3o::Resolution::try_from(resolution)
|
||||||
|
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
|
||||||
|
state
|
||||||
|
.grid
|
||||||
|
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||||
|
let row = row_idx as usize;
|
||||||
|
if !row_passes(row) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row])
|
||||||
|
.map(|coord| u64::from(coord.to_cell(h3_res)))
|
||||||
|
.unwrap_or(0);
|
||||||
|
let aggregation = groups
|
||||||
|
.entry(cell_id)
|
||||||
|
.or_insert_with(|| CellAgg::new(num_features, num_enums));
|
||||||
|
aggregation.add_row(feature_data, row, num_features);
|
||||||
|
aggregation.add_enums(enum_features, row);
|
||||||
|
if include_postcode {
|
||||||
|
aggregation.add_postcode(
|
||||||
|
&state.data.postcode[row],
|
||||||
|
state.data.lat[row],
|
||||||
|
state.data.lon[row],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let t_agg = t0.elapsed();
|
||||||
|
|
||||||
|
let mut json_buf = String::with_capacity(groups.len() * 128);
|
||||||
|
write_hexagons_json(
|
||||||
|
&mut json_buf,
|
||||||
|
&groups,
|
||||||
|
min_keys,
|
||||||
|
max_keys,
|
||||||
|
num_features,
|
||||||
|
enum_min_keys,
|
||||||
|
enum_max_keys,
|
||||||
|
num_enums,
|
||||||
|
include_postcode,
|
||||||
|
);
|
||||||
|
|
||||||
|
let t_total = t0.elapsed();
|
||||||
|
info!(
|
||||||
|
resolution,
|
||||||
|
cells = groups.len(),
|
||||||
|
filters = num_filters,
|
||||||
|
filters_raw = filters_str.as_deref().unwrap_or("-"),
|
||||||
|
agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0),
|
||||||
|
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
|
||||||
|
size = format_args!("{}", HumanBytes(json_buf.len())),
|
||||||
|
"GET /api/hexagons"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(json_buf)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
|
||||||
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
|
||||||
|
|
||||||
|
Ok(([("content-type", "application/json")], json_body))
|
||||||
|
}
|
||||||
12
server-rs/src/routes/mod.rs
Normal file
12
server-rs/src/routes/mod.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
mod features;
|
||||||
|
pub(crate) mod hexagons;
|
||||||
|
mod hexagon_stats;
|
||||||
|
pub(crate) mod parse;
|
||||||
|
mod pois;
|
||||||
|
pub(crate) mod properties;
|
||||||
|
|
||||||
|
pub use features::get_features;
|
||||||
|
pub use hexagon_stats::get_hexagon_stats;
|
||||||
|
pub use hexagons::get_hexagons;
|
||||||
|
pub use pois::{get_poi_categories, get_pois};
|
||||||
|
pub use properties::get_hexagon_properties;
|
||||||
52
server-rs/src/routes/parse.rs
Normal file
52
server-rs/src/routes/parse.rs
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
|
||||||
|
/// Compute the lat/lon bounding box of an H3 cell, with a configurable buffer in degrees.
|
||||||
|
pub fn h3_cell_bounds(cell: h3o::CellIndex, buffer: f64) -> (f64, f64, f64, f64) {
|
||||||
|
let boundary = cell.boundary();
|
||||||
|
let (mut min_lat, mut max_lat) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||||
|
let (mut min_lon, mut max_lon) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||||
|
for vertex in boundary.iter() {
|
||||||
|
let lat = vertex.lat();
|
||||||
|
let lon = vertex.lng();
|
||||||
|
if lat < min_lat {
|
||||||
|
min_lat = lat;
|
||||||
|
}
|
||||||
|
if lat > max_lat {
|
||||||
|
max_lat = lat;
|
||||||
|
}
|
||||||
|
if lon < min_lon {
|
||||||
|
min_lon = lon;
|
||||||
|
}
|
||||||
|
if lon > max_lon {
|
||||||
|
max_lon = lon;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(
|
||||||
|
min_lat - buffer,
|
||||||
|
min_lon - buffer,
|
||||||
|
max_lat + buffer,
|
||||||
|
max_lon + buffer,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_bounds(bounds_str: &str) -> Result<(f64, f64, f64, f64), (StatusCode, String)> {
|
||||||
|
let parts: Vec<f64> = bounds_str
|
||||||
|
.split(',')
|
||||||
|
.map(|part| part.trim().parse::<f64>())
|
||||||
|
.collect::<Result<Vec<_>, _>>()
|
||||||
|
.map_err(|_| {
|
||||||
|
(
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"Invalid bounds format. Use: south,west,north,east".into(),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if parts.len() != 4 {
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"Invalid bounds format. Use: south,west,north,east".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((parts[0], parts[1], parts[2], parts[3]))
|
||||||
|
}
|
||||||
128
server-rs/src/routes/pois.rs
Normal file
128
server-rs/src/routes/pois.rs
Normal file
|
|
@ -0,0 +1,128 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::extract::Query;
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::response::Json;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::consts::MAX_POIS_PER_REQUEST;
|
||||||
|
use crate::data::POI;
|
||||||
|
use crate::state::{AppState, POICategoryGroup};
|
||||||
|
|
||||||
|
use super::parse::parse_bounds;
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct POIParams {
|
||||||
|
bounds: Option<String>,
|
||||||
|
/// Comma-separated list of categories to filter by
|
||||||
|
categories: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct POIsResponse {
|
||||||
|
pois: Vec<POI>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_pois(
|
||||||
|
state: Arc<AppState>,
|
||||||
|
Query(params): Query<POIParams>,
|
||||||
|
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
|
||||||
|
let bounds_str = params.bounds.ok_or((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"bounds parameter is required".into(),
|
||||||
|
))?;
|
||||||
|
|
||||||
|
let (south, west, north, east) = parse_bounds(&bounds_str)?;
|
||||||
|
|
||||||
|
let categories_str = params.categories.clone();
|
||||||
|
let category_filter: Option<rustc_hash::FxHashSet<String>> = params
|
||||||
|
.categories
|
||||||
|
.as_deref()
|
||||||
|
.filter(|text| !text.is_empty())
|
||||||
|
.map(|text| text.split(',').map(|part| part.trim().to_string()).collect());
|
||||||
|
|
||||||
|
let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0);
|
||||||
|
|
||||||
|
let result = tokio::task::spawn_blocking(move || {
|
||||||
|
let t0 = std::time::Instant::now();
|
||||||
|
let row_indices = state.poi_grid.query(south, west, north, east);
|
||||||
|
|
||||||
|
// Collect matching row indices first, then sample randomly so the
|
||||||
|
// subset covers the viewport uniformly instead of clustering in one area.
|
||||||
|
let mut matching_rows: Vec<usize> = row_indices
|
||||||
|
.iter()
|
||||||
|
.filter_map(|&row_idx| {
|
||||||
|
let row = row_idx as usize;
|
||||||
|
if let Some(ref categories) = category_filter {
|
||||||
|
if !categories.contains(&state.poi_data.category[row]) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(row)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if matching_rows.len() > MAX_POIS_PER_REQUEST {
|
||||||
|
// Use a power-of-2 sampling step so each POI's inclusion depends
|
||||||
|
// only on its own priority hash, not on what other POIs are in
|
||||||
|
// the viewport. This prevents visible reshuffling when panning.
|
||||||
|
let ratio = (matching_rows.len() / MAX_POIS_PER_REQUEST) as u32;
|
||||||
|
let step = ratio.next_power_of_two();
|
||||||
|
let mask = step - 1;
|
||||||
|
matching_rows.retain(|&row| state.poi_data.priority[row] & mask == 0);
|
||||||
|
// Statistical noise may leave us slightly over the limit
|
||||||
|
if matching_rows.len() > MAX_POIS_PER_REQUEST {
|
||||||
|
matching_rows.sort_unstable_by_key(|&row| state.poi_data.priority[row]);
|
||||||
|
matching_rows.truncate(MAX_POIS_PER_REQUEST);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let pois: Vec<POI> = matching_rows
|
||||||
|
.iter()
|
||||||
|
.map(|&row| POI {
|
||||||
|
id: state.poi_data.id[row].clone(),
|
||||||
|
name: state.poi_data.name[row].clone(),
|
||||||
|
category: state.poi_data.category[row].clone(),
|
||||||
|
group: state.poi_data.group[row].clone(),
|
||||||
|
lat: state.poi_data.lat[row],
|
||||||
|
lng: state.poi_data.lng[row],
|
||||||
|
emoji: state.poi_data.emoji[row].clone(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let elapsed = t0.elapsed();
|
||||||
|
info!(
|
||||||
|
results = pois.len(),
|
||||||
|
candidates = row_indices.len(),
|
||||||
|
categories = num_categories,
|
||||||
|
categories_raw = categories_str.as_deref().unwrap_or("-"),
|
||||||
|
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
||||||
|
"GET /api/pois"
|
||||||
|
);
|
||||||
|
|
||||||
|
POIsResponse { pois }
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||||
|
|
||||||
|
Ok(Json(result))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct POICategoriesResponse {
|
||||||
|
groups: Vec<POICategoryGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesResponse> {
|
||||||
|
let groups: Vec<POICategoryGroup> = state.poi_category_groups.clone();
|
||||||
|
|
||||||
|
let total: usize = groups.iter().map(|group| group.categories.len()).sum();
|
||||||
|
info!(
|
||||||
|
count = total,
|
||||||
|
groups = groups.len(),
|
||||||
|
"GET /api/poi-categories"
|
||||||
|
);
|
||||||
|
|
||||||
|
Json(POICategoriesResponse { groups })
|
||||||
|
}
|
||||||
230
server-rs/src/routes/properties.rs
Normal file
230
server-rs/src/routes/properties.rs
Normal file
|
|
@ -0,0 +1,230 @@
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::extract::Query;
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::response::Json;
|
||||||
|
use rustc_hash::FxHashMap;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, MAX_PROPERTIES_LIMIT};
|
||||||
|
use crate::data::EnumFeatureData;
|
||||||
|
use crate::filter::{parse_filters, row_passes_filters};
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
use super::parse::h3_cell_bounds;
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct HexagonPropertiesParams {
|
||||||
|
pub h3: String,
|
||||||
|
pub resolution: u8,
|
||||||
|
pub filters: Option<String>,
|
||||||
|
pub limit: Option<usize>,
|
||||||
|
pub offset: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct Property {
|
||||||
|
// String fields
|
||||||
|
pub address: Option<String>,
|
||||||
|
pub postcode: Option<String>,
|
||||||
|
pub property_type: Option<String>,
|
||||||
|
pub built_form: Option<String>,
|
||||||
|
pub duration: Option<String>,
|
||||||
|
pub current_energy_rating: Option<String>,
|
||||||
|
pub potential_energy_rating: Option<String>,
|
||||||
|
|
||||||
|
// Numeric fields
|
||||||
|
pub lat: f64,
|
||||||
|
pub lon: f64,
|
||||||
|
|
||||||
|
pub is_construction_date_approximate: Option<bool>,
|
||||||
|
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub features: FxHashMap<String, f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct HexagonPropertiesResponse {
|
||||||
|
pub properties: Vec<Property>,
|
||||||
|
pub total: usize,
|
||||||
|
pub limit: usize,
|
||||||
|
pub offset: usize,
|
||||||
|
pub truncated: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Trim `text` and return it as an owned string, or `None` when nothing
/// but whitespace remains.
fn non_empty_string(text: &str) -> Option<String> {
    Some(text.trim())
        .filter(|trimmed| !trimmed.is_empty())
        .map(str::to_string)
}
|
||||||
|
|
||||||
|
fn lookup_enum_value(
|
||||||
|
enum_features: &[EnumFeatureData],
|
||||||
|
enum_idx: &FxHashMap<String, usize>,
|
||||||
|
row: usize,
|
||||||
|
names: &[&str],
|
||||||
|
) -> Option<String> {
|
||||||
|
for name in names {
|
||||||
|
if let Some(&feature_index) = enum_idx.get(*name) {
|
||||||
|
let enum_feature = &enum_features[feature_index];
|
||||||
|
let data_index = enum_feature.data[row];
|
||||||
|
if data_index != ENUM_NULL {
|
||||||
|
if let Some(value) = enum_feature.values.get(data_index as usize) {
|
||||||
|
return Some(value.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_hexagon_properties(
|
||||||
|
state: Arc<AppState>,
|
||||||
|
Query(params): Query<HexagonPropertiesParams>,
|
||||||
|
) -> Result<Json<HexagonPropertiesResponse>, (StatusCode, String)> {
|
||||||
|
let cell = h3o::CellIndex::from_str(¶ms.h3).map_err(|error| {
|
||||||
|
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
|
||||||
|
(StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error))
|
||||||
|
})?;
|
||||||
|
let cell_u64: u64 = cell.into();
|
||||||
|
|
||||||
|
let resolution = params.resolution as usize;
|
||||||
|
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
|
||||||
|
warn!(
|
||||||
|
resolution,
|
||||||
|
"Invalid or non-precomputed resolution for hexagon-properties"
|
||||||
|
);
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
"Invalid or non-precomputed resolution".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let h3_str = params.h3.clone();
|
||||||
|
let filters_str = params.filters.clone();
|
||||||
|
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||||
|
params.filters.as_deref(),
|
||||||
|
&state.data.feature_names,
|
||||||
|
&state.data.enum_features,
|
||||||
|
);
|
||||||
|
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||||
|
|
||||||
|
let result = tokio::task::spawn_blocking(move || {
|
||||||
|
let t0 = std::time::Instant::now();
|
||||||
|
let h3_data = &state.h3_cells[resolution];
|
||||||
|
let num_features = state.data.num_features;
|
||||||
|
let feature_data = &state.data.feature_data;
|
||||||
|
let enum_features = &state.data.enum_features;
|
||||||
|
|
||||||
|
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||||
|
|
||||||
|
let mut matching_rows: Vec<usize> = Vec::new();
|
||||||
|
state
|
||||||
|
.grid
|
||||||
|
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
||||||
|
let row = row_idx as usize;
|
||||||
|
if h3_data[row] == cell_u64
|
||||||
|
&& row_passes_filters(
|
||||||
|
row,
|
||||||
|
&parsed_filters,
|
||||||
|
&parsed_enum_filters,
|
||||||
|
feature_data,
|
||||||
|
num_features,
|
||||||
|
enum_features,
|
||||||
|
)
|
||||||
|
{
|
||||||
|
matching_rows.push(row);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let total = matching_rows.len();
|
||||||
|
let limit = params.limit.unwrap_or(DEFAULT_PROPERTIES_LIMIT).min(MAX_PROPERTIES_LIMIT);
|
||||||
|
let offset = params.offset.unwrap_or(0);
|
||||||
|
let truncated = total > offset + limit;
|
||||||
|
|
||||||
|
let properties: Vec<Property> = matching_rows
|
||||||
|
.iter()
|
||||||
|
.skip(offset)
|
||||||
|
.take(limit)
|
||||||
|
.map(|&row| {
|
||||||
|
let mut features = FxHashMap::default();
|
||||||
|
let base = row * num_features;
|
||||||
|
for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
|
||||||
|
let value = feature_data[base + feat_idx];
|
||||||
|
if value.is_finite() {
|
||||||
|
features.insert(feat_name.clone(), value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Property {
|
||||||
|
address: non_empty_string(&state.data.address[row]),
|
||||||
|
postcode: non_empty_string(&state.data.postcode[row]),
|
||||||
|
is_construction_date_approximate: Some(state.data.is_approx_build_date[row]),
|
||||||
|
property_type: lookup_enum_value(
|
||||||
|
enum_features,
|
||||||
|
&state.enum_name_to_idx,
|
||||||
|
row,
|
||||||
|
&["Property type", "epc_property_type", "pp_property_type"],
|
||||||
|
),
|
||||||
|
built_form: lookup_enum_value(
|
||||||
|
enum_features,
|
||||||
|
&state.enum_name_to_idx,
|
||||||
|
row,
|
||||||
|
&["Property type/built form", "built_form"],
|
||||||
|
),
|
||||||
|
duration: lookup_enum_value(
|
||||||
|
enum_features,
|
||||||
|
&state.enum_name_to_idx,
|
||||||
|
row,
|
||||||
|
&["Leashold/Freehold", "duration"],
|
||||||
|
),
|
||||||
|
current_energy_rating: lookup_enum_value(
|
||||||
|
enum_features,
|
||||||
|
&state.enum_name_to_idx,
|
||||||
|
row,
|
||||||
|
&["Current energy rating", "current_energy_rating"],
|
||||||
|
),
|
||||||
|
potential_energy_rating: lookup_enum_value(
|
||||||
|
enum_features,
|
||||||
|
&state.enum_name_to_idx,
|
||||||
|
row,
|
||||||
|
&["Potential energy rating", "potential_energy_rating"],
|
||||||
|
),
|
||||||
|
lat: state.data.lat[row],
|
||||||
|
lon: state.data.lon[row],
|
||||||
|
features,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let elapsed = t0.elapsed();
|
||||||
|
info!(
|
||||||
|
h3 = %h3_str,
|
||||||
|
resolution,
|
||||||
|
total,
|
||||||
|
returned = properties.len(),
|
||||||
|
offset,
|
||||||
|
filters = num_filters,
|
||||||
|
filters_raw = filters_str.as_deref().unwrap_or("-"),
|
||||||
|
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
||||||
|
"GET /api/hexagon-properties"
|
||||||
|
);
|
||||||
|
|
||||||
|
HexagonPropertiesResponse {
|
||||||
|
properties,
|
||||||
|
total,
|
||||||
|
limit,
|
||||||
|
offset,
|
||||||
|
truncated,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
|
||||||
|
|
||||||
|
Ok(Json(result))
|
||||||
|
}
|
||||||
33
server-rs/src/state.rs
Normal file
33
server-rs/src/state.rs
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
use rustc_hash::FxHashMap;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::data::{POIData, PropertyData};
|
||||||
|
use crate::grid_index::GridIndex;
|
||||||
|
|
||||||
|
#[derive(Serialize, Clone)]
|
||||||
|
pub struct POICategoryGroup {
|
||||||
|
pub name: String,
|
||||||
|
pub categories: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Application state: the property and POI datasets, one `GridIndex` for
/// each, and a set of precomputed lookup tables.
pub struct AppState {
    /// Property dataset.
    pub data: PropertyData,
    /// Grid index; presumably built over the rows of `data` — confirm where
    /// `AppState` is constructed.
    pub grid: GridIndex,
    /// h3_cells[resolution][row_idx] = precomputed H3 cell ID.
    /// Empty Vec for resolutions not precomputed.
    pub h3_cells: Vec<Vec<u64>>,
    /// POI dataset.
    pub poi_data: POIData,
    /// Grid index; presumably built over `poi_data` — confirm where
    /// `AppState` is constructed.
    pub poi_grid: GridIndex,
    /// Precomputed JSON key names: "min_{feature_name}" for each numeric feature
    pub min_keys: Vec<String>,
    /// Precomputed JSON key names: "max_{feature_name}" for each numeric feature
    pub max_keys: Vec<String>,
    /// Precomputed JSON key names: "min_{enum_name}" for each enum feature
    pub enum_min_keys: Vec<String>,
    /// Precomputed JSON key names: "max_{enum_name}" for each enum feature
    pub enum_max_keys: Vec<String>,
    /// Precomputed POI category groups (sorted)
    pub poi_category_groups: Vec<POICategoryGroup>,
    /// Precomputed map from enum feature name to index in data.enum_features
    pub enum_name_to_idx: FxHashMap<String, usize>,
}
|
||||||
250
server-rs/src/tests.rs
Normal file
250
server-rs/src/tests.rs
Normal file
|
|
@ -0,0 +1,250 @@
|
||||||
|
#[cfg(test)]
mod grid_index_tests {
    use crate::grid_index::GridIndex;

    /// Query bounds whose coordinates all lie below the indexed points
    /// must match nothing.
    #[test]
    fn query_bounds_fully_below_grid_returns_empty() {
        let latitudes = vec![50.0, 50.5, 51.0];
        let longitudes = vec![0.0, 0.5, 1.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.01);

        assert!(
            index.query(10.0, -10.0, 20.0, -5.0).is_empty(),
            "Should return empty for bounds fully below grid"
        );
    }

    /// Query bounds whose coordinates all lie above the indexed points
    /// must match nothing.
    #[test]
    fn query_bounds_fully_above_grid_returns_empty() {
        let latitudes = vec![50.0, 50.5, 51.0];
        let longitudes = vec![0.0, 0.5, 1.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.01);

        assert!(
            index.query(80.0, 50.0, 90.0, 60.0).is_empty(),
            "Should return empty for bounds fully above grid"
        );
    }

    /// Bounds with south > north (first argument larger than the third)
    /// must match nothing.
    #[test]
    fn query_inverted_bounds_returns_empty() {
        let latitudes = vec![50.0, 50.5, 51.0];
        let longitudes = vec![0.0, 0.5, 1.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.01);

        assert!(
            index.query(52.0, 0.0, 49.0, 1.0).is_empty(),
            "Should return empty for inverted bounds"
        );
    }

    /// The callback-based traversal must never invoke its callback for
    /// bounds that lie outside the indexed points.
    #[test]
    fn for_each_bounds_fully_outside_yields_nothing() {
        let latitudes = vec![50.0, 50.5, 51.0];
        let longitudes = vec![0.0, 0.5, 1.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.01);

        let mut visited = 0;
        index.for_each_in_bounds(10.0, -10.0, 20.0, -5.0, |_| visited += 1);

        assert_eq!(
            visited, 0,
            "for_each should yield nothing for out-of-bounds query"
        );
    }

    /// Regression test: out-of-bounds queries with large cell sizes used to
    /// scan cell (0,0), which could contain data. They now return empty.
    #[test]
    fn query_with_large_cells_outside_returns_empty() {
        let latitudes = vec![50.0];
        let longitudes = vec![0.0];
        let index = GridIndex::build(&latitudes, &longitudes, 1.0);

        assert!(
            index.query(0.0, -50.0, 10.0, -40.0).is_empty(),
            "Should return empty even with large cell size"
        );
    }

    /// Bounds that enclose every indexed point return all of them.
    #[test]
    fn query_within_bounds_returns_correct_results() {
        let latitudes = vec![50.0, 50.5, 51.0];
        let longitudes = vec![0.0, 0.5, 1.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.01);

        let hits = index.query(49.9, -0.1, 51.1, 1.1);

        assert_eq!(hits.len(), 3, "Should return all 3 points within bounds");
    }

    /// Bounds that enclose only one of three points return just that point.
    #[test]
    fn query_partial_bounds_returns_subset() {
        let latitudes = vec![50.0, 51.0, 52.0];
        let longitudes = vec![0.0, 0.0, 0.0];
        let index = GridIndex::build(&latitudes, &longitudes, 0.01);

        let hits = index.query(49.9, -0.1, 50.1, 0.1);

        assert_eq!(hits.len(), 1, "Should return only the point at lat=50");
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod filter_tests {
    use crate::data::EnumFeatureData;
    use crate::filter::{parse_filters, row_passes_filters};

    /// Single-row "rating" enum feature with values A and B, shared by the
    /// enum-filter tests.
    fn rating_feature() -> EnumFeatureData {
        EnumFeatureData {
            name: "rating".to_string(),
            values: vec!["A".to_string(), "B".to_string()],
            data: vec![0],
        }
    }

    /// A NaN feature value must be rejected even when the filter range is
    /// (-inf, inf).
    #[test]
    fn nan_rows_fail_numeric_filter_even_with_infinite_range() {
        let numeric_names = vec!["price".to_string()];
        let column = vec![f64::NAN];
        let no_enums: Vec<EnumFeatureData> = Vec::new();

        let (numeric_filters, enum_filters) =
            parse_filters(Some("price:-inf:inf"), &numeric_names, &no_enums);
        assert_eq!(
            numeric_filters.len(),
            1,
            "Should parse -inf:inf as valid filter"
        );

        let accepted = row_passes_filters(
            0,
            &numeric_filters,
            &enum_filters,
            &column,
            1,
            &no_enums,
        );
        assert!(!accepted, "NaN should fail filter even with infinite range");
    }

    /// An enum filter with an empty value list parses to an empty allowed
    /// set, which rejects every row.
    #[test]
    fn empty_enum_filter_value_rejects_all() {
        let enum_features = vec![rating_feature()];
        let no_numeric_names: Vec<String> = Vec::new();

        let (numeric_filters, enum_filters) =
            parse_filters(Some("rating:"), &no_numeric_names, &enum_features);
        assert_eq!(enum_filters.len(), 1);
        assert!(enum_filters[0].allowed.is_empty());

        let accepted = row_passes_filters(
            0,
            &numeric_filters,
            &enum_filters,
            &[],
            0,
            &enum_features,
        );
        assert!(!accepted, "Empty allowed set should reject all rows");
    }

    /// Values that the enum feature does not define are dropped, leaving
    /// an empty allowed set.
    #[test]
    fn enum_filter_with_nonexistent_values_produces_empty_allowed() {
        let enum_features = vec![rating_feature()];
        let no_numeric_names: Vec<String> = Vec::new();

        let (_, enum_filters) =
            parse_filters(Some("rating:X|Y|Z"), &no_numeric_names, &enum_features);

        assert_eq!(enum_filters.len(), 1);
        assert!(enum_filters[0].allowed.is_empty());
    }

    /// A numeric filter whose minimum does not parse as a number yields no
    /// filter at all.
    #[test]
    fn malformed_numeric_min_is_silently_skipped() {
        let numeric_names = vec!["price".to_string()];
        let no_enums: Vec<EnumFeatureData> = Vec::new();

        let (numeric_filters, enum_filters) =
            parse_filters(Some("price:not_a_number:200"), &numeric_names, &no_enums);

        assert_eq!(numeric_filters.len(), 0);
        assert_eq!(enum_filters.len(), 0);
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod json_tests {
    use crate::routes::hexagons::write_json_escaped;

    /// Builds `{"postcode":"<escaped raw>"}` using `write_json_escaped` for
    /// the string contents.
    fn postcode_document(raw: &str) -> String {
        let mut doc = String::from("{\"postcode\":\"");
        write_json_escaped(&mut doc, raw);
        doc.push_str("\"}");
        doc
    }

    /// Builds `{"min_price":<value>}` with the float formatted through
    /// `Display`, i.e. without any finiteness guard.
    fn raw_min_price_document(value: f64) -> String {
        format!("{{\"min_price\":{}}}", value)
    }

    /// A double quote inside the postcode must be escaped so the document
    /// parses and round-trips.
    #[test]
    fn json_escaped_postcode_with_quotes_is_valid() {
        let doc = postcode_document("SW1A \"test");

        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&doc);
        assert!(
            parsed.is_ok(),
            "Escaped quote should produce valid JSON: {}",
            doc
        );
        assert_eq!(parsed.unwrap()["postcode"].as_str().unwrap(), "SW1A \"test");
    }

    /// A backslash inside the postcode must be escaped so the document
    /// parses and round-trips.
    #[test]
    fn json_escaped_postcode_with_backslash_is_valid() {
        let doc = postcode_document("SW1A\\2AA");

        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&doc);
        assert!(
            parsed.is_ok(),
            "Escaped backslash should produce valid JSON: {}",
            doc
        );
        assert_eq!(parsed.unwrap()["postcode"].as_str().unwrap(), "SW1A\\2AA");
    }

    /// Documents the risk that the is_finite() guard in write_hexagons_json
    /// protects against: a raw NaN written into the output is invalid JSON.
    #[test]
    fn nan_is_not_valid_json() {
        let doc = raw_min_price_document(f64::NAN);

        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&doc);
        assert!(parsed.is_err(), "Raw NaN should produce invalid JSON");
    }

    /// Same as above for positive infinity.
    #[test]
    fn infinity_is_not_valid_json() {
        let doc = raw_min_price_document(f64::INFINITY);

        let parsed: Result<serde_json::Value, _> = serde_json::from_str(&doc);
        assert!(parsed.is_err(), "Raw Infinity should produce invalid JSON");
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod enum_encoding_tests {
    use std::collections::{HashMap, HashSet};

    /// Documents the underlying u8 wrapping behavior that the truncation
    /// guard in property.rs now prevents.
    #[test]
    fn u8_cast_wraps_around_beyond_255() {
        let num_values = 300usize;

        // Casting an index past 255 to u8 wraps back to the start.
        let wrapped: Vec<u8> = (0..num_values).map(|position| position as u8).collect();
        assert_eq!(wrapped[0], wrapped[256], "u8 wraps: 0 == 256");
        assert_eq!(wrapped[1], wrapped[257], "u8 wraps: 1 == 257");

        // Map 300 distinct labels to their (wrapped) u8 positions.
        let labels: Vec<String> = (0..num_values).map(|i| format!("val_{}", i)).collect();
        let mut label_to_idx: HashMap<&str, u8> = HashMap::new();
        for (position, label) in labels.iter().enumerate() {
            label_to_idx.insert(label.as_str(), position as u8);
        }

        // Distinct labels collide on the same index once wrapping kicks in.
        let distinct: HashSet<u8> = label_to_idx.values().cloned().collect();
        assert!(
            distinct.len() < num_values,
            "Without the truncation guard, {} values produce only {} unique u8 indices",
            num_values,
            distinct.len()
        );
    }
}
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
"""Server configuration - imports shared values from pipeline config."""
|
|
||||||
|
|
||||||
from pipeline.config import (
|
|
||||||
AGGREGATES_DIR,
|
|
||||||
H3_RESOLUTIONS as VALID_RESOLUTIONS,
|
|
||||||
DEFAULT_H3_RESOLUTION as DEFAULT_RESOLUTION,
|
|
||||||
MIN_YEAR,
|
|
||||||
MAX_YEAR,
|
|
||||||
DEFAULT_MIN_YEAR,
|
|
||||||
DEFAULT_MAX_YEAR,
|
|
||||||
DEFAULT_MIN_PRICE,
|
|
||||||
DEFAULT_MAX_PRICE,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Fraction of extra area to return beyond the requested bounds
# (0.2 = 20%). Preloading nearby hexagons makes panning smoother.
BOUNDS_BUFFER_PERCENT = 0.2

# Public names of this module: the values imported from pipeline.config
# (some under server-side aliases such as VALID_RESOLUTIONS and
# DEFAULT_RESOLUTION) plus BOUNDS_BUFFER_PERCENT defined in this module.
__all__ = [
    "AGGREGATES_DIR",
    "VALID_RESOLUTIONS",
    "DEFAULT_RESOLUTION",
    "MIN_YEAR",
    "MAX_YEAR",
    "DEFAULT_MIN_YEAR",
    "DEFAULT_MAX_YEAR",
    "DEFAULT_MIN_PRICE",
    "DEFAULT_MAX_PRICE",
    "BOUNDS_BUFFER_PERCENT",
]
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
from contextlib import asynccontextmanager
|
|
||||||
from pathlib import Path
|
|
||||||
from fastapi import FastAPI
|
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
|
||||||
from fastapi.staticfiles import StaticFiles
|
|
||||||
|
|
||||||
from server.routes import hexagons, pois
|
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work for the application, then yield control.

    Calls ``hexagons.preload_dataframes()`` and ``pois.preload_pois()``
    before yielding. Nothing runs after the ``yield``, so shutdown is a
    no-op. The ``app`` argument is not used by the body.
    """
    # Startup: preload all parquet files
    hexagons.preload_dataframes()
    pois.preload_pois()
    yield
    # Shutdown: nothing to clean up
|
|
||||||
|
|
||||||
|
|
||||||
# Application instance; startup work is delegated to the lifespan handler.
app = FastAPI(
    title="Property Map API",
    lifespan=lifespan,
)

# Permissive CORS: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    # Credentials must stay disabled: True cannot be combined with
    # wildcard origins.
    allow_credentials=False,
)

# API routes are all served under the /api prefix.
app.include_router(hexagons.router, prefix="/api")
app.include_router(pois.router, prefix="/api")

# Production only: serve the built frontend from <repo>/frontend/dist when
# that directory is present.
frontend_dist = Path(__file__).parent.parent.joinpath("frontend", "dist")
if frontend_dist.exists():
    app.mount(
        "/",
        StaticFiles(directory=frontend_dist, html=True),
        name="static",
    )
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue