Add POIs and journey times to map

This commit is contained in:
Andras Schmelczer 2026-01-28 22:10:41 +00:00
parent 7bfb1729bf
commit 500b9ef2aa
11 changed files with 914 additions and 177 deletions

View file

@ -10,7 +10,7 @@ import type {
ApiResponse,
POI,
POIResponse,
POICategoryGroup,
POICategoriesMap,
ColorMode,
} from './types';
@ -55,23 +55,30 @@ export default function App() {
// POI state
const [pois, setPois] = useState<POI[]>([]);
const [selectedPOICategories, setSelectedPOICategories] = useState<Set<POICategoryGroup>>(
new Set()
);
const [poiCategories, setPOICategories] = useState<POICategoriesMap>({});
const [selectedPOICategories, setSelectedPOICategories] = useState<Set<string>>(new Set());
const poiDebounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
const poiAbortControllerRef = useRef<AbortController | null>(null);
// Fetch POI category definitions from server on mount
useEffect(() => {
fetch(`${getApiBaseUrl()}/api/poi-categories`)
.then((res) => res.json())
.then((json: { categories: POICategoriesMap }) => {
setPOICategories(json.categories);
})
.catch((err) => console.error('Failed to fetch POI categories:', err));
}, []);
// Debounced fetch when dependencies change
useEffect(() => {
if (!bounds) return;
// Clear previous debounce timer
if (debounceRef.current) {
clearTimeout(debounceRef.current);
}
debounceRef.current = setTimeout(async () => {
// Cancel any in-flight request
if (abortControllerRef.current) {
abortControllerRef.current.abort();
}
@ -167,6 +174,7 @@ export default function App() {
filters={filters}
onChange={setFilters}
zoom={zoom}
poiCategories={poiCategories}
selectedPOICategories={selectedPOICategories}
onPOICategoriesChange={setSelectedPOICategories}
colorMode={colorMode}

View file

@ -1,32 +1,25 @@
import { useState, useRef, useEffect } from 'react';
import { Slider } from './ui/slider';
import { Label } from './ui/label';
import { YEAR_MIN, YEAR_MAX, YEAR_STEP, PRICE_MIN, PRICE_MAX, PRICE_STEP } from '../lib/constants';
import type { Filters as FiltersType, POICategoryGroup, ColorMode } from '../types';
import { POI_CATEGORY_GROUPS } from '../types';
import type { Filters as FiltersType, POICategoriesMap, ColorMode } from '../types';
interface FiltersProps {
filters: FiltersType;
onChange: (filters: FiltersType) => void;
zoom: number;
selectedPOICategories: Set<POICategoryGroup>;
onPOICategoriesChange: (categories: Set<POICategoryGroup>) => void;
poiCategories: POICategoriesMap;
selectedPOICategories: Set<string>;
onPOICategoriesChange: (categories: Set<string>) => void;
colorMode: ColorMode;
onColorModeChange: (mode: ColorMode) => void;
}
const POI_LABELS: Record<POICategoryGroup, string> = {
schools: '🏫 Schools',
healthcare: '🏥 Healthcare',
transport: '🚉 Transport',
parks: '🌳 Parks',
emergency: '🚨 Emergency',
supermarkets: '🛒 Supermarkets',
};
export default function Filters({
filters,
onChange,
zoom,
poiCategories,
selectedPOICategories,
onPOICategoriesChange,
colorMode,
@ -34,16 +27,41 @@ export default function Filters({
}: FiltersProps) {
const update = (key: keyof FiltersType, value: number) => onChange({ ...filters, [key]: value });
const togglePOICategory = (category: POICategoryGroup) => {
const [dropdownOpen, setDropdownOpen] = useState(false);
const dropdownRef = useRef<HTMLDivElement>(null);
// Close dropdown when clicking outside
useEffect(() => {
function handleClickOutside(event: MouseEvent) {
if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) {
setDropdownOpen(false);
}
}
document.addEventListener('mousedown', handleClickOutside);
return () => document.removeEventListener('mousedown', handleClickOutside);
}, []);
const toggleCategory = (key: string) => {
const newSet = new Set(selectedPOICategories);
if (newSet.has(category)) {
newSet.delete(category);
if (newSet.has(key)) {
newSet.delete(key);
} else {
newSet.add(category);
newSet.add(key);
}
onPOICategoriesChange(newSet);
};
const selectAll = () => {
onPOICategoriesChange(new Set(Object.keys(poiCategories)));
};
const selectNone = () => {
onPOICategoriesChange(new Set());
};
const categoryKeys = Object.keys(poiCategories);
const selectedCount = selectedPOICategories.size;
return (
<div className="w-72 p-4 bg-white shadow-lg space-y-6 overflow-y-auto max-h-screen">
<h1 className="text-xl font-bold">UK Property Prices</h1>
@ -139,22 +157,70 @@ export default function Filters({
</div>
)}
<div className="space-y-2">
<div className="space-y-2" ref={dropdownRef}>
<Label>Points of Interest</Label>
<div className="space-y-1">
{POI_CATEGORY_GROUPS.map((category) => (
<label key={category} className="flex items-center gap-2 cursor-pointer">
<button
onClick={() => setDropdownOpen(!dropdownOpen)}
className="w-full flex items-center justify-between px-3 py-2 text-sm border border-slate-300 rounded hover:border-slate-400 bg-white"
>
<span className="truncate text-left">
{selectedCount === 0
? 'Select categories...'
: selectedCount === categoryKeys.length
? 'All categories'
: `${selectedCount} selected`}
</span>
<svg
className={`w-4 h-4 ml-2 flex-shrink-0 transition-transform ${dropdownOpen ? 'rotate-180' : ''}`}
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
>
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
</svg>
</button>
{dropdownOpen && (
<div className="border border-slate-300 rounded shadow-lg bg-white">
<div className="flex gap-2 px-3 py-2 border-b border-slate-200">
<button
onClick={selectAll}
className="text-xs text-blue-600 hover:text-blue-800"
>
All
</button>
<span className="text-xs text-slate-300">|</span>
<button
onClick={selectNone}
className="text-xs text-blue-600 hover:text-blue-800"
>
None
</button>
</div>
<div className="max-h-64 overflow-y-auto py-1">
{categoryKeys.map((key) => {
const { emoji, label } = poiCategories[key];
return (
<label
key={key}
className="flex items-center gap-2 px-3 py-1.5 hover:bg-slate-50 cursor-pointer"
>
<input
type="checkbox"
checked={selectedPOICategories.has(category)}
onChange={() => togglePOICategory(category)}
checked={selectedPOICategories.has(key)}
onChange={() => toggleCategory(key)}
className="rounded"
/>
<span className="text-sm">{POI_LABELS[category]}</span>
<span className="text-sm">
{emoji} {label}
</span>
</label>
))}
);
})}
</div>
</div>
)}
</div>
</div>
);
}

View file

@ -1,6 +1,7 @@
import { useCallback, useRef, useEffect, useState, useMemo } from 'react';
import { Map as MapGL } from 'react-map-gl/maplibre';
import DeckGL from '@deck.gl/react';
import { Map as MapGL, useControl } from 'react-map-gl/maplibre';
import type { MapRef } from 'react-map-gl/maplibre';
import { MapboxOverlay } from '@deck.gl/mapbox';
import { H3HexagonLayer } from '@deck.gl/geo-layers';
import { IconLayer } from '@deck.gl/layers';
import type { PickingInfo } from '@deck.gl/core';
@ -19,35 +20,119 @@ const TWEMOJI_BASE = 'https://cdn.jsdelivr.net/gh/twitter/twemoji@14.0.2/assets/
// Map category to Twemoji codepoint (emoji unicode -> hex)
const POI_EMOJI_CODES: Record<string, string> = {
// Schools
elementary_school: '1f3eb', // 🏫
school: '1f3eb',
high_school: '1f393', // 🎓
// Education
school: '1f3eb', // 🏫
preschool: '1f476', // 👶
college_university: '1f393',
private_school: '1f3eb',
college_university: '1f393', // 🎓
library: '1f4da', // 📚
// Healthcare
doctor: '1f3e5', // 🏥
dentist: '1f9b7', // 🦷
pharmacy: '1f48a', // 💊
hospital: '1f3e5',
public_health_clinic: '1f3e5',
veterinary: '1f43e', // 🐾
nursing_home: '1f3e0', // 🏠
social_facility: '1f91d', // 🤝
// Transport
train_station: '1f689', // 🚉
bus_station: '1f68c', // 🚌
bus_stop: '1f68f', // 🚏
metro_station: '1f687', // 🚇
light_rail_and_subway_stations: '1f687',
// Parks
light_rail_station: '1f687',
tram_stop: '1f68a', // 🚊
ferry_terminal: '26f4', // ⛴
airport: '2708', // ✈
// Parks & Leisure
park: '1f333', // 🌳
national_park: '1f3de', // 🏞
nature_reserve: '1f33f', // 🌿
dog_park: '1f415', // 🐕
playground: '1f3a0', // 🎠
garden: '1f33a', // 🌺
sports_centre: '1f3c3', // 🏃
swimming_pool: '1f3ca', // 🏊
gym: '1f4aa', // 💪
golf_course: '26f3', // ⛳
marina: '26f5', // ⛵
// Emergency
police_department: '1f694', // 🚔
fire_department: '1f692', // 🚒
// Supermarkets
// Supermarkets & Grocery
supermarket: '1f6d2', // 🛒
grocery_store: '1f6d2',
convenience_store: '1f3ea', // 🏪
bakery: '1f35e', // 🍞
butcher: '1f969', // 🥩
greengrocer: '1f966', // 🥦
deli: '1f9c0', // 🧀
// Shopping
department_store: '1f3ec', // 🏬
clothing_store: '1f455', // 👕
shoe_store: '1f45f', // 👟
electronics_store: '1f4f1', // 📱
hardware_store: '1f527', // 🔧
furniture_store: '1fa91', // 🪑
bookshop: '1f4d6', // 📖
newsagent: '1f4f0', // 📰
charity_shop: '1f49c', // 💜
shopping_centre: '1f6cd', // 🛍
optician: '1f453', // 👓
off_licence: '1f37a', // 🍺
// Food & Drink
restaurant: '1f37d', // 🍽
cafe: '2615', // ☕
pub: '1f37b', // 🍻
bar: '1f378', // 🍸
fast_food: '1f354', // 🍔
food_court: '1f372', // 🍲
ice_cream: '1f366', // 🍦
beer_garden: '1f37a', // 🍺
// Personal Care
hairdresser: '1f487', // 💇
beauty_salon: '1f484', // 💄
laundry: '1f9fa', // 🧺
dry_cleaning: '1f455', // 👕
// Finance
bank: '1f3e6', // 🏦
atm: '1f4b3', // 💳
bureau_de_change: '1f4b1', // 💱
// Entertainment & Culture
cinema: '1f3ac', // 🎬
theatre: '1f3ad', // 🎭
nightclub: '1f483', // 💃
community_centre: '1f3db', // 🏛
arts_centre: '1f3a8', // 🎨
museum: '1f3db', // 🏛
gallery: '1f5bc', // 🖼
attraction: '2b50', // ⭐
zoo: '1f418', // 🐘
theme_park: '1f3a2', // 🎢
viewpoint: '1f301', // 🌁
// Accommodation
hotel: '1f3e8', // 🏨
hostel: '1f6cf', // 🛏
guest_house: '1f3e1', // 🏡
campsite: '26fa', // ⛺
caravan_site: '1f699', // 🚙
// Religion
place_of_worship: '1f6d0', // 🛐
// Government & Public
town_hall: '1f3db', // 🏛
courthouse: '2696', // ⚖
post_office: '1f4ee', // 📮
prison: '1f513', // 🔓
public_toilets: '1f6bb', // 🚻
// Automotive
petrol_station: '26fd', // ⛽
ev_charging: '1f50c', // 🔌
car_dealer: '1f697', // 🚗
car_repair: '1f527', // 🔧
parking: '1f17f', // 🅿
bicycle_parking: '1f6b2', // 🚲
// Recycling & Waste
recycling: '267b', // ♻
waste_disposal: '1f5d1', // 🗑
};
function getPOIIconUrl(category: string): string {
@ -57,29 +142,34 @@ function getPOIIconUrl(category: string): string {
// Tooltip emojis (these render fine in HTML)
const TOOLTIP_EMOJIS: Record<string, string> = {
elementary_school: '🏫',
school: '🏫',
high_school: '🎓',
preschool: '👶',
college_university: '🎓',
private_school: '🏫',
doctor: '👨‍⚕️',
dentist: '🦷',
pharmacy: '💊',
hospital: '🏥',
public_health_clinic: '🏥',
train_station: '🚉',
bus_station: '🚌',
metro_station: '🚇',
light_rail_and_subway_stations: '🚇',
park: '🌳',
national_park: '🏞️',
dog_park: '🐕',
police_department: '🚔',
fire_department: '🚒',
supermarket: '🛒',
grocery_store: '🛒',
convenience_store: '🏪',
school: '🏫', preschool: '👶', college_university: '🎓', library: '📚',
doctor: '🏥', dentist: '🦷', pharmacy: '💊', hospital: '🏥',
public_health_clinic: '🏥', veterinary: '🐾', nursing_home: '🏠', social_facility: '🤝',
train_station: '🚉', bus_station: '🚌', bus_stop: '🚏', metro_station: '🚇',
light_rail_station: '🚇', tram_stop: '🚊', ferry_terminal: '⛴️', airport: '✈️',
park: '🌳', national_park: '🏞️', nature_reserve: '🌿', dog_park: '🐕',
playground: '🎠', garden: '🌺', sports_centre: '🏃', swimming_pool: '🏊',
gym: '💪', golf_course: '⛳', marina: '⛵',
police_department: '🚔', fire_department: '🚒',
supermarket: '🛒', grocery_store: '🛒', convenience_store: '🏪',
bakery: '🍞', butcher: '🥩', greengrocer: '🥦', deli: '🧀',
department_store: '🏬', clothing_store: '👕', shoe_store: '👟',
electronics_store: '📱', hardware_store: '🔧', furniture_store: '🪑',
bookshop: '📖', newsagent: '📰', charity_shop: '💜', shopping_centre: '🛍️',
optician: '👓', off_licence: '🍺',
restaurant: '🍽️', cafe: '☕', pub: '🍻', bar: '🍸',
fast_food: '🍔', food_court: '🍲', ice_cream: '🍦', beer_garden: '🍺',
hairdresser: '💇', beauty_salon: '💄', laundry: '🧺', dry_cleaning: '👕',
bank: '🏦', atm: '💳', bureau_de_change: '💱',
cinema: '🎬', theatre: '🎭', nightclub: '💃', community_centre: '🏛️',
arts_centre: '🎨', museum: '🏛️', gallery: '🖼️', attraction: '⭐',
zoo: '🐘', theme_park: '🎢', viewpoint: '🌁',
hotel: '🏨', hostel: '🛏️', guest_house: '🏡', campsite: '⛺', caravan_site: '🚙',
place_of_worship: '🛐',
town_hall: '🏛️', courthouse: '⚖️', post_office: '📮', prison: '🔓', public_toilets: '🚻',
petrol_station: '⛽', ev_charging: '🔌', car_dealer: '🚗', car_repair: '🔧',
parking: '🅿️', bicycle_parking: '🚲',
recycling: '♻️', waste_disposal: '🗑️',
};
function getTooltipEmoji(category: string): string {
@ -158,7 +248,7 @@ function journeyTimeToColor(minutes: number | null | undefined): [number, number
}
function zoomToResolution(zoom: number): number {
if (zoom < 8.5) return 7;
if (zoom < 7) return 7;
if (zoom < 9.5) return 8;
if (zoom < 11) return 9;
if (zoom < 13) return 10;
@ -209,6 +299,22 @@ interface Dimensions {
height: number;
}
// First label layer in the Carto Positron style — hexagons render below this
const LABEL_LAYER_ID = 'waterway_label';
/**
 * Headless bridge between react-map-gl and deck.gl.
 *
 * Registers a `MapboxOverlay` (created with `interleaved: true`) on the
 * enclosing map through `useControl`, then forwards the current `layers` and
 * `getTooltip` to it on every render. Renders no DOM of its own.
 */
function DeckOverlay(props: {
  layers: (H3HexagonLayer<HexagonData> | IconLayer<POI>)[];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  getTooltip: any;
}) {
  const deckOverlay = useControl(() => new MapboxOverlay({ interleaved: true }));
  const { layers, getTooltip } = props;
  // Push the latest props into the overlay; the component itself draws nothing.
  deckOverlay.setProps({ layers, getTooltip });
  return null;
}
export default function Map({ data, pois, onViewChange, colorMode }: MapProps) {
const containerRef = useRef<HTMLDivElement>(null);
const [viewState, setViewState] = useState<ViewState>(INITIAL_VIEW);
@ -240,12 +346,23 @@ export default function Map({ data, pois, onViewChange, colorMode }: MapProps) {
onViewChange({ resolution, bounds, zoom: viewState.zoom });
}, [viewState, dimensions, onViewChange]);
const handleViewStateChange = useCallback((params: { viewState: unknown }) => {
const newViewState = params.viewState as ViewState;
setViewState(newViewState);
const handleMove = useCallback((evt: { viewState: ViewState }) => {
setViewState(evt.viewState);
}, []);
// Popup state for POI hover (using screen coordinates)
// Make place labels more legible over the colored hexagons
const handleMapLoad = useCallback((evt: { target: MapRef['getMap'] extends () => infer M ? M : never }) => {
const map = evt.target;
for (const layer of map.getStyle().layers || []) {
if (layer.type !== 'symbol') continue;
// Stronger white halo so text pops over hex fills
map.setPaintProperty(layer.id, 'text-halo-color', 'rgba(255,255,255,1)');
map.setPaintProperty(layer.id, 'text-halo-width', 2);
map.setPaintProperty(layer.id, 'text-color', '#222');
}
}, []);
// Popup state for POI hover
const [popupInfo, setPopupInfo] = useState<{
x: number;
y: number;
@ -283,6 +400,9 @@ export default function Map({ data, pois, onViewChange, colorMode }: MapProps) {
pickable: true,
opacity: 0.5,
highPrecision: true,
// Render below labels so road names, place names etc. stay visible
// @ts-expect-error beforeId is a MapboxOverlay interleave prop, not typed in LayerProps
beforeId: LABEL_LAYER_ID,
}),
new IconLayer<POI>({
id: 'poi-icons',
@ -303,7 +423,6 @@ export default function Map({ data, pois, onViewChange, colorMode }: MapProps) {
[data, pois, handlePoiHover, colorMode]
);
// Tooltip for hexagons only (POIs use MapLibre popup)
const getTooltip = useCallback(({ object }: { object?: HexagonData }) => {
if (!object || !('h3' in object)) return null;
@ -339,15 +458,15 @@ export default function Map({ data, pois, onViewChange, colorMode }: MapProps) {
return (
<div className="flex-1 h-full relative" ref={containerRef}>
<DeckGL
viewState={viewState}
controller
layers={layers}
onViewStateChange={handleViewStateChange as never}
getTooltip={getTooltip as never}
<MapGL
{...viewState}
onMove={handleMove}
onLoad={handleMapLoad as never}
mapStyle={MAP_STYLE}
style={{ width: '100%', height: '100%' }}
>
<MapGL mapStyle={MAP_STYLE} />
</DeckGL>
<DeckOverlay layers={layers} getTooltip={getTooltip as never} />
</MapGL>
{popupInfo && (
<div
className="absolute pointer-events-none bg-white rounded shadow-lg p-2 text-sm"

View file

@ -57,13 +57,9 @@ export interface POIResponse {
features: POI[];
}
export const POI_CATEGORY_GROUPS = [
'schools',
'healthcare',
'transport',
'parks',
'emergency',
'supermarkets',
] as const;
/**
 * Display metadata for one POI category group. Instances are the values of
 * `POICategoriesMap`, which the app loads from `/api/poi-categories`.
 */
export interface POICategoryInfo {
// Emoji rendered in front of the label in the category picker.
emoji: string;
// Human-readable name shown next to the emoji.
label: string;
}
export type POICategoryGroup = (typeof POI_CATEGORY_GROUPS)[number];
export type POICategoriesMap = Record<string, POICategoryInfo>;

View file

181
pipeline/pois/__main__.py Normal file
View file

@ -0,0 +1,181 @@
"""Single-pass POI extraction from OSM PBF file using pyosmium."""
import json
import urllib.request
import osmium
import polars as pl
from tqdm import tqdm
from .config import (
GB_PBF_FILE,
GEOFABRIK_GB_URL,
OSM_TAG_MAPPING,
OUTPUT_FILE,
TAG_KEYS_TO_CHECK,
UK_BBOX_EAST,
UK_BBOX_NORTH,
UK_BBOX_SOUTH,
UK_BBOX_WEST,
)
# Approximate element count for the GB PBF extract (for progress estimation).
ESTIMATED_ELEMENTS = 500_000_000
def download_pbf() -> None:
    """Download the Great Britain PBF extract from Geofabrik.

    The response is streamed in 1 MiB chunks into a temporary file next to
    ``GB_PBF_FILE`` and only renamed to ``GB_PBF_FILE`` once the transfer has
    finished, so an interrupted run never leaves a partial file under the
    final name.

    Raises:
        OSError: if the server announced a ``Content-Length`` and a different
            number of bytes was received (truncated download).
    """
    GB_PBF_FILE.parent.mkdir(parents=True, exist_ok=True)
    tmp = GB_PBF_FILE.with_suffix(".pbf.tmp")

    print(f"Downloading {GEOFABRIK_GB_URL}")
    try:
        with (
            tqdm(unit="B", unit_scale=True, desc="Downloading") as bar,
            urllib.request.urlopen(GEOFABRIK_GB_URL) as resp,
            open(tmp, "wb") as f,
        ):
            length = resp.headers.get("Content-Length")
            expected = int(length) if length else None
            if expected is not None:
                bar.total = expected

            received = 0
            while chunk := resp.read(1 << 20):
                f.write(chunk)
                received += len(chunk)
                bar.update(len(chunk))

        # A dropped connection can end the read loop early without raising.
        # main() only checks that GB_PBF_FILE exists, so a truncated file
        # moved into place would never be re-downloaded - verify the size.
        if expected is not None and received != expected:
            raise OSError(
                f"Incomplete download of {GEOFABRIK_GB_URL}: "
                f"got {received} of {expected} bytes"
            )

        tmp.rename(GB_PBF_FILE)
    except BaseException:
        # Do not leave a partial temp file behind on failure or Ctrl-C.
        tmp.unlink(missing_ok=True)
        raise

    print(f"Saved to {GB_PBF_FILE}")
class POIHandler(osmium.SimpleHandler):
    """Streams OSM data, filters to UK bbox, extracts matching POIs.

    Nodes are recorded at their own coordinates; ways are recorded at the
    arithmetic mean of their node coordinates. Every node and way visited
    advances the progress bar by one, whether or not it matches.
    """

    def __init__(self, progress: tqdm) -> None:
        """Create a handler that reports progress on ``progress``."""
        super().__init__()
        # One dict per extracted POI; see _add_poi for the keys.
        self.pois: list[dict] = []
        self._poi_count = 0
        self._progress = progress

    def _in_uk(self, lat: float, lon: float) -> bool:
        """Return True if (lat, lon) lies inside the configured UK bbox (inclusive)."""
        return (
            UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH
            and UK_BBOX_WEST <= lon <= UK_BBOX_EAST
        )

    def _match_tags(self, tags: osmium.osm.TagList) -> str | None:
        """Return the category for the first matching tag, or None.

        Keys are tried in the iteration order of ``TAG_KEYS_TO_CHECK``; the
        first key whose value is one we map wins.
        """
        for key in TAG_KEYS_TO_CHECK:
            if key in tags:
                value = tags[key]
                if value in TAG_KEYS_TO_CHECK[key]:
                    return OSM_TAG_MAPPING[(key, value)]
        return None

    def _get_name(self, tags: osmium.osm.TagList) -> str:
        """Return ``name:en`` if present, else ``name``, else an empty string."""
        return tags.get("name:en", tags.get("name", ""))

    def _tags_to_json(self, tags: osmium.osm.TagList) -> str:
        """Serialize all tags of an element as a JSON object string."""
        return json.dumps({tag.k: tag.v for tag in tags})

    def _add_poi(
        self, osm_id: str, tags: osmium.osm.TagList, category: str, lat: float, lng: float
    ) -> None:
        """Append one POI record and update the POI counter on the progress bar."""
        self.pois.append(
            {
                "id": osm_id,
                "name": self._get_name(tags),
                "category": category,
                "lat": lat,
                "lng": lng,
                "osm_tags": self._tags_to_json(tags),
            }
        )
        self._poi_count += 1
        self._progress.set_postfix(pois=f"{self._poi_count:,}", refresh=False)

    def _tick(self) -> None:
        """Advance the progress bar by one processed element."""
        self._progress.update(1)

    def node(self, n: osmium.osm.Node) -> None:
        """Handle one node: record it as ``n<id>`` if it is in the UK and matches."""
        self._tick()

        # Location.valid is a method. Referencing it without calling it is
        # always truthy, which made this guard dead code and let invalid
        # locations reach .lat/.lon below (which raise).
        if not n.location.valid():
            return

        lat, lon = n.location.lat, n.location.lon
        if not self._in_uk(lat, lon):
            return

        category = self._match_tags(n.tags)
        if category:
            self._add_poi(f"n{n.id}", n.tags, category, lat, lon)

    def way(self, w: osmium.osm.Way) -> None:
        """Handle one way: record it as ``w<id>`` at the mean of its node coordinates."""
        self._tick()

        category = self._match_tags(w.tags)
        if not category:
            return

        nodes = list(w.nodes)
        # A closed way repeats its first node as the last one. Drop the
        # duplicate so that corner is not counted twice in the mean.
        if len(nodes) > 1 and w.is_closed():
            nodes = nodes[:-1]

        lats = []
        lons = []
        for node in nodes:
            try:
                # Read both before appending so the two lists stay in step.
                lat, lon = node.location.lat, node.location.lon
            except osmium.InvalidLocationError:
                continue
            lats.append(lat)
            lons.append(lon)

        if not lats:
            return

        centroid_lat = sum(lats) / len(lats)
        centroid_lng = sum(lons) / len(lons)

        if not self._in_uk(centroid_lat, centroid_lng):
            return

        self._add_poi(f"w{w.id}", w.tags, category, centroid_lat, centroid_lng)
def main() -> None:
    """Extract POIs from the GB PBF extract and write them to parquet.

    Downloads the extract first when ``GB_PBF_FILE`` does not exist, streams
    it through ``POIHandler`` under a progress bar, saves the result to
    ``OUTPUT_FILE`` and prints a per-category count. Returns without writing
    anything when no POI matched.
    """
    if not GB_PBF_FILE.exists():
        download_pbf()

    bbox_text = f"({UK_BBOX_WEST}, {UK_BBOX_SOUTH}, {UK_BBOX_EAST}, {UK_BBOX_NORTH})"
    print(f"=== POI Extraction from {GB_PBF_FILE} ===")
    print(f"UK bbox: {bbox_text}")
    print(f"Categories: {len(OSM_TAG_MAPPING)}")
    print()

    # The total is only an estimate; it drives the progress bar, nothing else.
    progress_options = {
        "total": ESTIMATED_ELEMENTS,
        "unit": " elements",
        "unit_scale": True,
        "desc": "Streaming",
        "smoothing": 0.05,
        "mininterval": 1.0,
    }
    with tqdm(**progress_options) as progress:
        handler = POIHandler(progress)
        handler.apply_file(str(GB_PBF_FILE), locations=True)

    extracted = handler.pois
    print(f"Extracted {len(extracted):,} POIs")
    if not extracted:
        print("No POIs found.")
        return

    df = pl.DataFrame(extracted)
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    df.write_parquet(OUTPUT_FILE)
    print(f"Saved to {OUTPUT_FILE}")

    print("\n=== Summary ===")
    print(f"Total POIs: {len(df):,}")
    print("\nPOIs by category:")

    per_category = df.group_by("category").agg(pl.len().alias("count"))
    ranked = per_category.sort("count", descending=True)
    for row in ranked.iter_rows(named=True):
        print(f" {row['category']}: {row['count']:,}")


if __name__ == "__main__":
    main()

147
pipeline/pois/config.py Normal file
View file

@ -0,0 +1,147 @@
"""Configuration for POI extraction from OpenStreetMap."""
from pathlib import Path
# File paths
DATA_DIR = Path(__file__).parent.parent.parent / "data_sources"
GB_PBF_FILE = DATA_DIR / "great-britain-latest.osm.pbf"
OUTPUT_FILE = DATA_DIR / "uk_pois.parquet"
# Geofabrik download URL for Great Britain (~1.1GB PBF, much faster than planet XML)
GEOFABRIK_GB_URL = (
"https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
)
# UK bounding box (west, south, east, north) — used for way centroid filtering
UK_BBOX_WEST = -7.57
UK_BBOX_SOUTH = 49.96
UK_BBOX_EAST = 1.68
UK_BBOX_NORTH = 58.64
# OSM tag mapping to categories
# Maps (tag_key, tag_value) -> category name
OSM_TAG_MAPPING: dict[tuple[str, str], str] = {
# Education
("amenity", "school"): "school",
("amenity", "kindergarten"): "preschool",
("amenity", "college"): "college_university",
("amenity", "university"): "college_university",
("amenity", "library"): "library",
("amenity", "language_school"): "school",
("amenity", "music_school"): "school",
("amenity", "driving_school"): "school",
# Healthcare
("amenity", "hospital"): "hospital",
("amenity", "clinic"): "public_health_clinic",
("amenity", "doctors"): "doctor",
("amenity", "dentist"): "dentist",
("amenity", "pharmacy"): "pharmacy",
("amenity", "veterinary"): "veterinary",
("amenity", "nursing_home"): "nursing_home",
("amenity", "social_facility"): "social_facility",
# Transport
("railway", "station"): "train_station",
("railway", "halt"): "train_station",
("railway", "tram_stop"): "tram_stop",
("amenity", "bus_station"): "bus_station",
("amenity", "ferry_terminal"): "ferry_terminal",
("public_transport", "station"): "train_station",
("public_transport", "stop_position"): "bus_stop",
("station", "subway"): "metro_station",
("station", "light_rail"): "light_rail_station",
("aeroway", "aerodrome"): "airport",
("highway", "bus_stop"): "bus_stop",
# Parks & Leisure
("leisure", "park"): "park",
("leisure", "nature_reserve"): "nature_reserve",
("leisure", "dog_park"): "dog_park",
("leisure", "playground"): "playground",
("leisure", "sports_centre"): "sports_centre",
("leisure", "swimming_pool"): "swimming_pool",
("leisure", "fitness_centre"): "gym",
("leisure", "golf_course"): "golf_course",
("leisure", "garden"): "garden",
("leisure", "marina"): "marina",
("boundary", "national_park"): "national_park",
# Emergency
("amenity", "police"): "police_department",
("amenity", "fire_station"): "fire_department",
# Shopping
("shop", "supermarket"): "supermarket",
("shop", "convenience"): "convenience_store",
("shop", "grocery"): "grocery_store",
("shop", "bakery"): "bakery",
("shop", "butcher"): "butcher",
("shop", "greengrocer"): "greengrocer",
("shop", "deli"): "deli",
("shop", "department_store"): "department_store",
("shop", "clothes"): "clothing_store",
("shop", "shoes"): "shoe_store",
("shop", "electronics"): "electronics_store",
("shop", "hardware"): "hardware_store",
("shop", "furniture"): "furniture_store",
("shop", "car"): "car_dealer",
("shop", "car_repair"): "car_repair",
("shop", "hairdresser"): "hairdresser",
("shop", "beauty"): "beauty_salon",
("shop", "optician"): "optician",
("shop", "newsagent"): "newsagent",
("shop", "books"): "bookshop",
("shop", "charity"): "charity_shop",
("shop", "alcohol"): "off_licence",
("shop", "laundry"): "laundry",
("shop", "dry_cleaning"): "dry_cleaning",
("shop", "mall"): "shopping_centre",
# Food & Drink
("amenity", "restaurant"): "restaurant",
("amenity", "cafe"): "cafe",
("amenity", "pub"): "pub",
("amenity", "bar"): "bar",
("amenity", "fast_food"): "fast_food",
("amenity", "food_court"): "food_court",
("amenity", "ice_cream"): "ice_cream",
("amenity", "biergarten"): "beer_garden",
# Finance
("amenity", "bank"): "bank",
("amenity", "atm"): "atm",
("amenity", "bureau_de_change"): "bureau_de_change",
# Entertainment & Culture
("amenity", "cinema"): "cinema",
("amenity", "theatre"): "theatre",
("amenity", "nightclub"): "nightclub",
("amenity", "community_centre"): "community_centre",
("amenity", "arts_centre"): "arts_centre",
("tourism", "museum"): "museum",
("tourism", "gallery"): "gallery",
("tourism", "attraction"): "attraction",
("tourism", "zoo"): "zoo",
("tourism", "theme_park"): "theme_park",
("tourism", "viewpoint"): "viewpoint",
# Accommodation
("tourism", "hotel"): "hotel",
("tourism", "hostel"): "hostel",
("tourism", "guest_house"): "guest_house",
("tourism", "camp_site"): "campsite",
("tourism", "caravan_site"): "caravan_site",
# Religion
("amenity", "place_of_worship"): "place_of_worship",
# Government & Public
("amenity", "townhall"): "town_hall",
("amenity", "courthouse"): "courthouse",
("amenity", "post_office"): "post_office",
("amenity", "prison"): "prison",
("amenity", "recycling"): "recycling",
("amenity", "waste_disposal"): "waste_disposal",
("amenity", "toilets"): "public_toilets",
# Fuel
("amenity", "fuel"): "petrol_station",
("amenity", "charging_station"): "ev_charging",
# Parking
("amenity", "parking"): "parking",
("amenity", "bicycle_parking"): "bicycle_parking",
}
# Build reverse lookup: tag_key -> set of tag_values we care about.
#
# The iteration order of TAG_KEYS_TO_CHECK is also the matching priority:
# POIHandler._match_tags walks the keys in order and returns the first hit.
# With plain insertion order "railway" came before "station", so a stop tagged
# railway=station + station=subway was always classified as "train_station"
# and the "metro_station" / "light_rail_station" categories were effectively
# unreachable. Keys listed here are therefore moved to the front.
_PRIORITY_TAG_KEYS: tuple[str, ...] = ("station",)

_values_by_key: dict[str, set[str]] = {}
for key, value in OSM_TAG_MAPPING:
    _values_by_key.setdefault(key, set()).add(value)

# Priority keys first; the second unpacking keeps every other key in its
# original insertion order and does not move the ones already placed.
TAG_KEYS_TO_CHECK: dict[str, set[str]] = {
    **{k: _values_by_key[k] for k in _PRIORITY_TAG_KEYS if k in _values_by_key},
    **_values_by_key,
}

View file

@ -6,31 +6,47 @@ import polars as pl
from pipeline.config import AGGREGATES_DIR, H3_RESOLUTIONS, PROCESSED_DIR
JOURNEY_COLS = [
"public_transport_easy_minutes",
"public_transport_quick_minutes",
"cycling_minutes",
]
AGGREGATE_COLS = [
"median_pt_easy_minutes",
"median_pt_quick_minutes",
"median_cycling_minutes",
"median_journey_minutes",
]
def aggregate_journey_times(
journey_times_path: Path | None = None,
postcodes_h3_path: Path | None = None,
output_dir: Path | None = None,
aggregates_dir: Path | None = None,
) -> list[Path]:
"""
Aggregate journey times by H3 cells at all resolutions.
Add journey times to existing H3 aggregate parquet files.
Joins journey_times_bank.parquet with postcodes_h3.parquet on postcode,
then groups by H3 cell to compute median journey time.
Joins journey_times_bank_checkpoint.parquet with postcodes_h3.parquet on postcode,
aggregates by H3 cell, then merges into existing res{N}.parquet files.
"""
journey_times_path = journey_times_path or PROCESSED_DIR / "journey_times_bank.parquet"
journey_times_path = (
journey_times_path
or PROCESSED_DIR / "journey_times_bank_checkpoint.parquet"
)
postcodes_h3_path = postcodes_h3_path or PROCESSED_DIR / "postcodes_h3.parquet"
output_dir = output_dir or AGGREGATES_DIR
output_dir.mkdir(parents=True, exist_ok=True)
aggregates_dir = aggregates_dir or AGGREGATES_DIR
# Load journey times data
journey_df = pl.read_parquet(journey_times_path).select(
["postcode", "public_transport_minutes"]
["postcode"] + JOURNEY_COLS
)
# Filter out null journey times
journey_df = journey_df.filter(pl.col("public_transport_minutes").is_not_null())
# Filter out rows where all journey time columns are null
journey_df = journey_df.filter(
pl.any_horizontal(pl.col(c).is_not_null() for c in JOURNEY_COLS)
)
if journey_df.height == 0:
print("No valid journey times found")
@ -48,31 +64,63 @@ def aggregate_journey_times(
print(f"Joined {joined_df.height} postcodes with journey times")
saved_paths = []
updated_paths = []
for resolution in H3_RESOLUTIONS:
h3_col = f"h3_res{resolution}"
parquet_path = aggregates_dir / f"res{resolution}.parquet"
if not parquet_path.exists():
print(f"Skipping resolution {resolution} - {parquet_path} not found")
continue
if h3_col not in joined_df.columns:
print(f"Skipping resolution {resolution} - column {h3_col} not found")
continue
# Aggregate by H3 cell - compute median journey time
agg_df = (
# Aggregate journey times by H3 cell
journey_agg = (
joined_df.group_by(h3_col)
.agg(
pl.col("public_transport_minutes").median().alias("median_journey_minutes"),
pl.col("public_transport_minutes").count().alias("journey_count"),
pl.col("public_transport_easy_minutes")
.median()
.alias("median_pt_easy_minutes"),
pl.col("public_transport_quick_minutes")
.median()
.alias("median_pt_quick_minutes"),
pl.col("cycling_minutes")
.median()
.alias("median_cycling_minutes"),
pl.col("public_transport_quick_minutes")
.median()
.alias("median_journey_minutes"),
)
.rename({h3_col: "h3"})
)
output_path = output_dir / f"journey_times_res{resolution}.parquet"
agg_df.write_parquet(output_path)
saved_paths.append(output_path)
print(f"Saved {agg_df.height} cells to {output_path}")
# Load existing parquet
existing_df = pl.read_parquet(parquet_path)
return saved_paths
# Drop existing journey time columns if present
existing_df = existing_df.drop(
[c for c in AGGREGATE_COLS if c in existing_df.columns]
)
# Left join journey times onto existing data
updated_df = existing_df.join(journey_agg, on="h3", how="left")
# Save back to parquet
updated_df.write_parquet(parquet_path)
updated_paths.append(parquet_path)
matched = updated_df.filter(
pl.col("median_journey_minutes").is_not_null()
).height
print(
f"Updated {parquet_path.name}: {matched} rows with journey times "
f"(out of {updated_df.height} total)"
)
return updated_paths
if __name__ == "__main__":

View file

@ -5,6 +5,7 @@ import polars as pl
from pipeline.sources.postcodes import save_postcodes
from pipeline.sources.property_prices import PropertyPricesSource
from pipeline.processors.h3_aggregator import save_aggregates
from pipeline.processors.journey_times_aggregator import aggregate_journey_times
def run_pipeline():
@ -14,22 +15,31 @@ def run_pipeline():
print("=" * 60)
# Step 1: Process postcodes with H3 indices
print("\n[1/3] Processing postcodes with H3 indices...")
print("\n[1/4] Processing postcodes with H3 indices...")
postcodes_path = save_postcodes()
print(f" Saved: {postcodes_path}")
print("\n[2/3] Processing property prices...")
print("\n[2/4] Processing property prices...")
postcodes = pl.scan_parquet(postcodes_path)
property_source = PropertyPricesSource()
properties = property_source.process(postcodes)
print(" Joined property prices with postcodes")
print("\n[3/3] Aggregating at H3 resolutions...")
print("\n[3/4] Aggregating at H3 resolutions...")
saved_paths = save_aggregates(properties)
for path in saved_paths:
size_mb = path.stat().st_size / (1024 * 1024)
print(f" Saved: {path.name} ({size_mb:.1f} MB)")
print("\n[4/4] Adding journey times to aggregates...")
updated_paths = aggregate_journey_times()
if updated_paths:
for path in updated_paths:
size_mb = path.stat().st_size / (1024 * 1024)
print(f" Updated: {path.name} ({size_mb:.1f} MB)")
else:
print(" Skipped (no journey time data found)")
if __name__ == "__main__":
run_pipeline()

View file

@ -77,14 +77,28 @@ def query_hexagons_cached(
# Filter by year range
df = df.filter((pl.col("year") >= min_year) & (pl.col("year") <= max_year))
# Check which journey time columns exist
journey_cols = [
"median_journey_minutes",
"median_pt_easy_minutes",
"median_pt_quick_minutes",
"median_cycling_minutes",
]
available_journey_cols = [c for c in journey_cols if c in df.columns]
# Aggregate across years (weighted by count)
df = df.group_by("h3").agg(
agg_exprs = [
pl.col("count").sum().alias("count"),
(pl.col("avg_price") * pl.col("count")).sum().alias("weighted_price_sum"),
pl.col("median_price").median().alias("median_price"),
pl.col("min_price").min().alias("min_price"),
pl.col("max_price").max().alias("max_price"),
)
]
for jc in available_journey_cols:
# Journey time is same across years, just take first non-null
agg_exprs.append(pl.col(jc).first())
df = df.group_by("h3").agg(agg_exprs)
# Calculate weighted average price
df = df.with_columns(
@ -97,8 +111,7 @@ def query_hexagons_cached(
)
# Build response efficiently using Polars
df = df.select(
[
select_cols = [
pl.col("h3"),
pl.col("count"),
pl.col("avg_price").round(2),
@ -106,7 +119,10 @@ def query_hexagons_cached(
pl.col("min_price"),
pl.col("max_price"),
]
)
for jc in available_journey_cols:
select_cols.append(pl.col(jc).round(0))
df = df.select(select_cols)
return df.to_dicts()

View file

@ -1,9 +1,5 @@
"""POI (Points of Interest) API endpoint."""
import os
os.environ["POLARS_UNKNOWN_EXTENSION_TYPE_BEHAVIOR"] = "load_as_storage"
from pathlib import Path
from fastapi import APIRouter, Query
@ -13,36 +9,190 @@ router = APIRouter()
DATA_FILE = Path("data_sources/uk_pois.parquet")
# Categories useful for property buyers
POI_CATEGORIES = {
"schools": [
"elementary_school",
"school",
"high_school",
"preschool",
"college_university",
"private_school",
],
"healthcare": [
# Category groups with emoji and member categories
POI_CATEGORY_GROUPS: dict[str, dict] = {
"schools": {
"emoji": "🏫",
"label": "Schools",
"categories": ["school", "preschool", "college_university", "library"],
},
"healthcare": {
"emoji": "🏥",
"label": "Healthcare",
"categories": [
"doctor",
"dentist",
"pharmacy",
"hospital",
"public_health_clinic",
"veterinary",
"nursing_home",
"social_facility",
],
"transport": [
},
"transport": {
"emoji": "🚉",
"label": "Transport",
"categories": [
"train_station",
"bus_station",
"bus_stop",
"metro_station",
"light_rail_and_subway_stations",
"light_rail_station",
"tram_stop",
"ferry_terminal",
"airport",
],
"parks": ["park", "national_park", "dog_park"],
"emergency": ["police_department", "fire_department"],
"supermarkets": ["supermarket", "grocery_store", "convenience_store"],
},
"parks": {
"emoji": "🌳",
"label": "Parks & Leisure",
"categories": [
"park",
"national_park",
"nature_reserve",
"dog_park",
"playground",
"garden",
"sports_centre",
"swimming_pool",
"gym",
"golf_course",
"marina",
],
},
"emergency": {
"emoji": "🚨",
"label": "Emergency",
"categories": ["police_department", "fire_department"],
},
"supermarkets": {
"emoji": "🛒",
"label": "Supermarkets & Grocery",
"categories": [
"supermarket",
"grocery_store",
"convenience_store",
"bakery",
"butcher",
"greengrocer",
"deli",
],
},
"shopping": {
"emoji": "🛍️",
"label": "Shopping",
"categories": [
"department_store",
"clothing_store",
"shoe_store",
"electronics_store",
"hardware_store",
"furniture_store",
"bookshop",
"newsagent",
"charity_shop",
"shopping_centre",
"optician",
"off_licence",
],
},
"food_drink": {
"emoji": "🍽️",
"label": "Food & Drink",
"categories": [
"restaurant",
"cafe",
"pub",
"bar",
"fast_food",
"food_court",
"ice_cream",
"beer_garden",
],
},
"personal_care": {
"emoji": "💇",
"label": "Personal Care",
"categories": [
"hairdresser",
"beauty_salon",
"laundry",
"dry_cleaning",
],
},
"finance": {
"emoji": "🏦",
"label": "Finance",
"categories": ["bank", "atm", "bureau_de_change"],
},
"entertainment": {
"emoji": "🎭",
"label": "Entertainment & Culture",
"categories": [
"cinema",
"theatre",
"nightclub",
"community_centre",
"arts_centre",
"museum",
"gallery",
"attraction",
"zoo",
"theme_park",
"viewpoint",
],
},
"accommodation": {
"emoji": "🏨",
"label": "Accommodation",
"categories": [
"hotel",
"hostel",
"guest_house",
"campsite",
"caravan_site",
],
},
"religion": {
"emoji": "🛐",
"label": "Places of Worship",
"categories": ["place_of_worship"],
},
"government": {
"emoji": "🏛️",
"label": "Government & Public",
"categories": [
"town_hall",
"courthouse",
"post_office",
"prison",
"public_toilets",
],
},
"automotive": {
"emoji": "",
"label": "Automotive",
"categories": [
"petrol_station",
"ev_charging",
"car_dealer",
"car_repair",
"parking",
"bicycle_parking",
],
},
"recycling": {
"emoji": "♻️",
"label": "Recycling & Waste",
"categories": ["recycling", "waste_disposal"],
},
}
# Flatten for quick lookup
ALL_CATEGORIES = {cat for cats in POI_CATEGORIES.values() for cat in cats}
ALL_CATEGORIES = {
cat for group in POI_CATEGORY_GROUPS.values() for cat in group["categories"]
}
# Cache the dataframe
_df_cache: pl.DataFrame | None = None
@ -55,14 +205,9 @@ def get_df() -> pl.DataFrame | None:
if not DATA_FILE.exists():
return None
df = pl.read_parquet(DATA_FILE)
# Extract fields we need and filter to relevant categories
_df_cache = df.select(
pl.col("id"),
pl.col("names").struct.field("primary").alias("name"),
pl.col("categories").struct.field("primary").alias("category"),
pl.col("bbox").struct.field("xmin").alias("lng"),
pl.col("bbox").struct.field("ymin").alias("lat"),
).filter(pl.col("category").is_in(ALL_CATEGORIES))
_df_cache = df.select("id", "name", "category", "lat", "lng").filter(
pl.col("category").is_in(ALL_CATEGORIES)
)
return _df_cache
@ -83,23 +228,20 @@ async def get_pois(
if df is None:
return {"features": []}
# Parse bounds
try:
south, west, north, east = map(float, bounds.split(","))
except ValueError:
return {"features": []}
# Get categories to include
requested_groups = [g.strip() for g in categories.split(",")]
cats_to_include = set()
for group in requested_groups:
if group in POI_CATEGORIES:
cats_to_include.update(POI_CATEGORIES[group])
if group in POI_CATEGORY_GROUPS:
cats_to_include.update(POI_CATEGORY_GROUPS[group]["categories"])
if not cats_to_include:
return {"features": []}
# Filter by bounds and categories
filtered = df.filter(
(pl.col("lat") >= south)
& (pl.col("lat") <= north)
@ -108,7 +250,6 @@ async def get_pois(
& (pl.col("category").is_in(cats_to_include))
)
# Limit results to avoid overwhelming the frontend
MAX_POIS = 5000
if len(filtered) > MAX_POIS:
filtered = filtered.sample(n=MAX_POIS, seed=42)
@ -118,5 +259,10 @@ async def get_pois(
@router.get("/poi-categories")
async def get_poi_categories() -> dict:
"""Get available POI category groups."""
return {"categories": list(POI_CATEGORIES.keys())}
"""Get available POI category groups with emoji and labels."""
return {
"categories": {
key: {"emoji": group["emoji"], "label": group["label"]}
for key, group in POI_CATEGORY_GROUPS.items()
}
}