# perfect-postcode/server/routes/pois.py
#
# 322 lines
# 8.5 KiB
# Python

"""POI (Points of Interest) API endpoint."""
from pathlib import Path
from fastapi import APIRouter, Query
import polars as pl
# Router on which this module's endpoints (/pois and /poi-categories) are
# registered.
router = APIRouter()
# Location of the POI parquet file. The path is relative, so it is resolved
# against the current working directory of the running process.
DATA_FILE = Path("data_sources/uk_pois.parquet")
# Static catalogue of POI category groups.
#
# Each key is a group identifier (the value clients pass in the `categories`
# query parameter of /pois). Each entry carries:
#   - "emoji":      glyph returned to clients alongside the label
#   - "label":      human-readable group name
#   - "categories": the individual POI category values belonging to the group
#
# This is only the definition of what *may* exist: at load time the categories
# are matched against the values actually present in the parquet file, so the
# selector only shows groups with real data.
_GROUP_DEFS: dict[str, dict] = {
    "schools": {
        "emoji": "🏫",
        "label": "Schools",
        "categories": ["school", "preschool", "college_university", "library"],
    },
    "healthcare": {
        "emoji": "🏥",
        "label": "Healthcare",
        "categories": [
            "doctor",
            "dentist",
            "pharmacy",
            "hospital",
            "public_health_clinic",
            "veterinary",
            "nursing_home",
            "social_facility",
        ],
    },
    "transport": {
        "emoji": "🚉",
        "label": "Transport",
        "categories": [
            "train_station",
            "bus_station",
            "bus_stop",
            "metro_station",
            "light_rail_station",
            "tram_stop",
            "ferry_terminal",
            "airport",
        ],
    },
    "parks": {
        "emoji": "🌳",
        "label": "Parks & Leisure",
        "categories": [
            "park",
            "national_park",
            "nature_reserve",
            "dog_park",
            "playground",
            "garden",
            "sports_centre",
            "swimming_pool",
            "gym",
            "golf_course",
            "marina",
        ],
    },
    "emergency": {
        "emoji": "🚨",
        "label": "Emergency",
        "categories": ["police_department", "fire_department"],
    },
    "supermarkets": {
        "emoji": "🛒",
        "label": "Supermarkets & Grocery",
        "categories": [
            "supermarket",
            "grocery_store",
            "convenience_store",
            "bakery",
            "butcher",
            "greengrocer",
            "deli",
        ],
    },
    "shopping": {
        "emoji": "🛍️",
        "label": "Shopping",
        "categories": [
            "department_store",
            "clothing_store",
            "shoe_store",
            "electronics_store",
            "hardware_store",
            "furniture_store",
            "bookshop",
            "newsagent",
            "charity_shop",
            "shopping_centre",
            "optician",
            "off_licence",
        ],
    },
    "food_drink": {
        "emoji": "🍽️",
        "label": "Food & Drink",
        "categories": [
            "restaurant",
            "cafe",
            "pub",
            "bar",
            "fast_food",
            "food_court",
            "ice_cream",
            "beer_garden",
        ],
    },
    "personal_care": {
        "emoji": "💇",
        "label": "Personal Care",
        "categories": [
            "hairdresser",
            "beauty_salon",
            "laundry",
            "dry_cleaning",
        ],
    },
    "finance": {
        "emoji": "🏦",
        "label": "Finance",
        "categories": ["bank", "atm", "bureau_de_change"],
    },
    "entertainment": {
        "emoji": "🎭",
        "label": "Entertainment & Culture",
        "categories": [
            "cinema",
            "theatre",
            "nightclub",
            "community_centre",
            "arts_centre",
            "museum",
            "gallery",
            "attraction",
            "zoo",
            "theme_park",
            "viewpoint",
        ],
    },
    "accommodation": {
        "emoji": "🏨",
        "label": "Accommodation",
        "categories": [
            "hotel",
            "hostel",
            "guest_house",
            "campsite",
            "caravan_site",
        ],
    },
    "religion": {
        "emoji": "🛐",
        "label": "Places of Worship",
        "categories": ["place_of_worship"],
    },
    "government": {
        "emoji": "🏛️",
        "label": "Government & Public",
        "categories": [
            "town_hall",
            "courthouse",
            "post_office",
            "prison",
            "public_toilets",
        ],
    },
    "automotive": {
        # NOTE(review): this emoji was an empty string, unlike every other
        # group, which left the group without an icon. "🚗" is a best guess
        # at the intended glyph — confirm against the frontend design.
        "emoji": "🚗",
        "label": "Automotive",
        "categories": [
            "petrol_station",
            "ev_charging",
            "car_dealer",
            "car_repair",
            "parking",
            "bicycle_parking",
        ],
    },
    "recycling": {
        "emoji": "♻️",
        "label": "Recycling & Waste",
        "categories": ["recycling", "waste_disposal"],
    },
}
# Cached POI dataframe; stays None until _load_and_build() has successfully
# read the parquet file.
_df_cache: pl.DataFrame | None = None
# Reverse lookup from a category value to the key of the group that lists it.
# Rebuilt by _load_and_build().
_cat_to_group: dict[str, str] = {}
# Groups that are actually backed by data: rebuilt by _load_and_build() so
# that only groups with at least one member category present in the parquet
# file are included.
_active_groups: dict[str, dict] = {}
def _load_and_build() -> pl.DataFrame | None:
    """Read the POI parquet and derive the active category groups from it.

    Rebinds the module-level ``_active_groups``, ``_cat_to_group`` and
    ``_df_cache``.

    Returns:
        The cached dataframe, restricted to rows whose category is listed in
        some ``_GROUP_DEFS`` entry, or ``None`` (with module state left
        untouched) when ``DATA_FILE`` does not exist.
    """
    global _df_cache, _active_groups, _cat_to_group

    if not DATA_FILE.exists():
        return None

    pois = pl.read_parquet(DATA_FILE).select("id", "name", "category", "lat", "lng")

    # Every distinct category value found in the file.
    seen_categories: set[str] = set(pois.get_column("category").unique().to_list())

    # Row count per category value, used for the per-group totals below.
    rows_per_category: dict[str, int] = dict(
        pois.group_by("category").agg(pl.len().alias("n")).iter_rows()
    )

    # Reverse index over every category named in _GROUP_DEFS, regardless of
    # whether it occurs in the data.
    group_of: dict[str, str] = {
        category: group_key
        for group_key, spec in _GROUP_DEFS.items()
        for category in spec["categories"]
    }

    # Categories that are both present in the file and assigned to a group.
    usable = seen_categories & group_of.keys()

    # A group is active only if at least one of its categories has data.
    groups: dict[str, dict] = {}
    for group_key, spec in _GROUP_DEFS.items():
        members = [c for c in spec["categories"] if c in usable]
        if not members:
            continue
        groups[group_key] = {
            "emoji": spec["emoji"],
            "label": spec["label"],
            "categories": members,
            "count": sum(rows_per_category.get(c, 0) for c in members),
        }

    _active_groups = groups
    _cat_to_group = group_of

    # Rows with a category outside every group are dropped from the cache.
    _df_cache = pois.filter(pl.col("category").is_in(usable))
    return _df_cache
def get_df() -> pl.DataFrame | None:
    """Return the cached POI dataframe, loading it first if nothing is cached.

    The result is ``None`` when no dataframe is cached and
    ``_load_and_build()`` returns ``None``.
    """
    cached = _df_cache
    return cached if cached is not None else _load_and_build()
def preload_pois() -> None:
    """Load the POI data eagerly and print a one-line summary.

    Nothing is printed when ``_load_and_build()`` returns ``None``.
    """
    loaded = _load_and_build()
    if loaded is None:
        return
    group_total = len(_active_groups)
    print(f"Loaded {len(loaded):,} POIs across {group_total} category groups")
@router.get("/pois")
async def get_pois(
    categories: str = Query(..., description="Comma-separated category groups"),
    bounds: str = Query(..., description="Bounding box: south,west,north,east"),
) -> dict:
    """Return the POIs inside a bounding box for the requested groups.

    Args:
        categories: Comma-separated group keys; surrounding whitespace is
            stripped and keys that are not active groups are ignored.
        bounds: Four comma-separated floats in the order
            ``south,west,north,east``. Both edges of each range are inclusive.

    Returns:
        ``{"features": [...]}`` with one dict per matching row. The list is
        empty when no data is loaded, when ``bounds`` cannot be parsed into
        exactly four floats, or when none of the requested groups is active.
        Results larger than 5000 rows are down-sampled to 5000 with a fixed
        seed.
    """
    pois = get_df()
    if pois is None:
        return {"features": []}

    # Both a non-numeric part and a wrong number of parts raise ValueError.
    try:
        south, west, north, east = (float(part) for part in bounds.split(","))
    except ValueError:
        return {"features": []}

    # Expand the requested group keys into their member categories.
    wanted: set[str] = set()
    for raw_key in categories.split(","):
        group = _active_groups.get(raw_key.strip())
        if group is not None:
            wanted.update(group["categories"])
    if not wanted:
        return {"features": []}

    lat, lng = pl.col("lat"), pl.col("lng")
    in_lat_range = (lat >= south) & (lat <= north)
    in_lng_range = (lng >= west) & (lng <= east)
    in_requested_group = pl.col("category").is_in(wanted)
    hits = pois.filter(in_lat_range & in_lng_range & in_requested_group)

    # Cap the payload size; the fixed seed keeps the sample repeatable.
    MAX_POIS = 5000
    if len(hits) > MAX_POIS:
        hits = hits.sample(n=MAX_POIS, seed=42)

    return {"features": hits.to_dicts()}
@router.get("/poi-categories")
async def get_poi_categories() -> dict:
    """Return the POI category groups that are backed by loaded data.

    Returns:
        ``{"categories": {group_key: {"emoji", "label", "count"}}}`` with one
        entry per active group. The mapping is empty when no POI data could
        be loaded.
    """
    # Trigger the same lazy load that /pois uses. Without this, the endpoint
    # reports no groups whenever preload_pois() was not run and no /pois
    # request has populated _active_groups yet.
    get_df()

    summary = {}
    for key, group in _active_groups.items():
        summary[key] = {
            "emoji": group["emoji"],
            "label": group["label"],
            "count": group["count"],
        }
    return {"categories": summary}