# perfect-postcode/server/routes/pois.py

"""POI (Points of Interest) API endpoint."""

from pathlib import Path

from fastapi import APIRouter, Query
import polars as pl

# Router that carries the POI endpoints defined in this module.
router = APIRouter()

# Location of the POI parquet file.  The path is relative, so it is resolved
# against the working directory of the server process.
DATA_FILE = Path("data_sources/uk_pois.parquet")

# Group definitions: maps a group key to its display metadata ("emoji",
# "label") and the individual POI categories it contains ("categories").
# At load time the categories are matched against the values that actually
# exist in the parquet's "category" column, so that the selector only shows
# groups with real data.  Keep each category in a single group: the reverse
# category -> group lookup built in _load_and_build() stores one group per
# category, so a duplicate would be attributed to the last group listing it.
_GROUP_DEFS: dict[str, dict] = {
    "schools": {
        "emoji": "🏫",
        "label": "Schools",
        "categories": ["school", "preschool", "college_university", "library"],
    },
    "healthcare": {
        "emoji": "🏥",
        "label": "Healthcare",
        "categories": [
            "doctor",
            "dentist",
            "pharmacy",
            "hospital",
            "public_health_clinic",
            "veterinary",
            "nursing_home",
            "social_facility",
        ],
    },
    "transport": {
        "emoji": "🚉",
        "label": "Transport",
        "categories": [
            "train_station",
            "bus_station",
            "bus_stop",
            "metro_station",
            "light_rail_station",
            "tram_stop",
            "ferry_terminal",
            "airport",
        ],
    },
    "parks": {
        "emoji": "🌳",
        "label": "Parks & Leisure",
        "categories": [
            "park",
            "national_park",
            "nature_reserve",
            "dog_park",
            "playground",
            "garden",
            "sports_centre",
            "swimming_pool",
            "gym",
            "golf_course",
            "marina",
        ],
    },
    "emergency": {
        "emoji": "🚨",
        "label": "Emergency",
        "categories": ["police_department", "fire_department"],
    },
    "supermarkets": {
        "emoji": "🛒",
        "label": "Supermarkets & Grocery",
        "categories": [
            "supermarket",
            "grocery_store",
            "convenience_store",
            "bakery",
            "butcher",
            "greengrocer",
            "deli",
        ],
    },
    "shopping": {
        "emoji": "🛍️",
        "label": "Shopping",
        "categories": [
            "department_store",
            "clothing_store",
            "shoe_store",
            "electronics_store",
            "hardware_store",
            "furniture_store",
            "bookshop",
            "newsagent",
            "charity_shop",
            "shopping_centre",
            "optician",
            "off_licence",
        ],
    },
    "food_drink": {
        "emoji": "🍽️",
        "label": "Food & Drink",
        "categories": [
            "restaurant",
            "cafe",
            "pub",
            "bar",
            "fast_food",
            "food_court",
            "ice_cream",
            "beer_garden",
        ],
    },
    "personal_care": {
        "emoji": "💇",
        "label": "Personal Care",
        "categories": [
            "hairdresser",
            "beauty_salon",
            "laundry",
            "dry_cleaning",
        ],
    },
    "finance": {
        "emoji": "🏦",
        "label": "Finance",
        "categories": ["bank", "atm", "bureau_de_change"],
    },
    "entertainment": {
        "emoji": "🎭",
        "label": "Entertainment & Culture",
        "categories": [
            "cinema",
            "theatre",
            "nightclub",
            "community_centre",
            "arts_centre",
            "museum",
            "gallery",
            "attraction",
            "zoo",
            "theme_park",
            "viewpoint",
        ],
    },
    "accommodation": {
        "emoji": "🏨",
        "label": "Accommodation",
        "categories": [
            "hotel",
            "hostel",
            "guest_house",
            "campsite",
            "caravan_site",
        ],
    },
    "religion": {
        "emoji": "🛐",
        "label": "Places of Worship",
        "categories": ["place_of_worship"],
    },
    "government": {
        "emoji": "🏛️",
        "label": "Government & Public",
        "categories": [
            "town_hall",
            "courthouse",
            "post_office",
            "prison",
            "public_toilets",
        ],
    },
    "automotive": {
        "emoji": "⛽",
        "label": "Automotive",
        "categories": [
            "petrol_station",
            "ev_charging",
            "car_dealer",
            "car_repair",
            "parking",
            "bicycle_parking",
        ],
    },
    "recycling": {
        "emoji": "♻️",
        "label": "Recycling & Waste",
        "categories": ["recycling", "waste_disposal"],
    },
}

# Populated by _load_and_build() (at startup via preload_pois(), or lazily
# via get_df()).  Only groups whose member categories actually appear in the
# parquet file are included; each value holds the group's "emoji", "label",
# the "categories" present in the data and their combined row "count".
_active_groups: dict[str, dict] = {}

# Reverse lookup: category value -> group key, filled by _load_and_build().
# It covers every category listed in _GROUP_DEFS, not only those present in
# the data.
_cat_to_group: dict[str, str] = {}

# Cached POI dataframe, already restricted to rows whose category belongs to
# a known group.  Stays None until a load succeeds.
_df_cache: pl.DataFrame | None = None


def _load_and_build() -> pl.DataFrame | None:
    """Load the POI parquet and rebuild the module-level group state.

    Reads only the columns the API serves, works out which groups in
    ``_GROUP_DEFS`` have at least one category present in the data, and
    publishes the result through ``_active_groups``, ``_cat_to_group`` and
    ``_df_cache``.

    Returns:
        The cached dataframe, restricted to rows whose category belongs to a
        known group, or ``None`` when ``DATA_FILE`` does not exist (in which
        case no module state is modified).
    """
    global _df_cache, _active_groups, _cat_to_group

    if not DATA_FILE.exists():
        return None

    columns = ["id", "name", "category", "lat", "lng"]
    # Project at read time so unused parquet columns are never decoded; the
    # trailing select pins the column order regardless of the file's layout.
    df = pl.read_parquet(DATA_FILE, columns=columns).select(columns)

    # A single aggregation yields both the distinct categories (the keys)
    # and the per-category row counts (the values).
    counts: dict[str, int] = dict(
        df.group_by("category").agg(pl.len().alias("n")).iter_rows()
    )

    # Reverse map from every defined category to its group key.
    cat_to_group: dict[str, str] = {
        cat: key
        for key, gdef in _GROUP_DEFS.items()
        for cat in gdef["categories"]
    }

    # Categories that are both present in the data and assigned to a group.
    # Anything else in the data (including a null category) is dropped here.
    known_categories = counts.keys() & cat_to_group.keys()

    # Build active groups — only those with at least one matching category.
    active: dict[str, dict] = {}
    for key, gdef in _GROUP_DEFS.items():
        present = [c for c in gdef["categories"] if c in known_categories]
        if present:
            active[key] = {
                "emoji": gdef["emoji"],
                "label": gdef["label"],
                "categories": present,
                "count": sum(counts[c] for c in present),
            }

    _active_groups = active
    _cat_to_group = cat_to_group

    # Cache only the rows the endpoints can ever return.
    _df_cache = df.filter(pl.col("category").is_in(sorted(known_categories)))
    return _df_cache


def get_df() -> pl.DataFrame | None:
    """Fetch the POI dataframe, loading it on demand when nothing is cached.

    Returns:
        The cached dataframe if one exists; otherwise whatever
        ``_load_and_build()`` returns, which may be ``None``.
    """
    cached = _df_cache
    if cached is not None:
        return cached
    # Nothing cached yet: attempt a load now.
    return _load_and_build()


def preload_pois() -> None:
    """Load the POI data eagerly and print a one-line summary on success.

    Nothing is printed when ``_load_and_build()`` returns ``None``.
    """
    loaded = _load_and_build()
    if loaded is None:
        return

    poi_total = len(loaded)
    group_total = len(_active_groups)
    print(f"Loaded {poi_total:,} POIs across {group_total} category groups")


@router.get("/pois")
async def get_pois(
    categories: str = Query(..., description="Comma-separated category groups"),
    bounds: str = Query(..., description="Bounding box: south,west,north,east"),
) -> dict:
    """Return the POIs of the requested groups that lie inside a bounding box.

    Args:
        categories: Comma-separated group keys.  Surrounding whitespace is
            ignored and keys that are not active groups are skipped.
        bounds: Four comma-separated numbers in the order
            south, west, north, east.  Both edges of each range are inclusive.

    Returns:
        ``{"features": [...]}`` with one dict per matching row.  The list is
        empty when no data is loaded, when ``bounds`` cannot be parsed, or
        when none of the requested groups is active.  Results larger than
        5000 rows are reduced to a fixed-seed random sample of 5000.
    """
    MAX_POIS = 5000

    df = get_df()
    if df is None:
        return {"features": []}

    # A wrong number of parts and a non-numeric part both raise ValueError.
    try:
        south, west, north, east = (float(part) for part in bounds.split(","))
    except ValueError:
        return {"features": []}

    # Expand the requested group keys into their concrete categories.
    selected: set[str] = set()
    for raw_name in categories.split(","):
        group = _active_groups.get(raw_name.strip())
        if group is not None:
            selected.update(group["categories"])

    if not selected:
        return {"features": []}

    lat = pl.col("lat")
    lng = pl.col("lng")
    inside_lat = (lat >= south) & (lat <= north)
    inside_lng = (lng >= west) & (lng <= east)
    wanted_category = pl.col("category").is_in(selected)

    matches = df.filter(inside_lat & inside_lng & wanted_category)

    # Cap the payload; the fixed seed makes the sample repeatable.
    if matches.height > MAX_POIS:
        matches = matches.sample(n=MAX_POIS, seed=42)

    return {"features": matches.to_dicts()}


@router.get("/poi-categories")
async def get_poi_categories() -> dict:
    """Get available POI category groups derived from loaded data.

    Returns:
        ``{"categories": {group_key: {"emoji", "label", "count"}}}`` for
        every active group.  The mapping is empty when the data file is
        unavailable or no defined category occurs in the data.
    """
    # Group metadata is only filled in as a side effect of loading the data.
    # Trigger the same lazy load that /pois relies on, so this endpoint does
    # not report an empty selector just because it was called before any
    # load had happened.
    get_df()

    return {
        "categories": {
            key: {
                "emoji": group["emoji"],
                "label": group["label"],
                "count": group["count"],
            }
            for key, group in _active_groups.items()
        }
    }