"""POI (Points of Interest) API endpoint.""" from pathlib import Path from fastapi import APIRouter, Query import polars as pl router = APIRouter() DATA_FILE = Path("data_sources/uk_pois.parquet") # Group definitions: maps a group key to its display metadata and the # individual POI categories it contains. Categories are matched against # the values that actually exist in the loaded parquet so that the # selector only shows groups with real data. _GROUP_DEFS: dict[str, dict] = { "schools": { "emoji": "🏫", "label": "Schools", "categories": ["school", "preschool", "college_university", "library"], }, "healthcare": { "emoji": "🏥", "label": "Healthcare", "categories": [ "doctor", "dentist", "pharmacy", "hospital", "public_health_clinic", "veterinary", "nursing_home", "social_facility", ], }, "transport": { "emoji": "🚉", "label": "Transport", "categories": [ "train_station", "bus_station", "bus_stop", "metro_station", "light_rail_station", "tram_stop", "ferry_terminal", "airport", ], }, "parks": { "emoji": "🌳", "label": "Parks & Leisure", "categories": [ "park", "national_park", "nature_reserve", "dog_park", "playground", "garden", "sports_centre", "swimming_pool", "gym", "golf_course", "marina", ], }, "emergency": { "emoji": "🚨", "label": "Emergency", "categories": ["police_department", "fire_department"], }, "supermarkets": { "emoji": "🛒", "label": "Supermarkets & Grocery", "categories": [ "supermarket", "grocery_store", "convenience_store", "bakery", "butcher", "greengrocer", "deli", ], }, "shopping": { "emoji": "🛍️", "label": "Shopping", "categories": [ "department_store", "clothing_store", "shoe_store", "electronics_store", "hardware_store", "furniture_store", "bookshop", "newsagent", "charity_shop", "shopping_centre", "optician", "off_licence", ], }, "food_drink": { "emoji": "🍽️", "label": "Food & Drink", "categories": [ "restaurant", "cafe", "pub", "bar", "fast_food", "food_court", "ice_cream", "beer_garden", ], }, "personal_care": { "emoji": "💇", "label": 
"Personal Care", "categories": [ "hairdresser", "beauty_salon", "laundry", "dry_cleaning", ], }, "finance": { "emoji": "🏦", "label": "Finance", "categories": ["bank", "atm", "bureau_de_change"], }, "entertainment": { "emoji": "🎭", "label": "Entertainment & Culture", "categories": [ "cinema", "theatre", "nightclub", "community_centre", "arts_centre", "museum", "gallery", "attraction", "zoo", "theme_park", "viewpoint", ], }, "accommodation": { "emoji": "🏨", "label": "Accommodation", "categories": [ "hotel", "hostel", "guest_house", "campsite", "caravan_site", ], }, "religion": { "emoji": "🛐", "label": "Places of Worship", "categories": ["place_of_worship"], }, "government": { "emoji": "🏛️", "label": "Government & Public", "categories": [ "town_hall", "courthouse", "post_office", "prison", "public_toilets", ], }, "automotive": { "emoji": "⛽", "label": "Automotive", "categories": [ "petrol_station", "ev_charging", "car_dealer", "car_repair", "parking", "bicycle_parking", ], }, "recycling": { "emoji": "♻️", "label": "Recycling & Waste", "categories": ["recycling", "waste_disposal"], }, } # Built at startup from the data — only groups whose member categories # actually appear in the parquet file are included. 
_active_groups: dict[str, dict] = {}

# Reverse lookup: category value -> group key (built at startup)
_cat_to_group: dict[str, str] = {}

# Cache the dataframe
_df_cache: pl.DataFrame | None = None

# Upper bound on the number of POIs returned by a single /pois request.
MAX_POIS = 5000


def _load_and_build() -> pl.DataFrame | None:
    """Load the parquet, build category groups from actual data.

    Side effects: rebinds the module-level ``_df_cache``, ``_active_groups``
    and ``_cat_to_group``.

    Returns:
        The dataframe restricted to rows whose category belongs to a group
        in ``_GROUP_DEFS``, or ``None`` when ``DATA_FILE`` does not exist
        (module state is left untouched in that case).
    """
    global _df_cache, _active_groups, _cat_to_group

    if not DATA_FILE.exists():
        return None

    df = pl.read_parquet(DATA_FILE).select("id", "name", "category", "lat", "lng")

    # Per-category row counts for the response. The keys are exactly the
    # distinct categories present in the data, so no separate unique() pass
    # over the column is needed.
    counts: dict[str, int] = dict(
        df.group_by("category").agg(pl.len().alias("n")).iter_rows()
    )
    data_categories = set(counts)

    # Build reverse map from every known category to its group
    cat_to_group: dict[str, str] = {}
    for key, gdef in _GROUP_DEFS.items():
        for cat in gdef["categories"]:
            cat_to_group[cat] = key

    # Only keep categories that belong to a known group
    known_categories = data_categories & cat_to_group.keys()

    # Build active groups — only those with at least one matching category
    active: dict[str, dict] = {}
    for key, gdef in _GROUP_DEFS.items():
        present = [c for c in gdef["categories"] if c in known_categories]
        if present:
            active[key] = {
                "emoji": gdef["emoji"],
                "label": gdef["label"],
                "categories": present,
                "count": sum(counts.get(c, 0) for c in present),
            }

    _active_groups = active
    _cat_to_group = cat_to_group

    # Filter dataframe to only known categories
    _df_cache = df.filter(pl.col("category").is_in(known_categories))
    return _df_cache


def get_df() -> pl.DataFrame | None:
    """Return cached POI dataframe, loading if necessary.

    Returns:
        The cached dataframe, or ``None`` if the data file is missing. A
        missing file is not cached, so the next call checks for it again.
    """
    if _df_cache is None:
        return _load_and_build()
    return _df_cache


def preload_pois() -> None:
    """Preload POI data on startup and print a one-line summary."""
    df = _load_and_build()
    if df is not None:
        n_groups = len(_active_groups)
        print(f"Loaded {len(df):,} POIs across {n_groups} category groups")


@router.get("/pois")
async def get_pois(
    categories: str = Query(..., description="Comma-separated category groups"),
    bounds: str = Query(..., description="Bounding box: south,west,north,east"),
) -> dict:
    """Get POIs within bounds for specified category groups.

    Args:
        categories: Comma-separated group keys; keys that are not active
            groups are ignored.
        bounds: Bounding box as ``south,west,north,east`` (inclusive on all
            four edges).

    Returns:
        ``{"features": [...]}`` where each feature is a row dict with the
        ``id``, ``name``, ``category``, ``lat`` and ``lng`` columns. The list
        is empty when the data file is missing, ``bounds`` cannot be parsed
        as four floats, or no requested group is active. At most
        ``MAX_POIS`` rows are returned.
    """
    df = get_df()
    if df is None:
        return {"features": []}

    try:
        # Both a non-numeric part and a wrong number of parts raise ValueError.
        south, west, north, east = map(float, bounds.split(","))
    except ValueError:
        return {"features": []}

    requested_groups = [g.strip() for g in categories.split(",")]

    cats_to_include: set[str] = set()
    for group in requested_groups:
        if group in _active_groups:
            cats_to_include.update(_active_groups[group]["categories"])

    if not cats_to_include:
        return {"features": []}

    filtered = df.filter(
        (pl.col("lat") >= south)
        & (pl.col("lat") <= north)
        & (pl.col("lng") >= west)
        & (pl.col("lng") <= east)
        & (pl.col("category").is_in(cats_to_include))
    )

    # Cap the payload; the fixed seed keeps the sample stable for the same
    # filtered rows.
    if len(filtered) > MAX_POIS:
        filtered = filtered.sample(n=MAX_POIS, seed=42)

    return {"features": filtered.to_dicts()}


@router.get("/poi-categories")
async def get_poi_categories() -> dict:
    """Get available POI category groups derived from loaded data.

    Returns:
        ``{"categories": {group_key: {"emoji", "label", "count"}}}`` for
        every active group; the mapping is empty when no data is available.
    """
    # Trigger the lazy load so the groups are populated even when
    # preload_pois() has not run yet, matching get_pois().
    get_df()

    return {
        "categories": {
            key: {
                "emoji": group["emoji"],
                "label": group["label"],
                "count": group["count"],
            }
            for key, group in _active_groups.items()
        }
    }