Update map to do filtering

This commit is contained in:
Andras Schmelczer 2026-01-30 18:34:12 +00:00
parent 6122ee44da
commit d4fe881ef4
8 changed files with 349 additions and 372 deletions

View file

@ -9,8 +9,11 @@ router = APIRouter()
DATA_FILE = Path("data_sources/uk_pois.parquet")
# Category groups with emoji and member categories
POI_CATEGORY_GROUPS: dict[str, dict] = {
# Group definitions: maps a group key to its display metadata and the
# individual POI categories it contains. Categories are matched against
# the values that actually exist in the loaded parquet so that the
# selector only shows groups with real data.
_GROUP_DEFS: dict[str, dict] = {
"schools": {
"emoji": "🏫",
"label": "Schools",
@ -189,33 +192,80 @@ POI_CATEGORY_GROUPS: dict[str, dict] = {
},
}
# Flatten for quick lookup
ALL_CATEGORIES = {
cat for group in POI_CATEGORY_GROUPS.values() for cat in group["categories"]
}
# Category groups that have data, keyed by group key. Rebuilt by
# _load_and_build() every time it runs: only groups whose member categories
# actually appear in the parquet file are included. Each entry holds the
# group's "emoji", "label", the "categories" present in the data and their
# combined row "count".
_active_groups: dict[str, dict] = {}
# Reverse lookup: category value -> group key. Rebuilt by _load_and_build()
# and covers every category listed in _GROUP_DEFS, including categories
# that do not occur in the data.
_cat_to_group: dict[str, str] = {}
# Cached POI dataframe, restricted to rows whose category belongs to a known
# group; None until a load has succeeded.
_df_cache: pl.DataFrame | None = None
def _load_and_build() -> pl.DataFrame | None:
    """Load the POI parquet and rebuild the category-group lookups.

    Reads ``DATA_FILE``, keeps the ``id``, ``name``, ``category``, ``lat``
    and ``lng`` columns, and refreshes the module-level state:

    * ``_active_groups`` -- groups from ``_GROUP_DEFS`` with at least one
      member category present in the data, each carrying its emoji, label,
      the categories that are present and their combined row count.
    * ``_cat_to_group`` -- reverse map from every category listed in
      ``_GROUP_DEFS`` to its group key (including categories that are
      absent from the data).
    * ``_df_cache`` -- the dataframe restricted to rows whose category
      belongs to a known group.

    Returns:
        The filtered dataframe, or ``None`` when ``DATA_FILE`` does not
        exist (the module state is left untouched in that case).
    """
    global _df_cache, _active_groups, _cat_to_group

    if not DATA_FILE.exists():
        return None

    df = pl.read_parquet(DATA_FILE).select("id", "name", "category", "lat", "lng")

    # A single aggregation pass gives the per-category row counts; its keys
    # are exactly the distinct categories present in the data, so no
    # separate unique() scan is needed.
    counts: dict[str, int] = dict(
        df.group_by("category").agg(pl.len().alias("n")).iter_rows()
    )

    # Reverse map from every known category to its group. If a category is
    # listed under several groups, the group defined last wins.
    cat_to_group: dict[str, str] = {
        cat: key
        for key, gdef in _GROUP_DEFS.items()
        for cat in gdef["categories"]
    }

    # Categories that are both present in the data and assigned to a group.
    known_categories = counts.keys() & cat_to_group.keys()

    # Build active groups -- only those with at least one matching category.
    active: dict[str, dict] = {}
    for key, gdef in _GROUP_DEFS.items():
        present = [c for c in gdef["categories"] if c in known_categories]
        if not present:
            continue
        active[key] = {
            "emoji": gdef["emoji"],
            "label": gdef["label"],
            "categories": present,
            "count": sum(counts[c] for c in present),
        }

    _active_groups = active
    _cat_to_group = cat_to_group

    # Drop rows whose category is not part of any known group.
    _df_cache = df.filter(pl.col("category").is_in(known_categories))
    return _df_cache
def get_df() -> pl.DataFrame | None:
    """Return the cached POI dataframe, loading it on first use.

    Returns:
        The cached dataframe when one is available; otherwise whatever
        ``_load_and_build()`` returns, which is ``None`` when the data file
        does not exist.
    """
    # Fast path: a previous load already populated the cache.
    if _df_cache is not None:
        return _df_cache
    # _load_and_build() stores its result in the module-level cache itself.
    return _load_and_build()
def preload_pois() -> None:
    """Load the POI data eagerly and print a one-line summary.

    Always calls ``_load_and_build()``, so the category groups are rebuilt
    even if a dataframe is already cached. Prints nothing when the load
    returns ``None``.
    """
    df = _load_and_build()
    if df is None:
        return
    n_groups = len(_active_groups)
    print(f"Loaded {len(df):,} POIs across {n_groups} category groups")
@router.get("/pois")
@ -234,10 +284,10 @@ async def get_pois(
return {"features": []}
requested_groups = [g.strip() for g in categories.split(",")]
cats_to_include = set()
cats_to_include: set[str] = set()
for group in requested_groups:
if group in POI_CATEGORY_GROUPS:
cats_to_include.update(POI_CATEGORY_GROUPS[group]["categories"])
if group in _active_groups:
cats_to_include.update(_active_groups[group]["categories"])
if not cats_to_include:
return {"features": []}
@ -259,10 +309,14 @@ async def get_pois(
@router.get("/poi-categories")
async def get_poi_categories() -> dict:
    """Get available POI category groups derived from loaded data.

    Returns:
        A dict of the form ``{"categories": {group_key: {"emoji": ...,
        "label": ..., "count": ...}}}`` containing only the groups that have
        data. The mapping is empty when no data could be loaded.
    """
    # _active_groups is only populated as a side effect of loading the data.
    # Going through get_df() makes sure the lazy load has run, so this
    # endpoint does not report an empty mapping merely because it was the
    # first thing to be called. It is a no-op once the cache is warm.
    get_df()
    return {
        "categories": {
            key: {
                "emoji": group["emoji"],
                "label": group["label"],
                "count": group["count"],
            }
            for key, group in _active_groups.items()
        }
    }