use std::collections::{HashMap, HashSet}; use std::path::Path; use anyhow::{bail, Context}; use polars::frame::DataFrame; use polars::lazy::frame::LazyFrame; use polars::prelude::*; use rustc_hash::FxHashSet; use serde::Serialize; use tracing::info; use crate::features::POI_GROUP_ORDER; use crate::utils::{generate_priorities, InternedColumn}; #[derive(Serialize, Clone)] pub struct POICategoryGroup { pub name: String, pub categories: Vec, } const GROCERY_DASHBOARD_CATEGORIES: &[&str] = &[ "Supermarket", "Convenience Store", "Bakery", "Greengrocer", "Aldi", "Amazon", "Asda", "Booths", "Budgens", "Centra", "Co-op", "COOK", "Costco", "Dunnes Stores", "Farmfoods", "Heron Foods", "Iceland", "Lidl", "Makro", "M&S", "Morrisons", "Planet Organic", "Sainsbury's", "Spar", "Tesco", "The Food Warehouse", "Waitrose", "Whole Foods Market", ]; const DASHBOARD_POI_GROUPS: &[(&str, &[&str])] = &[ ( "Public Transport", &[ "Rail station", "Tube station", "Bus station", "Bus stop", "Airport", ], ), ("Groceries", GROCERY_DASHBOARD_CATEGORIES), ("Food & Drink", &["Café", "Restaurant", "Pub", "Fast Food"]), ("Green Space", &["Park", "Playground"]), ("Education", &["School"]), ( "Health", &["GP Surgery", "Pharmacy", "Dentist", "Hospital & Clinic"], ), ( "Leisure", &[ "Gym & Fitness", "Sports Centre", "Cinema", "Theatre", "Library", ], ), ( "Practical", &["Post Office", "Bank", "EV Charging", "Fuel Station"], ), ]; fn add_category_filter_index( category_values: &[String], category: &str, selected: &mut FxHashSet, ) { if let Some(pos) = category_values.iter().position(|value| value == category) { selected.insert(pos as u16); } } pub fn resolve_poi_category_filter(category_values: &[String], categories: &str) -> FxHashSet { let mut selected = FxHashSet::default(); for part in categories.split(',') { let category = part.trim(); if category.is_empty() { continue; } add_category_filter_index(category_values, category, &mut selected); } selected } pub struct POIData { /// Contiguous buffer holding all POI ID strings end-to-end. id_buffer: String, /// Byte offset into `id_buffer` where each row's ID starts. id_offsets: Vec, /// Length in bytes of each row's ID. id_lengths: Vec, pub group: InternedColumn, pub category: InternedColumn, pub icon_category: InternedColumn, pub name: Vec, pub lat: Vec, pub lng: Vec, pub emoji: InternedColumn, /// Deterministic pseudo-random priority per row, used to select a spatially /// uniform subset when the POI count exceeds the per-request limit. /// Computed once at load time so the same POIs are always chosen for a given viewport. pub priority: Vec, } impl POIData { /// Get the ID string for a given row. pub fn id(&self, row: usize) -> &str { let offset = self.id_offsets[row] as usize; let length = self.id_lengths[row] as usize; &self.id_buffer[offset..offset + length] } } fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result> { let column = df .column(name) .with_context(|| format!("Missing column '{name}' in POI data"))?; let string_column = column .str() .with_context(|| format!("Column '{name}' is not a string column"))?; string_column .into_iter() .enumerate() .map(|(row, value)| { value .map(ToString::to_string) .with_context(|| format!("Column '{name}' has null at row {row}")) }) .collect() } fn extract_f32_col(df: &DataFrame, name: &str) -> anyhow::Result> { let column = df .column(name) .with_context(|| format!("Missing column '{name}' in POI data"))?; let cast = column .cast(&DataType::Float32) .with_context(|| format!("Failed to cast column '{name}' to Float32"))?; let float_column = cast .f32() .with_context(|| format!("Column '{name}' is not a float32 column"))?; float_column .into_iter() .enumerate() .map(|(row, value)| value.with_context(|| format!("Column '{name}' has null at row {row}"))) .collect() } impl POIData { pub fn load(parquet_path: &Path) -> anyhow::Result { super::run_polars_io(|| Self::load_inner(parquet_path)) } fn load_inner(parquet_path: &Path) -> anyhow::Result { info!("Loading POI data from {:?}...", parquet_path); let parquet_path = PlRefPath::try_from_path(parquet_path) .context("Failed to normalize POI parquet path")?; let df = LazyFrame::scan_parquet(parquet_path, Default::default()) .context("Failed to scan POI parquet")? .collect() .context("Failed to read POI parquet")?; let row_count = df.height(); info!("Loaded {} POIs", row_count); let id_raw: Vec = extract_str_col(&df, "id")?; let name = extract_str_col(&df, "name")?; let category_raw = extract_str_col(&df, "category")?; let group_raw = extract_str_col(&df, "group")?; let lat = extract_f32_col(&df, "lat")?; let lng = extract_f32_col(&df, "lng")?; let emoji_raw = extract_str_col(&df, "emoji")?; let icon_category_raw = extract_str_col(&df, "icon_category")?; // Pack POI IDs into a contiguous buffer let total_id_bytes: usize = id_raw.iter().map(|s| s.len()).sum(); let mut id_buffer = String::with_capacity(total_id_bytes); let mut id_offsets = Vec::with_capacity(row_count); let mut id_lengths = Vec::with_capacity(row_count); for s in &id_raw { let offset = id_buffer.len() as u32; let length = s.len().min(u16::MAX as usize) as u16; id_offsets.push(offset); id_lengths.push(length); id_buffer.push_str(&s[..length as usize]); } let category = InternedColumn::build(&category_raw); let icon_category = InternedColumn::build(&icon_category_raw); let group = InternedColumn::build(&group_raw); let emoji = InternedColumn::build(&emoji_raw); info!( category_unique = category.values.len(), icon_category_unique = icon_category.values.len(), group_unique = group.values.len(), emoji_unique = emoji.values.len(), "POI string columns interned" ); // Assign a deterministic pseudo-random priority to each row. // This ensures the same POIs are selected across requests, // preventing visual "shuffling" when panning the map. let priority = generate_priorities(row_count); info!("POI data loading complete."); Ok(POIData { id_buffer, id_offsets, id_lengths, name, category, icon_category, group, lat, lng, emoji, priority, }) } /// Build dashboard category groups from every category present in the loaded POI data. pub fn category_groups(&self) -> anyhow::Result> { let mut group_cats: HashMap> = HashMap::new(); let num_pois = self.category.indices.len(); for row in 0..num_pois { let category = self.category.get(row).to_string(); let group = self.group.get(row).to_string(); group_cats.entry(group).or_default().insert(category); } // Validate that data groups match the hardcoded order exactly let expected: HashSet<&str> = POI_GROUP_ORDER.iter().copied().collect(); let actual: HashSet<&str> = group_cats.keys().map(|key| key.as_str()).collect(); let missing_from_data: Vec<&&str> = expected.difference(&actual).collect(); let missing_from_order: Vec<&&str> = actual.difference(&expected).collect(); if !missing_from_data.is_empty() || !missing_from_order.is_empty() { bail!( "POI group mismatch!\n In POI_GROUP_ORDER but not in data: {:?}\n In data but not in POI_GROUP_ORDER: {:?}", missing_from_data, missing_from_order ); } let preferred_order: HashMap<&str, HashMap<&str, usize>> = DASHBOARD_POI_GROUPS .iter() .map(|(group, categories)| { ( *group, categories .iter() .enumerate() .map(|(idx, category)| (*category, idx)) .collect(), ) }) .collect(); let groups: Vec = POI_GROUP_ORDER .iter() .filter_map(|group_name| { let mut categories: Vec = group_cats .get(*group_name) .map(|categories| categories.iter().cloned().collect()) .unwrap_or_default(); if categories.is_empty() { return None; } let group_order = preferred_order.get(*group_name); categories.sort_by(|a, b| { let a_order = group_order.and_then(|order| order.get(a.as_str())).copied(); let b_order = group_order.and_then(|order| order.get(b.as_str())).copied(); match (a_order, b_order) { (Some(left), Some(right)) => left.cmp(&right), (Some(_), None) => std::cmp::Ordering::Less, (None, Some(_)) => std::cmp::Ordering::Greater, (None, None) => a.cmp(b), } }); Some(POICategoryGroup { name: (*group_name).to_string(), categories, }) }) .collect(); Ok(groups) } } #[cfg(test)] mod tests { use super::*; #[test] fn category_filter_matches_exact_present_categories() { let values = vec![ "Supermarket".to_string(), "Tesco".to_string(), "Aldi".to_string(), "Rail station".to_string(), ]; let selected = resolve_poi_category_filter(&values, "Supermarket,Rail station"); assert!(selected.contains(&0)); assert!(selected.contains(&3)); assert_eq!(selected.len(), 2); } #[test] fn unknown_category_filter_matches_nothing() { let values = vec!["Supermarket".to_string()]; let selected = resolve_poi_category_filter(&values, "Unknown"); assert!(selected.is_empty()); } }