use std::collections::VecDeque;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use anyhow::{bail, Context};
use parking_lot::Mutex;
use polars::lazy::frame::LazyFrame;
use rustc_hash::{FxHashMap, FxHashSet};
use tracing::info;

/// Per-postcode travel time data: median and optional best-case (transit only).
#[derive(Clone, Copy)]
pub struct TravelDataRow {
    /// Value read from the `travel_minutes` column.
    pub minutes: i16,
    /// Value read from the optional `best_minutes` column, when present and non-null.
    pub best_minutes: Option<i16>,
}

/// Cached postcode → travel time data for a single destination file.
pub type TravelData = Arc<FxHashMap<String, TravelDataRow>>;

/// Simple LRU cache for travel time data, limited to `capacity` entries.
///
/// Keys are `(mode, slug)` pairs. `order` tracks recency: the front is the
/// most recently used key, the back is the next eviction candidate. Every key
/// in `map` appears exactly once in `order`.
struct LruCache {
    map: FxHashMap<(String, String), TravelData>,
    order: VecDeque<(String, String)>,
    capacity: usize,
}

impl LruCache {
    /// Create an empty cache that holds at most `capacity` entries.
    ///
    /// A `capacity` of zero yields a cache that never stores anything.
    fn new(capacity: usize) -> Self {
        Self {
            map: FxHashMap::default(),
            order: VecDeque::with_capacity(capacity),
            capacity,
        }
    }

    /// Look up `key`; on a hit, mark it as most recently used and return a
    /// clone of the shared handle.
    fn get(&mut self, key: &(String, String)) -> Option<TravelData> {
        let data = self.map.get(key)?.clone();
        // Move to front (most recently used), reusing the stored key instead
        // of allocating a fresh clone of both strings.
        if let Some(pos) = self.order.iter().position(|k| k == key) {
            if let Some(existing) = self.order.remove(pos) {
                self.order.push_front(existing);
            }
        }
        Some(data)
    }

    /// Insert or replace the entry for `key`, marking it most recently used.
    ///
    /// When the cache is full, least recently used entries are evicted first.
    /// With a capacity of zero the call is a no-op.
    fn insert(&mut self, key: (String, String), data: TravelData) {
        // Without this guard the eviction loop below could never make room
        // (`len >= 0` is always true) and would spin forever.
        if self.capacity == 0 {
            return;
        }

        if self.map.contains_key(&key) {
            // Replacing an existing entry: drop its old recency slot.
            if let Some(pos) = self.order.iter().position(|k| k == &key) {
                self.order.remove(pos);
            }
        } else {
            while self.map.len() >= self.capacity {
                match self.order.pop_back() {
                    Some(evicted) => {
                        self.map.remove(&evicted);
                    }
                    // Nothing left to evict; never loop on an empty queue.
                    None => break,
                }
            }
        }

        self.map.insert(key.clone(), data);
        self.order.push_front(key);
    }
}

/// Manages on-demand loading and caching of precomputed travel time parquet files.
///
/// Directory structure: `{base_dir}/{mode}/{slug}.parquet`
/// Each parquet file has columns: `pcds` (String), `travel_minutes` (Int16).
pub struct TravelTimeStore { base_dir: PathBuf, /// Available transport modes (subdirectory names, e.g., "bicycle") pub available_modes: Vec, /// mode → set of destination slugs (filenames without .parquet) pub destinations: FxHashMap>, cache: Mutex, } impl TravelTimeStore { /// Scan the travel-times directory to discover available modes and destinations. pub fn load(base_dir: &Path, cache_capacity: usize) -> anyhow::Result { let mut available_modes = Vec::new(); let mut destinations: FxHashMap> = FxHashMap::default(); for entry in std::fs::read_dir(base_dir) .with_context(|| format!("Failed to read travel-times dir: {}", base_dir.display()))? { let entry = entry?; let path = entry.path(); if !path.is_dir() { continue; } let mode = entry.file_name().to_string_lossy().to_string(); let mut slugs = FxHashSet::default(); for file_entry in std::fs::read_dir(&path) .with_context(|| format!("Failed to read mode dir: {}", path.display()))? { let file_entry = file_entry?; let file_name = file_entry.file_name(); let file_name = file_name.to_string_lossy(); if file_name.ends_with(".parquet") { let slug = file_name.trim_end_matches(".parquet").to_string(); slugs.insert(slug); } } if !slugs.is_empty() { info!( mode = mode.as_str(), destinations = slugs.len(), "Travel time mode discovered" ); available_modes.push(mode.clone()); destinations.insert(mode, slugs); } } available_modes.sort(); Ok(Self { base_dir: base_dir.to_path_buf(), available_modes, destinations, cache: Mutex::new(LruCache::new(cache_capacity)), }) } /// Load travel time data for a given mode and destination slug. /// Returns a cached or freshly-loaded postcode → travel_minutes mapping. 
pub fn get(&self, mode: &str, slug: &str) -> anyhow::Result { let key = (mode.to_string(), slug.to_string()); // Check cache first { let mut cache = self.cache.lock(); if let Some(data) = cache.get(&key) { return Ok(data); } } // Load from file (no lock held — harmless if two threads load the same file) let path = self .base_dir .join(mode) .join(format!("{}.parquet", slug)); if !path.exists() { bail!("Travel time file not found: {}", path.display()); } let df = LazyFrame::scan_parquet(&path, Default::default()) .with_context(|| format!("Failed to scan: {}", path.display()))? .collect() .with_context(|| format!("Failed to read: {}", path.display()))?; let postcodes = df .column("pcds") .context("Missing 'pcds' column")? .str() .context("'pcds' is not string")?; let minutes = df .column("travel_minutes") .context("Missing 'travel_minutes' column")? .i16() .context("'travel_minutes' is not i16")?; let best = df .column("best_minutes") .ok() .map(|col| col.i16().expect("'best_minutes' is not i16")); let mut map = FxHashMap::default(); map.reserve(df.height()); for (i, (pc, min)) in postcodes.into_iter().zip(minutes.into_iter()).enumerate() { if let (Some(pc), Some(min)) = (pc, min) { let best_min = best.as_ref().and_then(|b| b.get(i)); map.insert( pc.to_string(), TravelDataRow { minutes: min, best_minutes: best_min, }, ); } } let data: TravelData = Arc::new(map); // Insert into cache { let mut cache = self.cache.lock(); cache.insert(key, data.clone()); } Ok(data) } /// Check if a mode + slug combination is available. pub fn has_destination(&self, mode: &str, slug: &str) -> bool { self.destinations .get(mode) .map(|slugs| slugs.contains(slug)) .unwrap_or(false) } } /// Slugify a place name to match travel time file naming convention. 
/// "Abbey Hey" → "abbey-hey", "A'Bhuaile Ghlas" → "a-bhuaile-ghlas"
///
/// Every maximal run of ASCII letters and digits becomes one lowercase word;
/// anything else (spaces, punctuation, non-ASCII characters) acts as a
/// separator. Words are joined with single hyphens, so the result never
/// starts or ends with a hyphen and never contains two in a row.
pub fn slugify(name: &str) -> String {
    name.split(|ch: char| !ch.is_ascii_alphanumeric())
        // Adjacent, leading and trailing separators produce empty pieces.
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("-")
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each `(input, expected)` pair slugifies as expected.
    fn check(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(slugify(input), expected);
        }
    }

    #[test]
    fn slugify_basic() {
        check(&[
            ("Abbey Hey", "abbey-hey"),
            ("Abbots Bickington", "abbots-bickington"),
            ("London", "london"),
        ]);
    }

    #[test]
    fn slugify_special_chars() {
        check(&[("A'Bhuaile Ghlas", "a-bhuaile-ghlas")]);
    }

    #[test]
    fn slugify_edges() {
        check(&[(" Hello ", "hello"), ("Abbey", "abbey")]);
    }
}