//! On-demand loading and LRU caching of precomputed travel-time parquet files.
use std::collections::VecDeque;
|
|
use std::path::{Path, PathBuf};
|
|
use std::sync::Arc;
|
|
|
|
use anyhow::Context;
|
|
use parking_lot::Mutex;
|
|
use polars::lazy::frame::LazyFrame;
|
|
use rustc_hash::{FxHashMap, FxHashSet};
|
|
use tracing::info;
|
|
|
|
/// Travel time data for a single postcode.
///
/// `Debug`, `PartialEq` and `Eq` are derived in addition to `Clone` so rows can
/// be logged and compared directly (e.g. in tests and assertions).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TravelDataRow {
    /// Median travel time in minutes.
    pub minutes: i16,
    /// Best-case travel time in minutes (transit only); `None` when absent.
    pub best_minutes: Option<i16>,
    /// Journey instructions as a JSON leg array (transit only, with `--paths`);
    /// `None` when absent. Stored as `Arc<str>` so cloning a row is cheap.
    pub journey: Option<Arc<str>>,
}
|
|
|
|
/// Cached postcode → travel time data for a single destination file.
|
|
pub type TravelData = Arc<FxHashMap<String, TravelDataRow>>;
|
|
|
|
/// Simple LRU cache for travel time data, limited to `capacity` entries.
|
|
struct LruCache {
|
|
map: FxHashMap<(String, String), TravelData>,
|
|
order: VecDeque<(String, String)>,
|
|
capacity: usize,
|
|
}
|
|
|
|
impl LruCache {
|
|
fn new(capacity: usize) -> Self {
|
|
Self {
|
|
map: FxHashMap::default(),
|
|
order: VecDeque::with_capacity(capacity),
|
|
capacity,
|
|
}
|
|
}
|
|
|
|
fn get(&mut self, key: &(String, String)) -> Option<TravelData> {
|
|
if let Some(data) = self.map.get(key) {
|
|
// Move to front (most recently used)
|
|
if let Some(pos) = self.order.iter().position(|k| k == key) {
|
|
self.order.remove(pos);
|
|
self.order.push_front(key.clone());
|
|
}
|
|
Some(data.clone())
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn insert(&mut self, key: (String, String), data: TravelData) {
|
|
if self.map.contains_key(&key) {
|
|
self.map.insert(key.clone(), data);
|
|
if let Some(pos) = self.order.iter().position(|k| k == &key) {
|
|
self.order.remove(pos);
|
|
}
|
|
self.order.push_front(key);
|
|
} else {
|
|
while self.map.len() >= self.capacity {
|
|
if let Some(old_key) = self.order.pop_back() {
|
|
self.map.remove(&old_key);
|
|
}
|
|
}
|
|
self.map.insert(key.clone(), data);
|
|
self.order.push_front(key);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Strip a numeric prefix like "000000-" from a filename stem.
///
/// "000000-bank-tube-station" → "bank-tube-station"
///
/// The stem is returned unchanged when it contains no hyphen, or when the text
/// before the first hyphen is empty or contains anything other than ASCII
/// digits. Requiring at least one digit means a stem that merely starts with a
/// hyphen ("-foo") is not treated as having a numeric prefix.
fn strip_numeric_prefix(stem: &str) -> &str {
    match stem.split_once('-') {
        Some((prefix, rest))
            if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) =>
        {
            rest
        }
        _ => stem,
    }
}
|
|
|
|
/// Manages on-demand loading and caching of precomputed travel time parquet files.
|
|
///
|
|
/// Directory structure: `{base_dir}/{mode}/{NNNNNN-slug}.parquet`
|
|
/// Files have a numeric prefix for uniqueness; lookups use the stripped slug.
|
|
/// Each parquet file has columns: `pcds` (String), `travel_minutes` (Int16).
|
|
pub struct TravelTimeStore {
|
|
base_dir: PathBuf,
|
|
/// Available transport modes (subdirectory names, e.g., "bicycle")
|
|
pub available_modes: Vec<String>,
|
|
/// mode → set of destination slugs (numeric prefix stripped)
|
|
pub destinations: FxHashMap<String, FxHashSet<String>>,
|
|
/// (mode, stripped_slug) → full filename stem (with numeric prefix)
|
|
slug_to_file: FxHashMap<(String, String), String>,
|
|
cache: Mutex<LruCache>,
|
|
}
|
|
|
|
impl TravelTimeStore {
|
|
/// Scan the travel-times directory to discover available modes and destinations.
|
|
/// Filename stems have a numeric prefix (e.g., "000000-bank-tube-station") which
|
|
/// is stripped for slug lookups but preserved for file loading.
|
|
pub fn load(base_dir: &Path, cache_capacity: usize) -> anyhow::Result<Self> {
|
|
let mut available_modes = Vec::new();
|
|
let mut destinations: FxHashMap<String, FxHashSet<String>> = FxHashMap::default();
|
|
let mut slug_to_file: FxHashMap<(String, String), String> = FxHashMap::default();
|
|
|
|
for entry in std::fs::read_dir(base_dir)
|
|
.with_context(|| format!("Failed to read travel-times dir: {}", base_dir.display()))?
|
|
{
|
|
let entry = entry?;
|
|
let path = entry.path();
|
|
if !path.is_dir() {
|
|
continue;
|
|
}
|
|
let mode = entry.file_name().to_string_lossy().to_string();
|
|
|
|
let mut slugs = FxHashSet::default();
|
|
for file_entry in std::fs::read_dir(&path)
|
|
.with_context(|| format!("Failed to read mode dir: {}", path.display()))?
|
|
{
|
|
let file_entry = file_entry?;
|
|
let file_name = file_entry.file_name();
|
|
let file_name = file_name.to_string_lossy();
|
|
if file_name.ends_with(".parquet") {
|
|
let file_stem = file_name.trim_end_matches(".parquet");
|
|
let slug = strip_numeric_prefix(file_stem).to_string();
|
|
slug_to_file.insert((mode.clone(), slug.clone()), file_stem.to_string());
|
|
slugs.insert(slug);
|
|
}
|
|
}
|
|
|
|
if !slugs.is_empty() {
|
|
info!(
|
|
mode = mode.as_str(),
|
|
destinations = slugs.len(),
|
|
"Travel time mode discovered"
|
|
);
|
|
available_modes.push(mode.clone());
|
|
destinations.insert(mode, slugs);
|
|
}
|
|
}
|
|
|
|
available_modes.sort();
|
|
|
|
Ok(Self {
|
|
base_dir: base_dir.to_path_buf(),
|
|
available_modes,
|
|
destinations,
|
|
slug_to_file,
|
|
cache: Mutex::new(LruCache::new(cache_capacity)),
|
|
})
|
|
}
|
|
|
|
/// Load travel time data for a given mode and destination slug.
|
|
/// Returns a cached or freshly-loaded postcode → travel_minutes mapping.
|
|
pub fn get(&self, mode: &str, slug: &str) -> anyhow::Result<TravelData> {
|
|
let key = (mode.to_string(), slug.to_string());
|
|
|
|
// Check cache first
|
|
{
|
|
let mut cache = self.cache.lock();
|
|
if let Some(data) = cache.get(&key) {
|
|
return Ok(data);
|
|
}
|
|
}
|
|
|
|
// Resolve slug to actual filename (may have numeric prefix).
|
|
// Reject unknown slugs rather than falling back to raw input to prevent path traversal.
|
|
let file_stem = self
|
|
.slug_to_file
|
|
.get(&key)
|
|
.ok_or_else(|| anyhow::anyhow!("Unknown travel destination: {mode}/{slug}"))?;
|
|
let path = self
|
|
.base_dir
|
|
.join(mode)
|
|
.join(format!("{}.parquet", file_stem));
|
|
|
|
let df = LazyFrame::scan_parquet(&path, Default::default())
|
|
.with_context(|| format!("Failed to scan: {}", path.display()))?
|
|
.collect()
|
|
.with_context(|| format!("Failed to read: {}", path.display()))?;
|
|
|
|
let postcodes = df
|
|
.column("pcds")
|
|
.context("Missing 'pcds' column")?
|
|
.str()
|
|
.context("'pcds' is not string")?;
|
|
let minutes = df
|
|
.column("travel_minutes")
|
|
.context("Missing 'travel_minutes' column")?
|
|
.i16()
|
|
.context("'travel_minutes' is not i16")?;
|
|
let best = df
|
|
.column("best_minutes")
|
|
.ok()
|
|
.map(|col| col.i16().expect("'best_minutes' is not i16"));
|
|
let journeys = df
|
|
.column("journey")
|
|
.ok()
|
|
.map(|col| col.str().expect("'journey' is not string"));
|
|
|
|
let mut map = FxHashMap::default();
|
|
map.reserve(df.height());
|
|
for (i, (pc, min)) in postcodes.into_iter().zip(minutes.into_iter()).enumerate() {
|
|
if let (Some(pc), Some(min)) = (pc, min) {
|
|
let best_min = best.as_ref().and_then(|b| b.get(i));
|
|
let journey = journeys.as_ref().and_then(|j| j.get(i)).map(Arc::from);
|
|
map.insert(
|
|
pc.to_string(),
|
|
TravelDataRow {
|
|
minutes: min,
|
|
best_minutes: best_min,
|
|
journey,
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
let data: TravelData = Arc::new(map);
|
|
|
|
// Insert into cache
|
|
{
|
|
let mut cache = self.cache.lock();
|
|
cache.insert(key, data.clone());
|
|
}
|
|
|
|
Ok(data)
|
|
}
|
|
|
|
/// Check if a mode + slug combination is available.
|
|
pub fn has_destination(&self, mode: &str, slug: &str) -> bool {
|
|
self.destinations
|
|
.get(mode)
|
|
.map(|slugs| slugs.contains(slug))
|
|
.unwrap_or(false)
|
|
}
|
|
}
|
|
|
|
/// Turn a place name into the slug form used by travel time filenames.
///
/// Runs of ASCII letters and digits are lowercased and joined with single
/// hyphens; every other character acts as a separator, and the result never
/// starts or ends with a hyphen.
/// "Abbey Hey" → "abbey-hey", "A'Bhuaile Ghlas" → "a-bhuaile-ghlas"
pub fn slugify(name: &str) -> String {
    let mut slug = String::with_capacity(name.len());
    let words = name
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty());
    for word in words {
        // Separator goes between words only, never before the first one.
        if !slug.is_empty() {
            slug.push('-');
        }
        slug.extend(word.chars().map(|ch| ch.to_ascii_lowercase()));
    }
    slug
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Place names are lowercased and their words joined with hyphens.
    #[test]
    fn slugify_basic() {
        let cases = [("Abbey Hey", "abbey-hey"), ("London", "london")];
        for &(input, expected) in &cases {
            assert_eq!(slugify(input), expected);
        }
    }

    /// A leading all-digit segment is removed; stems without one are untouched.
    #[test]
    fn strip_numeric_prefix_basic() {
        let cases = [
            ("000000-bank-tube-station", "bank-tube-station"),
            ("000123-abbey-hey", "abbey-hey"),
            ("bank-tube-station", "bank-tube-station"),
            ("london", "london"),
        ];
        for &(stem, expected) in &cases {
            assert_eq!(strip_numeric_prefix(stem), expected);
        }
    }
}
|