lmao
This commit is contained in:
parent
03445188ea
commit
524580eb25
102 changed files with 36625 additions and 1295 deletions
232
server-rs/src/data/travel_time.rs
Normal file
232
server-rs/src/data/travel_time.rs
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
use std::collections::VecDeque;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use parking_lot::Mutex;
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use tracing::info;
|
||||
|
||||
/// Cached postcode → travel_minutes mapping for a single destination file.
|
||||
pub type TravelData = Arc<FxHashMap<String, i16>>;
|
||||
|
||||
/// Simple LRU cache for travel time data, limited to `capacity` entries.
|
||||
struct LruCache {
|
||||
map: FxHashMap<(String, String), TravelData>,
|
||||
order: VecDeque<(String, String)>,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl LruCache {
|
||||
fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
map: FxHashMap::default(),
|
||||
order: VecDeque::with_capacity(capacity),
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
fn get(&mut self, key: &(String, String)) -> Option<TravelData> {
|
||||
if let Some(data) = self.map.get(key) {
|
||||
// Move to front (most recently used)
|
||||
if let Some(pos) = self.order.iter().position(|k| k == key) {
|
||||
self.order.remove(pos);
|
||||
self.order.push_front(key.clone());
|
||||
}
|
||||
Some(data.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn insert(&mut self, key: (String, String), data: TravelData) {
|
||||
if self.map.contains_key(&key) {
|
||||
self.map.insert(key.clone(), data);
|
||||
if let Some(pos) = self.order.iter().position(|k| k == &key) {
|
||||
self.order.remove(pos);
|
||||
}
|
||||
self.order.push_front(key);
|
||||
} else {
|
||||
while self.map.len() >= self.capacity {
|
||||
if let Some(old_key) = self.order.pop_back() {
|
||||
self.map.remove(&old_key);
|
||||
}
|
||||
}
|
||||
self.map.insert(key.clone(), data);
|
||||
self.order.push_front(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages on-demand loading and caching of precomputed travel time parquet files.
|
||||
///
|
||||
/// Directory structure: `{base_dir}/{mode}/{slug}.parquet`
|
||||
/// Each parquet file has columns: `pcds` (String), `travel_minutes` (Int16).
|
||||
pub struct TravelTimeStore {
|
||||
base_dir: PathBuf,
|
||||
/// Available transport modes (subdirectory names, e.g., "bicycle")
|
||||
pub available_modes: Vec<String>,
|
||||
/// mode → set of destination slugs (filenames without .parquet)
|
||||
pub destinations: FxHashMap<String, FxHashSet<String>>,
|
||||
cache: Mutex<LruCache>,
|
||||
}
|
||||
|
||||
impl TravelTimeStore {
|
||||
/// Scan the travel-times directory to discover available modes and destinations.
|
||||
pub fn load(base_dir: &Path, cache_capacity: usize) -> anyhow::Result<Self> {
|
||||
let mut available_modes = Vec::new();
|
||||
let mut destinations: FxHashMap<String, FxHashSet<String>> = FxHashMap::default();
|
||||
|
||||
for entry in std::fs::read_dir(base_dir)
|
||||
.with_context(|| format!("Failed to read travel-times dir: {}", base_dir.display()))?
|
||||
{
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
if !path.is_dir() {
|
||||
continue;
|
||||
}
|
||||
let mode = entry.file_name().to_string_lossy().to_string();
|
||||
|
||||
let mut slugs = FxHashSet::default();
|
||||
for file_entry in std::fs::read_dir(&path)
|
||||
.with_context(|| format!("Failed to read mode dir: {}", path.display()))?
|
||||
{
|
||||
let file_entry = file_entry?;
|
||||
let file_name = file_entry.file_name();
|
||||
let file_name = file_name.to_string_lossy();
|
||||
if file_name.ends_with(".parquet") {
|
||||
let slug = file_name.trim_end_matches(".parquet").to_string();
|
||||
slugs.insert(slug);
|
||||
}
|
||||
}
|
||||
|
||||
if !slugs.is_empty() {
|
||||
info!(
|
||||
mode = mode.as_str(),
|
||||
destinations = slugs.len(),
|
||||
"Travel time mode discovered"
|
||||
);
|
||||
available_modes.push(mode.clone());
|
||||
destinations.insert(mode, slugs);
|
||||
}
|
||||
}
|
||||
|
||||
available_modes.sort();
|
||||
|
||||
Ok(Self {
|
||||
base_dir: base_dir.to_path_buf(),
|
||||
available_modes,
|
||||
destinations,
|
||||
cache: Mutex::new(LruCache::new(cache_capacity)),
|
||||
})
|
||||
}
|
||||
|
||||
/// Load travel time data for a given mode and destination slug.
|
||||
/// Returns a cached or freshly-loaded postcode → travel_minutes mapping.
|
||||
pub fn get(&self, mode: &str, slug: &str) -> anyhow::Result<TravelData> {
|
||||
let key = (mode.to_string(), slug.to_string());
|
||||
|
||||
// Check cache first
|
||||
{
|
||||
let mut cache = self.cache.lock();
|
||||
if let Some(data) = cache.get(&key) {
|
||||
return Ok(data);
|
||||
}
|
||||
}
|
||||
|
||||
// Load from file (no lock held — harmless if two threads load the same file)
|
||||
let path = self
|
||||
.base_dir
|
||||
.join(mode)
|
||||
.join(format!("{}.parquet", slug));
|
||||
if !path.exists() {
|
||||
bail!("Travel time file not found: {}", path.display());
|
||||
}
|
||||
|
||||
let df = LazyFrame::scan_parquet(&path, Default::default())
|
||||
.with_context(|| format!("Failed to scan: {}", path.display()))?
|
||||
.collect()
|
||||
.with_context(|| format!("Failed to read: {}", path.display()))?;
|
||||
|
||||
let postcodes = df
|
||||
.column("pcds")
|
||||
.context("Missing 'pcds' column")?
|
||||
.str()
|
||||
.context("'pcds' is not string")?;
|
||||
let minutes = df
|
||||
.column("travel_minutes")
|
||||
.context("Missing 'travel_minutes' column")?
|
||||
.i16()
|
||||
.context("'travel_minutes' is not i16")?;
|
||||
|
||||
let mut map = FxHashMap::default();
|
||||
map.reserve(df.height());
|
||||
for (pc, min) in postcodes.into_iter().zip(minutes.into_iter()) {
|
||||
if let (Some(pc), Some(min)) = (pc, min) {
|
||||
map.insert(pc.to_string(), min);
|
||||
}
|
||||
}
|
||||
|
||||
let data: TravelData = Arc::new(map);
|
||||
|
||||
// Insert into cache
|
||||
{
|
||||
let mut cache = self.cache.lock();
|
||||
cache.insert(key, data.clone());
|
||||
}
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
/// Check if a mode + slug combination is available.
|
||||
pub fn has_destination(&self, mode: &str, slug: &str) -> bool {
|
||||
self.destinations
|
||||
.get(mode)
|
||||
.map(|slugs| slugs.contains(slug))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a place name into the slug used for travel time file names.
///
/// ASCII letters and digits are kept (lowercased); every run of other
/// characters becomes a single hyphen, and the result never starts or ends
/// with a hyphen.
///
/// "Abbey Hey" → "abbey-hey", "A'Bhuaile Ghlas" → "a-bhuaile-ghlas"
pub fn slugify(name: &str) -> String {
    let mut slug = String::with_capacity(name.len());

    // Break the name into maximal runs of ASCII alphanumerics; empty pieces
    // come from leading, trailing, or consecutive separators and are dropped.
    let words = name
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty());

    for word in words {
        if !slug.is_empty() {
            slug.push('-');
        }
        slug.extend(word.chars().map(|c| c.to_ascii_lowercase()));
    }

    slug
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `slugify` maps every input to its expected slug.
    fn check(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(slugify(input), expected, "slugify({:?})", input);
        }
    }

    // Plain names: spaces become hyphens and letters are lowercased.
    #[test]
    fn slugify_basic() {
        check(&[
            ("Abbey Hey", "abbey-hey"),
            ("Abbots Bickington", "abbots-bickington"),
            ("London", "london"),
        ]);
    }

    // Punctuation is treated as a separator.
    #[test]
    fn slugify_special_chars() {
        check(&[("A'Bhuaile Ghlas", "a-bhuaile-ghlas")]);
    }

    // Surrounding whitespace produces no leading or trailing hyphen.
    #[test]
    fn slugify_edges() {
        check(&[(" Hello ", "hello"), ("Abbey", "abbey")]);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue