Good stuff

This commit is contained in:
Andras Schmelczer 2026-02-22 22:36:40 +00:00
parent 9da2db707f
commit 8032011708
32 changed files with 1052 additions and 374 deletions

View file

@ -66,24 +66,41 @@ impl LruCache {
}
}
/// Strip a numeric prefix like "000000-" from a filename stem.
///
/// The prefix is everything before the first `-`. It is removed (together with
/// that `-`) only when it is a non-empty run of ASCII digits; any other stem is
/// returned unchanged.
///
/// "000000-bank-tube-station" → "bank-tube-station"
/// "bank-tube-station" → "bank-tube-station"
/// "-bank" → "-bank" (an empty prefix is not a numeric prefix)
fn strip_numeric_prefix(stem: &str) -> &str {
    match stem.split_once('-') {
        // `all` is vacuously true for an empty prefix, so a stem that merely
        // starts with '-' must be excluded explicitly.
        Some((prefix, rest))
            if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) =>
        {
            rest
        }
        _ => stem,
    }
}
/// Manages on-demand loading and caching of precomputed travel time parquet files.
///
/// Directory structure: `{base_dir}/{mode}/{NNNNNN-slug}.parquet`
/// Files have a numeric prefix for uniqueness; lookups use the stripped slug.
/// Each parquet file has columns: `pcds` (String), `travel_minutes` (Int16).
pub struct TravelTimeStore {
/// Root directory containing one subdirectory per transport mode.
base_dir: PathBuf,
/// Available transport modes (subdirectory names, e.g., "bicycle")
pub available_modes: Vec<String>,
/// mode → set of destination slugs (filenames without `.parquet`, numeric prefix stripped)
pub destinations: FxHashMap<String, FxHashSet<String>>,
/// (mode, stripped_slug) → full filename stem (with numeric prefix, without `.parquet`);
/// used to rebuild the on-disk path when a slug is loaded.
slug_to_file: FxHashMap<(String, String), String>,
/// LRU cache guarded by a mutex; its capacity comes from `load`'s `cache_capacity`.
cache: Mutex<LruCache>,
}
impl TravelTimeStore {
/// Scan the travel-times directory to discover available modes and destinations.
/// Filename stems have a numeric prefix (e.g., "000000-bank-tube-station") which
/// is stripped for slug lookups but preserved for file loading.
pub fn load(base_dir: &Path, cache_capacity: usize) -> anyhow::Result<Self> {
let mut available_modes = Vec::new();
let mut destinations: FxHashMap<String, FxHashSet<String>> = FxHashMap::default();
let mut slug_to_file: FxHashMap<(String, String), String> = FxHashMap::default();
for entry in std::fs::read_dir(base_dir)
.with_context(|| format!("Failed to read travel-times dir: {}", base_dir.display()))?
@ -103,7 +120,12 @@ impl TravelTimeStore {
let file_name = file_entry.file_name();
let file_name = file_name.to_string_lossy();
if file_name.ends_with(".parquet") {
let slug = file_name.trim_end_matches(".parquet").to_string();
let file_stem = file_name.trim_end_matches(".parquet");
let slug = strip_numeric_prefix(file_stem).to_string();
slug_to_file.insert(
(mode.clone(), slug.clone()),
file_stem.to_string(),
);
slugs.insert(slug);
}
}
@ -125,6 +147,7 @@ impl TravelTimeStore {
base_dir: base_dir.to_path_buf(),
available_modes,
destinations,
slug_to_file,
cache: Mutex::new(LruCache::new(cache_capacity)),
})
}
@ -142,11 +165,16 @@ impl TravelTimeStore {
}
}
// Load from file (no lock held — harmless if two threads load the same file)
// Resolve slug to actual filename (may have numeric prefix)
let file_stem = self
.slug_to_file
.get(&key)
.map(|val| val.as_str())
.unwrap_or(slug);
let path = self
.base_dir
.join(mode)
.join(format!("{}.parquet", slug));
.join(format!("{}.parquet", file_stem));
if !path.exists() {
bail!("Travel time file not found: {}", path.display());
}
@ -233,18 +261,15 @@ mod tests {
#[test]
fn slugify_basic() {
assert_eq!(slugify("Abbey Hey"), "abbey-hey");
assert_eq!(slugify("Abbots Bickington"), "abbots-bickington");
assert_eq!(slugify("London"), "london");
}
#[test]
fn slugify_special_chars() {
assert_eq!(slugify("A'Bhuaile Ghlas"), "a-bhuaile-ghlas");
}
#[test]
fn slugify_edges() {
assert_eq!(slugify(" Hello "), "hello");
assert_eq!(slugify("Abbey"), "abbey");
fn strip_numeric_prefix_basic() {
assert_eq!(strip_numeric_prefix("000000-bank-tube-station"), "bank-tube-station");
assert_eq!(strip_numeric_prefix("000123-abbey-hey"), "abbey-hey");
assert_eq!(strip_numeric_prefix("bank-tube-station"), "bank-tube-station");
assert_eq!(strip_numeric_prefix("london"), "london");
}
}