lmao
This commit is contained in:
parent
03445188ea
commit
524580eb25
102 changed files with 36625 additions and 1295 deletions
168
server-rs/src/data/places.rs
Normal file
168
server-rs/src/data/places.rs
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
use std::path::Path;
|
||||
|
||||
use anyhow::Context;
|
||||
use polars::frame::DataFrame;
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use polars::prelude::*;
|
||||
use tracing::info;
|
||||
|
||||
use crate::utils::InternedColumn;
|
||||
|
||||
pub struct PlaceData {
|
||||
pub name: Vec<String>,
|
||||
pub name_lower: Vec<String>,
|
||||
pub place_type: InternedColumn,
|
||||
pub type_rank: Vec<u8>,
|
||||
pub population: Vec<u32>,
|
||||
pub lat: Vec<f32>,
|
||||
pub lon: Vec<f32>,
|
||||
pub city: Vec<Option<String>>,
|
||||
}
|
||||
|
||||
/// Maps a place-type string to its prominence rank.
///
/// Known types are ranked by their position in the table below (0 = `city`);
/// anything else, including differently-cased spellings, gets the rank just
/// past the end of the table (12).
fn type_rank(place_type: &str) -> u8 {
    // Ordered from most to least prominent; the index is the rank.
    const RANKED_TYPES: [&str; 12] = [
        "city",
        "borough",
        "town",
        "suburb",
        "quarter",
        "neighbourhood",
        "village",
        "station",
        "island",
        "hamlet",
        "locality",
        "isolated_dwelling",
    ];

    RANKED_TYPES
        .iter()
        .position(|&known| known == place_type)
        .map_or(RANKED_TYPES.len() as u8, |rank| rank as u8)
}
|
||||
|
||||
/// Copies the string column `name` of `df` into owned `String`s.
///
/// Null cells are turned into empty strings.
///
/// # Errors
/// Fails when the column is absent or is not a string column.
fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<String>> {
    let strings = df
        .column(name)
        .with_context(|| format!("Missing column '{name}' in places data"))?
        .str()
        .with_context(|| format!("Column '{name}' is not a string column"))?;

    let owned = strings
        .into_iter()
        .map(|cell| cell.unwrap_or_default().to_owned())
        .collect();
    Ok(owned)
}
|
||||
|
||||
fn extract_f32_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<f32>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}' in places data"))?;
|
||||
let cast = column
|
||||
.cast(&DataType::Float32)
|
||||
.with_context(|| format!("Failed to cast column '{name}' to Float32"))?;
|
||||
let float_column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not a float32 column"))?;
|
||||
Ok(float_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or(0.0))
|
||||
.collect())
|
||||
}
|
||||
|
||||
impl PlaceData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
info!("Loading place data from {:?}...", parquet_path);
|
||||
|
||||
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.context("Failed to scan places parquet")?
|
||||
.collect()
|
||||
.context("Failed to read places parquet")?;
|
||||
|
||||
let row_count = df.height();
|
||||
info!("Loaded {} places", row_count);
|
||||
|
||||
let name = extract_str_col(&df, "name")?;
|
||||
let place_type_raw = extract_str_col(&df, "place_type")?;
|
||||
let lat = extract_f32_col(&df, "lat")?;
|
||||
let lon = extract_f32_col(&df, "lon")?;
|
||||
let population: Vec<u32> = if df.column("population").is_ok() {
|
||||
let pop_f32 = extract_f32_col(&df, "population")?;
|
||||
pop_f32.iter().map(|&val| val.max(0.0) as u32).collect()
|
||||
} else {
|
||||
vec![0; row_count]
|
||||
};
|
||||
|
||||
let name_lower: Vec<String> = name.iter().map(|nm| nm.to_lowercase()).collect();
|
||||
let type_rank_vec: Vec<u8> = place_type_raw.iter().map(|pt| type_rank(pt)).collect();
|
||||
let place_type = InternedColumn::build(&place_type_raw);
|
||||
|
||||
// Precompute nearest city for each non-city place
|
||||
let city_indices: Vec<usize> = type_rank_vec
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
|
||||
.collect();
|
||||
|
||||
let city: Vec<Option<String>> = (0..row_count)
|
||||
.map(|idx| {
|
||||
if type_rank_vec[idx] == 0 {
|
||||
return None; // Cities don't need a city label
|
||||
}
|
||||
let plat = lat[idx];
|
||||
let plon = lon[idx];
|
||||
let cos_lat = (plat.to_radians()).cos();
|
||||
|
||||
let mut best_dist_sq = f32::MAX;
|
||||
let mut best_city: Option<&str> = None;
|
||||
for &ci in &city_indices {
|
||||
let dlat = lat[ci] - plat;
|
||||
let dlon = (lon[ci] - plon) * cos_lat;
|
||||
let dist_sq = dlat * dlat + dlon * dlon;
|
||||
if dist_sq < best_dist_sq {
|
||||
best_dist_sq = dist_sq;
|
||||
best_city = Some(&name[ci]);
|
||||
}
|
||||
}
|
||||
|
||||
// ~100km threshold: 1° ≈ 111km, so 0.9° ≈ 100km → 0.81 squared
|
||||
if best_dist_sq < 0.81 {
|
||||
best_city.map(|s| s.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let with_pop = population.iter().filter(|&&pop| pop > 0).count();
|
||||
let with_city = city.iter().filter(|c| c.is_some()).count();
|
||||
info!(
|
||||
places = row_count,
|
||||
types = place_type.values.len(),
|
||||
with_population = with_pop,
|
||||
with_city = with_city,
|
||||
"Place data loaded"
|
||||
);
|
||||
|
||||
Ok(PlaceData {
|
||||
name,
|
||||
name_lower,
|
||||
place_type,
|
||||
type_rank: type_rank_vec,
|
||||
population,
|
||||
lat,
|
||||
lon,
|
||||
city,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Ranks must increase strictly along this chain of place types.
    #[test]
    fn type_rank_ordering() {
        let chain = [
            "city",
            "town",
            "suburb",
            "village",
            "hamlet",
            "isolated_dwelling",
        ];
        for pair in chain.windows(2) {
            assert!(type_rank(pair[0]) < type_rank(pair[1]));
        }
    }
}
|
||||
149
server-rs/src/data/poi.rs
Normal file
149
server-rs/src/data/poi.rs
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use polars::frame::DataFrame;
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use polars::prelude::*;
|
||||
use serde::Serialize;
|
||||
use tracing::info;
|
||||
|
||||
use crate::features::POI_GROUP_ORDER;
|
||||
use crate::utils::{generate_priorities, InternedColumn};
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub struct POICategoryGroup {
|
||||
pub name: String,
|
||||
pub categories: Vec<String>,
|
||||
}
|
||||
|
||||
pub struct POIData {
|
||||
pub id: Vec<String>,
|
||||
pub group: InternedColumn,
|
||||
pub category: InternedColumn,
|
||||
pub name: Vec<String>,
|
||||
pub lat: Vec<f32>,
|
||||
pub lng: Vec<f32>,
|
||||
pub emoji: InternedColumn,
|
||||
/// Deterministic pseudo-random priority per row, used to select a spatially
|
||||
/// uniform subset when the POI count exceeds the per-request limit.
|
||||
/// Computed once at load time so the same POIs are always chosen for a given viewport.
|
||||
pub priority: Vec<u32>,
|
||||
}
|
||||
|
||||
/// Copies the string column `name` of `df` into owned `String`s.
///
/// Null cells are turned into empty strings.
///
/// # Errors
/// Fails when the column is absent or is not a string column.
fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<String>> {
    let strings = df
        .column(name)
        .with_context(|| format!("Missing column '{name}' in POI data"))?
        .str()
        .with_context(|| format!("Column '{name}' is not a string column"))?;

    let owned = strings
        .into_iter()
        .map(|cell| cell.unwrap_or_default().to_owned())
        .collect();
    Ok(owned)
}
|
||||
|
||||
fn extract_f32_col(df: &DataFrame, name: &str, default: f32) -> anyhow::Result<Vec<f32>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}' in POI data"))?;
|
||||
let cast = column
|
||||
.cast(&DataType::Float32)
|
||||
.with_context(|| format!("Failed to cast column '{name}' to Float32"))?;
|
||||
let float_column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not a float32 column"))?;
|
||||
Ok(float_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or(default))
|
||||
.collect())
|
||||
}
|
||||
|
||||
impl POIData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
info!("Loading POI data from {:?}...", parquet_path);
|
||||
|
||||
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.context("Failed to scan POI parquet")?
|
||||
.collect()
|
||||
.context("Failed to read POI parquet")?;
|
||||
|
||||
let row_count = df.height();
|
||||
info!("Loaded {} POIs", row_count);
|
||||
|
||||
let id: Vec<String> = extract_str_col(&df, "id")?;
|
||||
let name = extract_str_col(&df, "name")?;
|
||||
let category_raw = extract_str_col(&df, "category")?;
|
||||
let group_raw = extract_str_col(&df, "group")?;
|
||||
let lat = extract_f32_col(&df, "lat", 0.0)?;
|
||||
let lng = extract_f32_col(&df, "lng", 0.0)?;
|
||||
let emoji_raw = extract_str_col(&df, "emoji")?;
|
||||
|
||||
let category = InternedColumn::build(&category_raw);
|
||||
let group = InternedColumn::build(&group_raw);
|
||||
let emoji = InternedColumn::build(&emoji_raw);
|
||||
|
||||
info!(
|
||||
category_unique = category.values.len(),
|
||||
group_unique = group.values.len(),
|
||||
emoji_unique = emoji.values.len(),
|
||||
"POI string columns interned"
|
||||
);
|
||||
|
||||
// Assign a deterministic pseudo-random priority to each row.
|
||||
// This ensures the same POIs are selected across requests,
|
||||
// preventing visual "shuffling" when panning the map.
|
||||
let priority = generate_priorities(row_count);
|
||||
|
||||
info!("POI data loading complete.");
|
||||
|
||||
Ok(POIData {
|
||||
id,
|
||||
name,
|
||||
category,
|
||||
group,
|
||||
lat,
|
||||
lng,
|
||||
emoji,
|
||||
priority,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build category groups from the loaded POI data, validated against POI_GROUP_ORDER.
|
||||
pub fn category_groups(&self) -> anyhow::Result<Vec<POICategoryGroup>> {
|
||||
let mut group_cats: HashMap<String, HashSet<String>> = HashMap::new();
|
||||
let num_pois = self.category.indices.len();
|
||||
for row in 0..num_pois {
|
||||
let category = self.category.get(row).to_string();
|
||||
let group = self.group.get(row).to_string();
|
||||
group_cats.entry(group).or_default().insert(category);
|
||||
}
|
||||
|
||||
// Validate that data groups match the hardcoded order exactly
|
||||
let expected: HashSet<&str> = POI_GROUP_ORDER.iter().copied().collect();
|
||||
let actual: HashSet<&str> = group_cats.keys().map(|key| key.as_str()).collect();
|
||||
let missing_from_data: Vec<&&str> = expected.difference(&actual).collect();
|
||||
let missing_from_order: Vec<&&str> = actual.difference(&expected).collect();
|
||||
if !missing_from_data.is_empty() || !missing_from_order.is_empty() {
|
||||
bail!(
|
||||
"POI group mismatch!\n In POI_GROUP_ORDER but not in data: {:?}\n In data but not in POI_GROUP_ORDER: {:?}",
|
||||
missing_from_data, missing_from_order
|
||||
);
|
||||
}
|
||||
|
||||
POI_GROUP_ORDER
|
||||
.iter()
|
||||
.map(|group_name| {
|
||||
let name = group_name.to_string();
|
||||
let mut categories: Vec<String> = group_cats
|
||||
.remove(&name)
|
||||
.context("POI group validated but missing from map")?
|
||||
.into_iter()
|
||||
.collect();
|
||||
categories.sort();
|
||||
Ok(POICategoryGroup { name, categories })
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
179
server-rs/src/data/postcodes.rs
Normal file
179
server-rs/src/data/postcodes.rs
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
use anyhow::Context;
|
||||
use rayon::prelude::*;
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::Deserialize;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use tracing::{debug, info};
|
||||
|
||||
/// GeoJSON structures for parsing postcode boundary files
|
||||
#[derive(Deserialize)]
|
||||
struct FeatureCollection {
|
||||
features: Vec<Feature>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Feature {
|
||||
geometry: Geometry,
|
||||
properties: Properties,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
enum Geometry {
|
||||
Polygon {
|
||||
coordinates: Vec<Vec<[f64; 2]>>,
|
||||
},
|
||||
MultiPolygon {
|
||||
coordinates: Vec<Vec<Vec<[f64; 2]>>>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Properties {
|
||||
postcodes: String,
|
||||
}
|
||||
|
||||
/// Postcode boundary data: polygon vertices and spatial index for fast queries.
|
||||
pub struct PostcodeData {
|
||||
/// Postcode strings
|
||||
pub postcodes: Vec<String>,
|
||||
/// All polygon parts per postcode: polygons[i] = list of outer rings
|
||||
/// Single Polygon → 1 ring, MultiPolygon → N rings
|
||||
pub polygons: Vec<Vec<Vec<[f32; 2]>>>,
|
||||
/// Centroid (lat, lon) for lookups
|
||||
pub centroids: Vec<(f32, f32)>,
|
||||
/// Lookup from postcode string to index
|
||||
pub postcode_to_idx: FxHashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl PostcodeData {
|
||||
/// Load postcode boundaries from a directory of GeoJSON files.
|
||||
/// Expects the directory to have a `units/` subdirectory containing .geojson files.
|
||||
pub fn load(dir_path: &Path) -> anyhow::Result<Self> {
|
||||
info!("Loading postcode boundaries from {:?}", dir_path);
|
||||
|
||||
let units_dir = dir_path.join("units");
|
||||
if !units_dir.exists() {
|
||||
anyhow::bail!(
|
||||
"Expected 'units' subdirectory in postcode boundaries path: {:?}",
|
||||
dir_path
|
||||
);
|
||||
}
|
||||
|
||||
let mut postcodes: Vec<String> = Vec::new();
|
||||
let mut polygons: Vec<Vec<Vec<[f32; 2]>>> = Vec::new();
|
||||
let mut centroids: Vec<(f32, f32)> = Vec::new();
|
||||
|
||||
// Read all .geojson files in the units directory
|
||||
let mut entries: Vec<_> = fs::read_dir(&units_dir)
|
||||
.with_context(|| format!("Failed to read directory: {:?}", units_dir))?
|
||||
.filter_map(|entry| entry.ok())
|
||||
.filter(|entry| {
|
||||
entry
|
||||
.path()
|
||||
.extension()
|
||||
.map(|ext| ext == "geojson")
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect();
|
||||
|
||||
entries.sort_by_key(|entry| entry.path());
|
||||
|
||||
info!(files = entries.len(), "Found GeoJSON files to process");
|
||||
|
||||
// Parse files in parallel
|
||||
let file_results: Vec<_> = entries
|
||||
.into_par_iter()
|
||||
.map(|entry| {
|
||||
let file_path = entry.path();
|
||||
let content = fs::read_to_string(&file_path)
|
||||
.with_context(|| format!("Failed to read file: {:?}", file_path))?;
|
||||
|
||||
let collection: FeatureCollection = serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse GeoJSON: {:?}", file_path))?;
|
||||
|
||||
let mut local_postcodes = Vec::new();
|
||||
let mut local_polygons = Vec::new();
|
||||
let mut local_centroids = Vec::new();
|
||||
|
||||
for feature in collection.features {
|
||||
let postcode = feature.properties.postcodes;
|
||||
|
||||
// Extract all outer rings from the geometry
|
||||
let rings: Vec<Vec<[f32; 2]>> = match feature.geometry {
|
||||
Geometry::Polygon { coordinates } => coordinates
|
||||
.first()
|
||||
.map(|ring| {
|
||||
vec![ring
|
||||
.iter()
|
||||
.map(|[lon, lat]| [*lon as f32, *lat as f32])
|
||||
.collect()]
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
Geometry::MultiPolygon { coordinates } => coordinates
|
||||
.iter()
|
||||
.filter_map(|poly| {
|
||||
poly.first().map(|ring| {
|
||||
ring.iter()
|
||||
.map(|[lon, lat]| [*lon as f32, *lat as f32])
|
||||
.collect()
|
||||
})
|
||||
})
|
||||
.collect(),
|
||||
};
|
||||
|
||||
// Compute centroid across all vertices from all rings
|
||||
let total_vertices: usize = rings.iter().map(|ring| ring.len()).sum();
|
||||
let centroid = if total_vertices == 0 {
|
||||
(0.0, 0.0)
|
||||
} else {
|
||||
let mut sum_lat: f32 = 0.0;
|
||||
let mut sum_lon: f32 = 0.0;
|
||||
for ring in &rings {
|
||||
for &[lon, lat] in ring {
|
||||
sum_lat += lat;
|
||||
sum_lon += lon;
|
||||
}
|
||||
}
|
||||
let count = total_vertices as f32;
|
||||
(sum_lat / count, sum_lon / count)
|
||||
};
|
||||
|
||||
local_postcodes.push(postcode);
|
||||
local_polygons.push(rings);
|
||||
local_centroids.push(centroid);
|
||||
}
|
||||
|
||||
Ok::<_, anyhow::Error>((local_postcodes, local_polygons, local_centroids))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
// Flatten results
|
||||
for (local_postcodes, local_polygons, local_centroids) in file_results {
|
||||
postcodes.extend(local_postcodes);
|
||||
polygons.extend(local_polygons);
|
||||
centroids.extend(local_centroids);
|
||||
}
|
||||
|
||||
debug!(
|
||||
postcodes = postcodes.len(),
|
||||
"Extracted postcodes from GeoJSON"
|
||||
);
|
||||
|
||||
// Build postcode -> index lookup
|
||||
let mut postcode_to_idx: FxHashMap<String, usize> = FxHashMap::default();
|
||||
for (idx, postcode) in postcodes.iter().enumerate() {
|
||||
postcode_to_idx.insert(postcode.clone(), idx);
|
||||
}
|
||||
|
||||
info!(postcodes = postcodes.len(), "Postcode boundary data ready");
|
||||
|
||||
Ok(PostcodeData {
|
||||
postcodes,
|
||||
polygons,
|
||||
centroids,
|
||||
postcode_to_idx,
|
||||
})
|
||||
}
|
||||
}
|
||||
1073
server-rs/src/data/property.rs
Normal file
1073
server-rs/src/data/property.rs
Normal file
File diff suppressed because it is too large
Load diff
232
server-rs/src/data/travel_time.rs
Normal file
232
server-rs/src/data/travel_time.rs
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
use std::collections::VecDeque;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use parking_lot::Mutex;
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use tracing::info;
|
||||
|
||||
/// Cached postcode → travel_minutes mapping for a single destination file.
|
||||
pub type TravelData = Arc<FxHashMap<String, i16>>;
|
||||
|
||||
/// Simple LRU cache for travel time data, limited to `capacity` entries.
|
||||
struct LruCache {
|
||||
map: FxHashMap<(String, String), TravelData>,
|
||||
order: VecDeque<(String, String)>,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl LruCache {
|
||||
fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
map: FxHashMap::default(),
|
||||
order: VecDeque::with_capacity(capacity),
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
fn get(&mut self, key: &(String, String)) -> Option<TravelData> {
|
||||
if let Some(data) = self.map.get(key) {
|
||||
// Move to front (most recently used)
|
||||
if let Some(pos) = self.order.iter().position(|k| k == key) {
|
||||
self.order.remove(pos);
|
||||
self.order.push_front(key.clone());
|
||||
}
|
||||
Some(data.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn insert(&mut self, key: (String, String), data: TravelData) {
|
||||
if self.map.contains_key(&key) {
|
||||
self.map.insert(key.clone(), data);
|
||||
if let Some(pos) = self.order.iter().position(|k| k == &key) {
|
||||
self.order.remove(pos);
|
||||
}
|
||||
self.order.push_front(key);
|
||||
} else {
|
||||
while self.map.len() >= self.capacity {
|
||||
if let Some(old_key) = self.order.pop_back() {
|
||||
self.map.remove(&old_key);
|
||||
}
|
||||
}
|
||||
self.map.insert(key.clone(), data);
|
||||
self.order.push_front(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages on-demand loading and caching of precomputed travel time parquet files.
|
||||
///
|
||||
/// Directory structure: `{base_dir}/{mode}/{slug}.parquet`
|
||||
/// Each parquet file has columns: `pcds` (String), `travel_minutes` (Int16).
|
||||
pub struct TravelTimeStore {
|
||||
base_dir: PathBuf,
|
||||
/// Available transport modes (subdirectory names, e.g., "bicycle")
|
||||
pub available_modes: Vec<String>,
|
||||
/// mode → set of destination slugs (filenames without .parquet)
|
||||
pub destinations: FxHashMap<String, FxHashSet<String>>,
|
||||
cache: Mutex<LruCache>,
|
||||
}
|
||||
|
||||
impl TravelTimeStore {
|
||||
/// Scan the travel-times directory to discover available modes and destinations.
|
||||
pub fn load(base_dir: &Path, cache_capacity: usize) -> anyhow::Result<Self> {
|
||||
let mut available_modes = Vec::new();
|
||||
let mut destinations: FxHashMap<String, FxHashSet<String>> = FxHashMap::default();
|
||||
|
||||
for entry in std::fs::read_dir(base_dir)
|
||||
.with_context(|| format!("Failed to read travel-times dir: {}", base_dir.display()))?
|
||||
{
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
if !path.is_dir() {
|
||||
continue;
|
||||
}
|
||||
let mode = entry.file_name().to_string_lossy().to_string();
|
||||
|
||||
let mut slugs = FxHashSet::default();
|
||||
for file_entry in std::fs::read_dir(&path)
|
||||
.with_context(|| format!("Failed to read mode dir: {}", path.display()))?
|
||||
{
|
||||
let file_entry = file_entry?;
|
||||
let file_name = file_entry.file_name();
|
||||
let file_name = file_name.to_string_lossy();
|
||||
if file_name.ends_with(".parquet") {
|
||||
let slug = file_name.trim_end_matches(".parquet").to_string();
|
||||
slugs.insert(slug);
|
||||
}
|
||||
}
|
||||
|
||||
if !slugs.is_empty() {
|
||||
info!(
|
||||
mode = mode.as_str(),
|
||||
destinations = slugs.len(),
|
||||
"Travel time mode discovered"
|
||||
);
|
||||
available_modes.push(mode.clone());
|
||||
destinations.insert(mode, slugs);
|
||||
}
|
||||
}
|
||||
|
||||
available_modes.sort();
|
||||
|
||||
Ok(Self {
|
||||
base_dir: base_dir.to_path_buf(),
|
||||
available_modes,
|
||||
destinations,
|
||||
cache: Mutex::new(LruCache::new(cache_capacity)),
|
||||
})
|
||||
}
|
||||
|
||||
/// Load travel time data for a given mode and destination slug.
|
||||
/// Returns a cached or freshly-loaded postcode → travel_minutes mapping.
|
||||
pub fn get(&self, mode: &str, slug: &str) -> anyhow::Result<TravelData> {
|
||||
let key = (mode.to_string(), slug.to_string());
|
||||
|
||||
// Check cache first
|
||||
{
|
||||
let mut cache = self.cache.lock();
|
||||
if let Some(data) = cache.get(&key) {
|
||||
return Ok(data);
|
||||
}
|
||||
}
|
||||
|
||||
// Load from file (no lock held — harmless if two threads load the same file)
|
||||
let path = self
|
||||
.base_dir
|
||||
.join(mode)
|
||||
.join(format!("{}.parquet", slug));
|
||||
if !path.exists() {
|
||||
bail!("Travel time file not found: {}", path.display());
|
||||
}
|
||||
|
||||
let df = LazyFrame::scan_parquet(&path, Default::default())
|
||||
.with_context(|| format!("Failed to scan: {}", path.display()))?
|
||||
.collect()
|
||||
.with_context(|| format!("Failed to read: {}", path.display()))?;
|
||||
|
||||
let postcodes = df
|
||||
.column("pcds")
|
||||
.context("Missing 'pcds' column")?
|
||||
.str()
|
||||
.context("'pcds' is not string")?;
|
||||
let minutes = df
|
||||
.column("travel_minutes")
|
||||
.context("Missing 'travel_minutes' column")?
|
||||
.i16()
|
||||
.context("'travel_minutes' is not i16")?;
|
||||
|
||||
let mut map = FxHashMap::default();
|
||||
map.reserve(df.height());
|
||||
for (pc, min) in postcodes.into_iter().zip(minutes.into_iter()) {
|
||||
if let (Some(pc), Some(min)) = (pc, min) {
|
||||
map.insert(pc.to_string(), min);
|
||||
}
|
||||
}
|
||||
|
||||
let data: TravelData = Arc::new(map);
|
||||
|
||||
// Insert into cache
|
||||
{
|
||||
let mut cache = self.cache.lock();
|
||||
cache.insert(key, data.clone());
|
||||
}
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
/// Check if a mode + slug combination is available.
|
||||
pub fn has_destination(&self, mode: &str, slug: &str) -> bool {
|
||||
self.destinations
|
||||
.get(mode)
|
||||
.map(|slugs| slugs.contains(slug))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Slugify a place name to match travel time file naming convention.
/// "Abbey Hey" → "abbey-hey", "A'Bhuaile Ghlas" → "a-bhuaile-ghlas"
///
/// The name is cut into runs of ASCII letters and digits; every other
/// character (including non-ASCII letters) acts as a separator. The runs are
/// lowercased and joined with single hyphens, so the result never starts or
/// ends with a hyphen and never contains two in a row.
pub fn slugify(name: &str) -> String {
    name.split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("-")
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `slugify(input)` equals `expected` for every pair.
    fn check(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(slugify(input), expected);
        }
    }

    #[test]
    fn slugify_basic() {
        check(&[
            ("Abbey Hey", "abbey-hey"),
            ("Abbots Bickington", "abbots-bickington"),
            ("London", "london"),
        ]);
    }

    #[test]
    fn slugify_special_chars() {
        check(&[("A'Bhuaile Ghlas", "a-bhuaile-ghlas")]);
    }

    #[test]
    fn slugify_edges() {
        check(&[("  Hello  ", "hello"), ("Abbey", "abbey")]);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue