use anyhow::Context; use rayon::prelude::*; use rustc_hash::FxHashMap; use serde::Deserialize; use std::fs; use std::path::Path; use tracing::{debug, info}; use super::PlaceData; /// Precomputed outcode data derived from postcode boundaries. /// An outcode is the first part of a UK postcode (e.g. "E14" from "E14 2DG"). pub struct OutcodeData { pub names: Vec, pub name_lower: Vec, pub centroids: Vec<(f32, f32)>, pub cities: Vec>, } impl OutcodeData { /// Derive outcode data by grouping postcodes by their outcode prefix and averaging centroids. pub fn from_postcode_and_place_data( postcode_data: &PostcodeData, place_data: &PlaceData, ) -> Self { // Group postcode centroids by outcode let mut outcode_centroids: FxHashMap> = FxHashMap::default(); for (idx, postcode) in postcode_data.postcodes.iter().enumerate() { if let Some(space_idx) = postcode.find(' ') { let outcode = &postcode[..space_idx]; outcode_centroids .entry(outcode.to_string()) .or_default() .push(postcode_data.centroids[idx]); } } // Build sorted vecs let mut entries: Vec<(String, (f32, f32))> = outcode_centroids .into_iter() .map(|(outcode, pts)| { let count = pts.len() as f32; let avg_lat = pts.iter().map(|(lat, _)| lat).sum::() / count; let avg_lon = pts.iter().map(|(_, lon)| lon).sum::() / count; (outcode, (avg_lat, avg_lon)) }) .collect(); entries.sort_unstable_by(|a, b| a.0.cmp(&b.0)); let names: Vec = entries.iter().map(|(n, _)| n.clone()).collect(); let name_lower: Vec = names.iter().map(|n| n.to_lowercase()).collect(); let centroids: Vec<(f32, f32)> = entries.iter().map(|(_, c)| *c).collect(); // Compute nearest city for each outcode (same algorithm as PlaceData) let city_indices: Vec = place_data .type_rank .iter() .enumerate() .filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None }) .collect(); let cities: Vec> = centroids .iter() .map(|&(lat, lon)| { let cos_lat = lat.to_radians().cos(); let mut best_dist_sq = f32::MAX; let mut best_city: Option<&str> = None; for &ci in &city_indices { let dlat = place_data.lat[ci] - lat; let dlon = (place_data.lon[ci] - lon) * cos_lat; let dist_sq = dlat * dlat + dlon * dlon; if dist_sq < best_dist_sq { best_dist_sq = dist_sq; best_city = Some(&place_data.name[ci]); } } // ~100km threshold if best_dist_sq < 0.81 { best_city.map(|s| s.to_string()) } else { None } }) .collect(); info!(outcodes = names.len(), "Outcode data derived from postcodes"); OutcodeData { names, name_lower, centroids, cities, } } } /// GeoJSON structures for parsing postcode boundary files #[derive(Deserialize)] struct FeatureCollection { features: Vec, } #[derive(Deserialize)] struct Feature { geometry: Geometry, properties: Properties, } #[derive(Deserialize)] #[serde(tag = "type")] enum Geometry { Polygon { coordinates: Vec>, }, MultiPolygon { coordinates: Vec>>, }, } #[derive(Deserialize)] struct Properties { postcodes: String, } /// Postcode boundary data: polygon vertices and spatial index for fast queries. pub struct PostcodeData { /// Postcode strings pub postcodes: Vec, /// Centroid (lat, lon) for lookups pub centroids: Vec<(f32, f32)>, /// Precomputed AABB per postcode: (south, west, north, east) as f32 pub aabbs: Vec<(f32, f32, f32, f32)>, /// Precomputed GeoJSON geometry Value per postcode pub geometries: Vec, /// Lookup from postcode string to index pub postcode_to_idx: FxHashMap, } impl PostcodeData { /// Load postcode boundaries from a directory of GeoJSON files. /// Expects the directory to have a `units/` subdirectory containing .geojson files. pub fn load(dir_path: &Path) -> anyhow::Result { info!("Loading postcode boundaries from {:?}", dir_path); let units_dir = dir_path.join("units"); if !units_dir.exists() { anyhow::bail!( "Expected 'units' subdirectory in postcode boundaries path: {:?}", dir_path ); } let mut postcodes: Vec = Vec::new(); let mut polygons: Vec>> = Vec::new(); let mut centroids: Vec<(f32, f32)> = Vec::new(); // Read all .geojson files in the units directory let mut entries: Vec<_> = fs::read_dir(&units_dir) .with_context(|| format!("Failed to read directory: {:?}", units_dir))? .filter_map(|entry| entry.ok()) .filter(|entry| { entry .path() .extension() .map(|ext| ext == "geojson") .unwrap_or(false) }) .collect(); entries.sort_by_key(|entry| entry.path()); info!(files = entries.len(), "Found GeoJSON files to process"); // Parse files in parallel let file_results: Vec<_> = entries .into_par_iter() .map(|entry| { let file_path = entry.path(); let content = fs::read_to_string(&file_path) .with_context(|| format!("Failed to read file: {:?}", file_path))?; let collection: FeatureCollection = serde_json::from_str(&content) .with_context(|| format!("Failed to parse GeoJSON: {:?}", file_path))?; let mut local_postcodes = Vec::new(); let mut local_polygons = Vec::new(); let mut local_centroids = Vec::new(); let mut local_aabbs: Vec<(f32, f32, f32, f32)> = Vec::new(); for feature in collection.features { let postcode = feature.properties.postcodes; // Extract all outer rings from the geometry let rings: Vec> = match feature.geometry { Geometry::Polygon { coordinates } => coordinates .first() .map(|ring| { vec![ring .iter() .map(|[lon, lat]| [*lon as f32, *lat as f32]) .collect()] }) .unwrap_or_default(), Geometry::MultiPolygon { coordinates } => coordinates .iter() .filter_map(|poly| { poly.first().map(|ring| { ring.iter() .map(|[lon, lat]| [*lon as f32, *lat as f32]) .collect() }) }) .collect(), }; // Compute centroid across all vertices from all rings let total_vertices: usize = rings.iter().map(|ring| ring.len()).sum(); let centroid = if total_vertices == 0 { tracing::warn!(postcode = %postcode, "Postcode polygon has zero vertices, defaulting centroid to (0,0)"); (0.0, 0.0) } else { let mut sum_lat: f32 = 0.0; let mut sum_lon: f32 = 0.0; for ring in &rings { for &[lon, lat] in ring { sum_lat += lat; sum_lon += lon; } } let count = total_vertices as f32; (sum_lat / count, sum_lon / count) }; // Compute AABB across all rings let (mut aabb_south, mut aabb_north) = (f32::INFINITY, f32::NEG_INFINITY); let (mut aabb_west, mut aabb_east) = (f32::INFINITY, f32::NEG_INFINITY); for ring in &rings { for &[lon, lat] in ring { if lat < aabb_south { aabb_south = lat; } if lat > aabb_north { aabb_north = lat; } if lon < aabb_west { aabb_west = lon; } if lon > aabb_east { aabb_east = lon; } } } local_postcodes.push(postcode); local_polygons.push(rings); local_centroids.push(centroid); local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east)); } Ok::<_, anyhow::Error>(( local_postcodes, local_polygons, local_centroids, local_aabbs, )) }) .collect::, _>>()?; let mut aabbs: Vec<(f32, f32, f32, f32)> = Vec::new(); // Flatten results for (local_postcodes, local_polygons, local_centroids, local_aabbs) in file_results { postcodes.extend(local_postcodes); polygons.extend(local_polygons); centroids.extend(local_centroids); aabbs.extend(local_aabbs); } debug!( postcodes = postcodes.len(), "Extracted postcodes from GeoJSON" ); // Build postcode -> index lookup let mut postcode_to_idx: FxHashMap = FxHashMap::default(); for (idx, postcode) in postcodes.iter().enumerate() { postcode_to_idx.insert(postcode.clone(), idx); } // Precompute GeoJSON geometry for each postcode let geometries: Vec = polygons .iter() .map(|rings| { if rings.len() == 1 { let coords: Vec = rings[0] .iter() .map(|[lon, lat]| { serde_json::Value::Array(vec![ serde_json::Value::from(*lon as f64), serde_json::Value::from(*lat as f64), ]) }) .collect(); serde_json::json!({"type": "Polygon", "coordinates": [coords]}) } else { let polys: Vec = rings .iter() .map(|ring| { let coords: Vec = ring .iter() .map(|[lon, lat]| { serde_json::Value::Array(vec![ serde_json::Value::from(*lon as f64), serde_json::Value::from(*lat as f64), ]) }) .collect(); serde_json::Value::Array(vec![serde_json::Value::Array(coords)]) }) .collect(); serde_json::json!({"type": "MultiPolygon", "coordinates": polys}) } }) .collect(); info!(postcodes = postcodes.len(), "Postcode boundary data ready"); Ok(PostcodeData { postcodes, centroids, aabbs, geometries, postcode_to_idx, }) } }