336 lines
12 KiB
Rust
336 lines
12 KiB
Rust
use anyhow::Context;
|
|
use rayon::prelude::*;
|
|
use rustc_hash::FxHashMap;
|
|
use serde::Deserialize;
|
|
use std::fs;
|
|
use std::path::Path;
|
|
use tracing::{debug, info};
|
|
|
|
use super::PlaceData;
|
|
|
|
/// Outcode-level lookup tables derived from postcode boundary data.
///
/// An outcode is the first part of a UK postcode (e.g. "E14" from "E14 2DG").
/// The four vectors are parallel: entry `i` of each describes the same outcode.
pub struct OutcodeData {
    /// Outcode strings, sorted ascending.
    pub names: Vec<String>,
    /// Lowercased copy of `names`, in the same order.
    pub name_lower: Vec<String>,
    /// Mean (lat, lon) of the centroids of the postcodes sharing each outcode.
    pub centroids: Vec<(f32, f32)>,
    /// Name of the nearest city to each outcode centroid, or `None` when no
    /// city lies within the distance threshold used at construction time.
    pub cities: Vec<Option<String>>,
}
|
|
|
|
impl OutcodeData {
|
|
/// Derive outcode data by grouping postcodes by their outcode prefix and averaging centroids.
|
|
pub fn from_postcode_and_place_data(
|
|
postcode_data: &PostcodeData,
|
|
place_data: &PlaceData,
|
|
) -> Self {
|
|
// Group postcode centroids by outcode
|
|
let mut outcode_centroids: FxHashMap<String, Vec<(f32, f32)>> = FxHashMap::default();
|
|
for (idx, postcode) in postcode_data.postcodes.iter().enumerate() {
|
|
if let Some(space_idx) = postcode.find(' ') {
|
|
let outcode = &postcode[..space_idx];
|
|
outcode_centroids
|
|
.entry(outcode.to_string())
|
|
.or_default()
|
|
.push(postcode_data.centroids[idx]);
|
|
}
|
|
}
|
|
|
|
// Build sorted vecs
|
|
let mut entries: Vec<(String, (f32, f32))> = outcode_centroids
|
|
.into_iter()
|
|
.map(|(outcode, pts)| {
|
|
let count = pts.len() as f32;
|
|
let avg_lat = pts.iter().map(|(lat, _)| lat).sum::<f32>() / count;
|
|
let avg_lon = pts.iter().map(|(_, lon)| lon).sum::<f32>() / count;
|
|
(outcode, (avg_lat, avg_lon))
|
|
})
|
|
.collect();
|
|
entries.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
|
|
|
let names: Vec<String> = entries.iter().map(|(n, _)| n.clone()).collect();
|
|
let name_lower: Vec<String> = names.iter().map(|n| n.to_lowercase()).collect();
|
|
let centroids: Vec<(f32, f32)> = entries.iter().map(|(_, c)| *c).collect();
|
|
|
|
// Compute nearest city for each outcode (same algorithm as PlaceData)
|
|
let city_indices: Vec<usize> = place_data
|
|
.type_rank
|
|
.iter()
|
|
.enumerate()
|
|
.filter_map(|(idx, &rank)| if rank == 0 { Some(idx) } else { None })
|
|
.collect();
|
|
|
|
let cities: Vec<Option<String>> = centroids
|
|
.iter()
|
|
.map(|&(lat, lon)| {
|
|
let cos_lat = lat.to_radians().cos();
|
|
let mut best_dist_sq = f32::MAX;
|
|
let mut best_city: Option<&str> = None;
|
|
for &ci in &city_indices {
|
|
let dlat = place_data.lat[ci] - lat;
|
|
let dlon = (place_data.lon[ci] - lon) * cos_lat;
|
|
let dist_sq = dlat * dlat + dlon * dlon;
|
|
if dist_sq < best_dist_sq {
|
|
best_dist_sq = dist_sq;
|
|
best_city = Some(&place_data.name[ci]);
|
|
}
|
|
}
|
|
// ~100km threshold
|
|
if best_dist_sq < 0.81 {
|
|
best_city.map(|s| s.to_string())
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
info!(outcodes = names.len(), "Outcode data derived from postcodes");
|
|
|
|
OutcodeData {
|
|
names,
|
|
name_lower,
|
|
centroids,
|
|
cities,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// GeoJSON structures for parsing postcode boundary files
|
|
#[derive(Deserialize)]
|
|
struct FeatureCollection {
|
|
features: Vec<Feature>,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
struct Feature {
|
|
geometry: Geometry,
|
|
properties: Properties,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
#[serde(tag = "type")]
|
|
enum Geometry {
|
|
Polygon {
|
|
coordinates: Vec<Vec<[f64; 2]>>,
|
|
},
|
|
MultiPolygon {
|
|
coordinates: Vec<Vec<Vec<[f64; 2]>>>,
|
|
},
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
struct Properties {
|
|
postcodes: String,
|
|
}
|
|
|
|
/// Postcode boundary data: polygon vertices and spatial index for fast queries.
|
|
pub struct PostcodeData {
|
|
/// Postcode strings
|
|
pub postcodes: Vec<String>,
|
|
/// Centroid (lat, lon) for lookups
|
|
pub centroids: Vec<(f32, f32)>,
|
|
/// Precomputed AABB per postcode: (south, west, north, east) as f32
|
|
pub aabbs: Vec<(f32, f32, f32, f32)>,
|
|
/// Precomputed GeoJSON geometry Value per postcode
|
|
pub geometries: Vec<serde_json::Value>,
|
|
/// Lookup from postcode string to index
|
|
pub postcode_to_idx: FxHashMap<String, usize>,
|
|
}
|
|
|
|
impl PostcodeData {
|
|
/// Load postcode boundaries from a directory of GeoJSON files.
|
|
/// Expects the directory to have a `units/` subdirectory containing .geojson files.
|
|
pub fn load(dir_path: &Path) -> anyhow::Result<Self> {
|
|
info!("Loading postcode boundaries from {:?}", dir_path);
|
|
|
|
let units_dir = dir_path.join("units");
|
|
if !units_dir.exists() {
|
|
anyhow::bail!(
|
|
"Expected 'units' subdirectory in postcode boundaries path: {:?}",
|
|
dir_path
|
|
);
|
|
}
|
|
|
|
let mut postcodes: Vec<String> = Vec::new();
|
|
let mut polygons: Vec<Vec<Vec<[f32; 2]>>> = Vec::new();
|
|
let mut centroids: Vec<(f32, f32)> = Vec::new();
|
|
|
|
// Read all .geojson files in the units directory
|
|
let mut entries: Vec<_> = fs::read_dir(&units_dir)
|
|
.with_context(|| format!("Failed to read directory: {:?}", units_dir))?
|
|
.filter_map(|entry| entry.ok())
|
|
.filter(|entry| {
|
|
entry
|
|
.path()
|
|
.extension()
|
|
.map(|ext| ext == "geojson")
|
|
.unwrap_or(false)
|
|
})
|
|
.collect();
|
|
|
|
entries.sort_by_key(|entry| entry.path());
|
|
|
|
info!(files = entries.len(), "Found GeoJSON files to process");
|
|
|
|
// Parse files in parallel
|
|
let file_results: Vec<_> = entries
|
|
.into_par_iter()
|
|
.map(|entry| {
|
|
let file_path = entry.path();
|
|
let content = fs::read_to_string(&file_path)
|
|
.with_context(|| format!("Failed to read file: {:?}", file_path))?;
|
|
|
|
let collection: FeatureCollection = serde_json::from_str(&content)
|
|
.with_context(|| format!("Failed to parse GeoJSON: {:?}", file_path))?;
|
|
|
|
let mut local_postcodes = Vec::new();
|
|
let mut local_polygons = Vec::new();
|
|
let mut local_centroids = Vec::new();
|
|
let mut local_aabbs: Vec<(f32, f32, f32, f32)> = Vec::new();
|
|
|
|
for feature in collection.features {
|
|
let postcode = feature.properties.postcodes;
|
|
|
|
// Extract all outer rings from the geometry
|
|
let rings: Vec<Vec<[f32; 2]>> = match feature.geometry {
|
|
Geometry::Polygon { coordinates } => coordinates
|
|
.first()
|
|
.map(|ring| {
|
|
vec![ring
|
|
.iter()
|
|
.map(|[lon, lat]| [*lon as f32, *lat as f32])
|
|
.collect()]
|
|
})
|
|
.unwrap_or_default(),
|
|
Geometry::MultiPolygon { coordinates } => coordinates
|
|
.iter()
|
|
.filter_map(|poly| {
|
|
poly.first().map(|ring| {
|
|
ring.iter()
|
|
.map(|[lon, lat]| [*lon as f32, *lat as f32])
|
|
.collect()
|
|
})
|
|
})
|
|
.collect(),
|
|
};
|
|
|
|
// Compute centroid across all vertices from all rings
|
|
let total_vertices: usize = rings.iter().map(|ring| ring.len()).sum();
|
|
let centroid = if total_vertices == 0 {
|
|
tracing::warn!(postcode = %postcode, "Postcode polygon has zero vertices, defaulting centroid to (0,0)");
|
|
(0.0, 0.0)
|
|
} else {
|
|
let mut sum_lat: f32 = 0.0;
|
|
let mut sum_lon: f32 = 0.0;
|
|
for ring in &rings {
|
|
for &[lon, lat] in ring {
|
|
sum_lat += lat;
|
|
sum_lon += lon;
|
|
}
|
|
}
|
|
let count = total_vertices as f32;
|
|
(sum_lat / count, sum_lon / count)
|
|
};
|
|
|
|
// Compute AABB across all rings
|
|
let (mut aabb_south, mut aabb_north) = (f32::INFINITY, f32::NEG_INFINITY);
|
|
let (mut aabb_west, mut aabb_east) = (f32::INFINITY, f32::NEG_INFINITY);
|
|
for ring in &rings {
|
|
for &[lon, lat] in ring {
|
|
if lat < aabb_south {
|
|
aabb_south = lat;
|
|
}
|
|
if lat > aabb_north {
|
|
aabb_north = lat;
|
|
}
|
|
if lon < aabb_west {
|
|
aabb_west = lon;
|
|
}
|
|
if lon > aabb_east {
|
|
aabb_east = lon;
|
|
}
|
|
}
|
|
}
|
|
|
|
local_postcodes.push(postcode);
|
|
local_polygons.push(rings);
|
|
local_centroids.push(centroid);
|
|
local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east));
|
|
}
|
|
|
|
Ok::<_, anyhow::Error>((
|
|
local_postcodes,
|
|
local_polygons,
|
|
local_centroids,
|
|
local_aabbs,
|
|
))
|
|
})
|
|
.collect::<Result<Vec<_>, _>>()?;
|
|
|
|
let mut aabbs: Vec<(f32, f32, f32, f32)> = Vec::new();
|
|
|
|
// Flatten results
|
|
for (local_postcodes, local_polygons, local_centroids, local_aabbs) in file_results {
|
|
postcodes.extend(local_postcodes);
|
|
polygons.extend(local_polygons);
|
|
centroids.extend(local_centroids);
|
|
aabbs.extend(local_aabbs);
|
|
}
|
|
|
|
debug!(
|
|
postcodes = postcodes.len(),
|
|
"Extracted postcodes from GeoJSON"
|
|
);
|
|
|
|
// Build postcode -> index lookup
|
|
let mut postcode_to_idx: FxHashMap<String, usize> = FxHashMap::default();
|
|
for (idx, postcode) in postcodes.iter().enumerate() {
|
|
postcode_to_idx.insert(postcode.clone(), idx);
|
|
}
|
|
|
|
// Precompute GeoJSON geometry for each postcode
|
|
let geometries: Vec<serde_json::Value> = polygons
|
|
.iter()
|
|
.map(|rings| {
|
|
if rings.len() == 1 {
|
|
let coords: Vec<serde_json::Value> = rings[0]
|
|
.iter()
|
|
.map(|[lon, lat]| {
|
|
serde_json::Value::Array(vec![
|
|
serde_json::Value::from(*lon as f64),
|
|
serde_json::Value::from(*lat as f64),
|
|
])
|
|
})
|
|
.collect();
|
|
serde_json::json!({"type": "Polygon", "coordinates": [coords]})
|
|
} else {
|
|
let polys: Vec<serde_json::Value> = rings
|
|
.iter()
|
|
.map(|ring| {
|
|
let coords: Vec<serde_json::Value> = ring
|
|
.iter()
|
|
.map(|[lon, lat]| {
|
|
serde_json::Value::Array(vec![
|
|
serde_json::Value::from(*lon as f64),
|
|
serde_json::Value::from(*lat as f64),
|
|
])
|
|
})
|
|
.collect();
|
|
serde_json::Value::Array(vec![serde_json::Value::Array(coords)])
|
|
})
|
|
.collect();
|
|
serde_json::json!({"type": "MultiPolygon", "coordinates": polys})
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
info!(postcodes = postcodes.len(), "Postcode boundary data ready");
|
|
|
|
Ok(PostcodeData {
|
|
postcodes,
|
|
centroids,
|
|
aabbs,
|
|
geometries,
|
|
postcode_to_idx,
|
|
})
|
|
}
|
|
}
|