diff --git a/server-rs/src/data.rs b/server-rs/src/data.rs index 7a7c181..6f0e5a9 100644 --- a/server-rs/src/data.rs +++ b/server-rs/src/data.rs @@ -4,4 +4,4 @@ mod property; pub use poi::{POICategoryGroup, POIData}; pub use postcodes::PostcodeData; -pub use property::{precompute_h3, Histogram, PropertyData}; +pub use property::{compute_feature_stats, precompute_h3, Histogram, PropertyData}; diff --git a/server-rs/src/main.rs b/server-rs/src/main.rs index 8271d6d..1e3a7fd 100644 --- a/server-rs/src/main.rs +++ b/server-rs/src/main.rs @@ -5,6 +5,8 @@ mod metrics; mod og_middleware; pub mod parsing; mod routes; +#[cfg(test)] +mod semantic_tests; mod state; pub mod utils; @@ -147,6 +149,13 @@ async fn main() -> anyhow::Result<()> { let tile_reader = Arc::new(routes::init_tile_reader(tiles_path).await?); info!("PMTiles loaded successfully"); + let feature_name_to_index: rustc_hash::FxHashMap = property_data + .feature_names + .iter() + .enumerate() + .map(|(idx, name)| (name.clone(), idx)) + .collect(); + let min_keys: Vec = property_data .feature_names .iter() @@ -219,6 +228,7 @@ async fn main() -> anyhow::Result<()> { poi_data, poi_grid, postcode_data, + feature_name_to_index, min_keys, max_keys, poi_category_groups, @@ -237,6 +247,7 @@ async fn main() -> anyhow::Result<()> { let state_features = state.clone(); let state_hexagons = state.clone(); let state_postcodes = state.clone(); + let state_postcode_lookup = state.clone(); let state_pois = state.clone(); let state_poi_categories = state.clone(); let state_hexagon_properties = state.clone(); @@ -257,6 +268,10 @@ async fn main() -> anyhow::Result<()> { "/api/postcodes", get(move |query| routes::get_postcodes(state_postcodes.clone(), query)), ) + .route( + "/api/postcode/{postcode}", + get(move |path| routes::get_postcode_lookup(state_postcode_lookup.clone(), path)), + ) .route( "/api/pois", get(move |query| routes::get_pois(state_pois.clone(), query)), diff --git a/server-rs/src/parsing.rs 
b/server-rs/src/parsing.rs index 4bcfe19..ab1ec60 100644 --- a/server-rs/src/parsing.rs +++ b/server-rs/src/parsing.rs @@ -1,5 +1,5 @@ mod bounds; mod filters; -pub use bounds::{h3_cell_bounds, parse_bounds}; +pub use bounds::{bounds_intersect, h3_cell_bounds, parse_bounds}; pub use filters::{parse_filters, row_passes_filters, ParsedEnumFilter, ParsedFilter}; diff --git a/server-rs/src/parsing/bounds.rs b/server-rs/src/parsing/bounds.rs index b7ab586..462891b 100644 --- a/server-rs/src/parsing/bounds.rs +++ b/server-rs/src/parsing/bounds.rs @@ -1,11 +1,29 @@ use axum::http::StatusCode; -/// Compute the lat/lon bounding box of an H3 cell, with a configurable buffer in degrees. +/// Check if two bounding boxes intersect. +/// Both boxes are (south, west, north, east) / (min_lat, min_lon, max_lat, max_lon). +#[inline] +pub fn bounds_intersect( + a_south: f64, + a_west: f64, + a_north: f64, + a_east: f64, + b_south: f64, + b_west: f64, + b_north: f64, + b_east: f64, +) -> bool { + a_west <= b_east && a_east >= b_west && a_south <= b_north && a_north >= b_south +} + +/// Compute the lat/lon bounding box of an H3 cell in degrees, with a configurable buffer in degrees. +/// Returns (south, west, north, east) / (min_lat, min_lon, max_lat, max_lon). 
pub fn h3_cell_bounds(cell: h3o::CellIndex, buffer: f64) -> (f64, f64, f64, f64) { let boundary = cell.boundary(); let (mut min_lat, mut max_lat) = (f64::INFINITY, f64::NEG_INFINITY); let (mut min_lon, mut max_lon) = (f64::INFINITY, f64::NEG_INFINITY); for vertex in boundary.iter() { + // h3o LatLng::lat()/lng() return degrees let lat = vertex.lat(); let lon = vertex.lng(); if lat < min_lat { @@ -81,4 +99,42 @@ mod tests { assert!((buf_max_lat - max_lat - 0.1).abs() < 1e-10); assert!((buf_max_lon - max_lon - 0.1).abs() < 1e-10); } + + #[test] + fn h3_cell_bounds_returns_degrees_not_radians() { + // Cell "8928308280fffff" is in San Francisco area (~37.77°N, ~-122.4°W) + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.0); + + // If h3o returned radians, values would be < π ≈ 3.14 + // Latitude ~37.77° proves we're getting degrees, not radians + assert!(min_lat > 30.0 && min_lat < 45.0, "min_lat {} should be ~37° (degrees)", min_lat); + assert!(max_lat > 30.0 && max_lat < 45.0, "max_lat {} should be ~37° (degrees)", max_lat); + + // Longitude ~-122° also proves degrees (radians would be < π) + assert!(min_lon < -100.0, "min_lon {} should be ~-122° (degrees)", min_lon); + assert!(max_lon < -100.0, "max_lon {} should be ~-122° (degrees)", max_lon); + } + + #[test] + fn bounds_intersect_overlapping() { + // Two overlapping boxes + assert!(bounds_intersect(0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0)); + // Box B is inside box A + assert!(bounds_intersect(0.0, 0.0, 10.0, 10.0, 2.0, 2.0, 5.0, 5.0)); + // Box A is inside box B + assert!(bounds_intersect(2.0, 2.0, 5.0, 5.0, 0.0, 0.0, 10.0, 10.0)); + // Touching at edge + assert!(bounds_intersect(0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0)); + } + + #[test] + fn bounds_intersect_non_overlapping() { + // Box B is to the right of box A + assert!(!bounds_intersect(0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0)); + // Box B is above box A + 
assert!(!bounds_intersect(0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0)); + // Completely separate + assert!(!bounds_intersect(0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 6.0, 6.0)); + } } diff --git a/server-rs/src/parsing/filters.rs b/server-rs/src/parsing/filters.rs index 33ba76e..e098636 100644 --- a/server-rs/src/parsing/filters.rs +++ b/server-rs/src/parsing/filters.rs @@ -1,4 +1,4 @@ -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; /// Filter for numeric features: value must be in [min, max] range. pub struct ParsedFilter { @@ -8,9 +8,11 @@ pub struct ParsedFilter { } /// Filter for enum features: value must be one of the allowed indices. +/// Uses FxHashSet (f32 bits) for O(1) lookups instead of O(n) Vec::contains. pub struct ParsedEnumFilter { pub feat_idx: usize, - pub allowed: Vec, + /// Allowed enum indices stored as f32 bits for exact comparison + pub allowed: FxHashSet, } /// Parse comma-separated filter string into numeric and enum filters. @@ -18,7 +20,7 @@ pub struct ParsedEnumFilter { /// Enum format: `name:val1|val2|val3` (pipe-separated string values) pub fn parse_filters( filter_str: Option<&str>, - feature_names: &[String], + feature_name_to_index: &FxHashMap, enum_values: &FxHashMap>, ) -> (Vec, Vec) { let mut numeric = Vec::new(); @@ -37,22 +39,22 @@ pub fn parse_filters( let name = parts[0].trim(); let rest = parts[1].trim(); - // Find feature index by name - let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) else { + // Find feature index by name (O(1) lookup) + let Some(&feat_idx) = feature_name_to_index.get(name) else { continue; }; // Check if this is an enum feature if let Some(values) = enum_values.get(&feat_idx) { - // Enum filter: convert string values to f32 indices - let allowed: Vec = rest + // Enum filter: convert string values to f32 indices (stored as bits for O(1) lookup) + let allowed: FxHashSet = rest .split('|') .filter_map(|value| { let value = value.trim(); values .iter() 
.position(|existing| existing == value) - .map(|position| position as f32) + .map(|position| (position as f32).to_bits()) }) .collect(); enums.push(ParsedEnumFilter { feat_idx, allowed }); @@ -93,7 +95,8 @@ pub fn row_passes_filters( value.is_finite() && value >= filter.min && value <= filter.max }) && enum_filters.iter().all(|filter| { let value = feature_data[base + filter.feat_idx]; - value.is_finite() && filter.allowed.contains(&value) + // O(1) lookup using f32 bits as key + value.is_finite() && filter.allowed.contains(&value.to_bits()) }) } @@ -101,8 +104,10 @@ pub fn row_passes_filters( mod tests { use super::*; - fn feature_names() -> Vec { - vec!["price".into(), "area".into(), "rating".into()] + fn feature_name_to_index() -> FxHashMap { + [("price".into(), 0), ("area".into(), 1), ("rating".into(), 2)] + .into_iter() + .collect() } fn enum_values() -> FxHashMap> { @@ -113,7 +118,7 @@ mod tests { #[test] fn parse_filters_numeric() { - let (numeric, enums) = parse_filters(Some("price:100:500"), &feature_names(), &enum_values()); + let (numeric, enums) = parse_filters(Some("price:100:500"), &feature_name_to_index(), &enum_values()); assert_eq!(numeric.len(), 1); assert_eq!(numeric[0].feat_idx, 0); assert_eq!(numeric[0].min, 100.0); @@ -123,22 +128,25 @@ mod tests { #[test] fn parse_filters_enum() { - let (numeric, enums) = parse_filters(Some("rating:A|C"), &feature_names(), &enum_values()); + let (numeric, enums) = parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values()); assert!(numeric.is_empty()); assert_eq!(enums.len(), 1); assert_eq!(enums[0].feat_idx, 2); - assert_eq!(enums[0].allowed, vec![0.0, 2.0]); + // Allowed values are stored as f32 bits + assert!(enums[0].allowed.contains(&(0.0_f32).to_bits())); // A = index 0 + assert!(enums[0].allowed.contains(&(2.0_f32).to_bits())); // C = index 2 + assert_eq!(enums[0].allowed.len(), 2); } #[test] fn parse_filters_empty_and_invalid() { - let (n, e) = parse_filters(None, &feature_names(), 
&enum_values()); + let (n, e) = parse_filters(None, &feature_name_to_index(), &enum_values()); assert!(n.is_empty() && e.is_empty()); - let (n, e) = parse_filters(Some(""), &feature_names(), &enum_values()); + let (n, e) = parse_filters(Some(""), &feature_name_to_index(), &enum_values()); assert!(n.is_empty() && e.is_empty()); - let (n, e) = parse_filters(Some("unknown:1:2"), &feature_names(), &enum_values()); + let (n, e) = parse_filters(Some("unknown:1:2"), &feature_name_to_index(), &enum_values()); assert!(n.is_empty() && e.is_empty()); } diff --git a/server-rs/src/routes.rs b/server-rs/src/routes.rs index 129a473..ce54a39 100644 --- a/server-rs/src/routes.rs +++ b/server-rs/src/routes.rs @@ -12,6 +12,6 @@ pub use hexagon_stats::get_hexagon_stats; pub use hexagons::get_hexagons; pub use og_image::get_og_image; pub use pois::{get_poi_categories, get_pois}; -pub use postcodes::get_postcodes; +pub use postcodes::{get_postcode_lookup, get_postcodes}; pub use properties::get_hexagon_properties; pub use tiles::{get_style, get_tile, init_tile_reader}; diff --git a/server-rs/src/routes/hexagon_stats.rs b/server-rs/src/routes/hexagon_stats.rs index 937c63c..73df8c8 100644 --- a/server-rs/src/routes/hexagon_stats.rs +++ b/server-rs/src/routes/hexagon_stats.rs @@ -8,7 +8,7 @@ use axum::response::Json; use serde::{Deserialize, Serialize}; use tracing::{info, warn}; -use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS}; +use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN}; use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters}; use crate::state::AppState; @@ -87,7 +87,7 @@ pub async fn get_hexagon_stats( let filters_str = params.filters.clone(); let (parsed_filters, parsed_enum_filters) = parse_filters( params.filters.as_deref(), - &state.data.feature_names, + &state.feature_name_to_index, &state.data.enum_values, ); let num_filters = parsed_filters.len() + parsed_enum_filters.len(); @@ -188,15 +188,17 @@ 
pub async fn get_hexagon_stats( let global_hist = &state.data.feature_stats[feature_index].histogram; let p1 = global_hist.p1; let p99 = global_hist.p99; + // Use same bin count as global histogram for consistency + let num_bins = global_hist.counts.len(); let mut count = 0usize; let mut min_value = f32::INFINITY; let mut max_value = f32::NEG_INFINITY; let mut sum = 0.0f64; - let mut bins = vec![0u64; HISTOGRAM_BINS]; + let mut bins = vec![0u64; num_bins]; // Compute middle bin width (between p1 and p99) - let middle_bins = HISTOGRAM_BINS.saturating_sub(2); + let middle_bins = num_bins.saturating_sub(2); let middle_width = if middle_bins > 0 && p99 > p1 { (p99 - p1) / middle_bins as f32 } else { @@ -219,13 +221,13 @@ pub async fn get_hexagon_stats( let bin = if value < p1 { 0 // Low outlier bin } else if value >= p99 { - HISTOGRAM_BINS - 1 // High outlier bin + num_bins - 1 // High outlier bin } else if middle_width > 0.0 { // Middle bins (1 to n-2) let middle_bin = ((value - p1) / middle_width) as usize; - (1 + middle_bin).min(HISTOGRAM_BINS - 2) + (1 + middle_bin).min(num_bins - 2) } else { - HISTOGRAM_BINS / 2 // Fallback if p1 == p99 + num_bins / 2 // Fallback if p1 == p99 }; bins[bin] += 1; } diff --git a/server-rs/src/routes/hexagons.rs b/server-rs/src/routes/hexagons.rs index f24210c..a8f64c3 100644 --- a/server-rs/src/routes/hexagons.rs +++ b/server-rs/src/routes/hexagons.rs @@ -9,7 +9,7 @@ use serde_json::{Map, Value}; use tracing::{info, warn}; use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN}; -use crate::parsing::{parse_bounds, parse_filters, row_passes_filters}; +use crate::parsing::{bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters}; use crate::state::AppState; #[derive(Serialize)] @@ -92,21 +92,29 @@ impl CellAgg { } } -/// Build feature maps from aggregated cell data. +/// Build feature maps from aggregated cell data, filtering to only cells that intersect the query bounds. 
fn build_feature_maps( groups: &FxHashMap, min_keys: &[String], max_keys: &[String], num_features: usize, indices: Option<&[usize]>, + query_bounds: (f64, f64, f64, f64), // (south, west, north, east) ) -> Vec> { let mut features = Vec::with_capacity(groups.len()); + let (q_south, q_west, q_north, q_east) = query_bounds; for (&cell_id, aggregation) in groups { let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else { continue; }; + // Filter out cells that don't intersect the query bounds + let (c_south, c_west, c_north, c_east) = h3_cell_bounds(cell, 0.0); + if !bounds_intersect(c_south, c_west, c_north, c_east, q_south, q_west, q_north, q_east) { + continue; + } + let mut map = Map::new(); map.insert("h3".into(), Value::String(cell.to_string())); map.insert("count".into(), Value::Number(aggregation.count.into())); @@ -166,7 +174,7 @@ pub async fn get_hexagons( let filters_str = params.filters.clone(); let (parsed_filters, parsed_enum_filters) = parse_filters( params.filters.as_deref(), - &state.data.feature_names, + &state.feature_name_to_index, &state.data.enum_values, ); let num_filters = parsed_filters.len() + parsed_enum_filters.len(); @@ -185,11 +193,7 @@ pub async fn get_hexagons( if name.is_empty() { return None; } - state - .data - .feature_names - .iter() - .position(|feat| feat == name) + state.feature_name_to_index.get(name).copied() }) .collect() }); @@ -209,20 +213,6 @@ pub async fn get_hexagons( let mut groups: FxHashMap = FxHashMap::default(); - let has_selective = field_indices.is_some(); - let sel_indices = field_indices.as_deref().unwrap_or(&[]); - - let aggregate_row = |groups: &mut FxHashMap, cell_id: u64, row: usize| { - let aggregation = groups - .entry(cell_id) - .or_insert_with(|| CellAgg::new(num_features)); - if has_selective { - aggregation.add_row_selective(feature_data, row, num_features, sel_indices); - } else { - aggregation.add_row(feature_data, row, num_features); - } - }; - let cell_for_row = |row: usize| -> u64 { let 
max_cell = precomputed[row]; if !need_parent || max_cell == 0 { @@ -235,21 +225,48 @@ pub async fn get_hexagons( .unwrap_or(0) }; - state - .grid - .for_each_in_bounds(south, west, north, east, |row_idx| { - let row = row_idx as usize; - if !row_passes_filters( - row, - &parsed_filters, - &parsed_enum_filters, - feature_data, - num_features, - ) { - return; - } - aggregate_row(&mut groups, cell_for_row(row), row); - }); + // Hoist has_selective branch outside the hot loop to avoid per-row branching + if let Some(sel_indices) = field_indices.as_deref() { + state + .grid + .for_each_in_bounds(south, west, north, east, |row_idx| { + let row = row_idx as usize; + if !row_passes_filters( + row, + &parsed_filters, + &parsed_enum_filters, + feature_data, + num_features, + ) { + return; + } + let cell_id = cell_for_row(row); + let aggregation = groups + .entry(cell_id) + .or_insert_with(|| CellAgg::new(num_features)); + aggregation.add_row_selective(feature_data, row, num_features, sel_indices); + }); + } else { + state + .grid + .for_each_in_bounds(south, west, north, east, |row_idx| { + let row = row_idx as usize; + if !row_passes_filters( + row, + &parsed_filters, + &parsed_enum_filters, + feature_data, + num_features, + ) { + return; + } + let cell_id = cell_for_row(row); + let aggregation = groups + .entry(cell_id) + .or_insert_with(|| CellAgg::new(num_features)); + aggregation.add_row(feature_data, row, num_features); + }); + } let t_agg = t0.elapsed(); @@ -259,6 +276,7 @@ pub async fn get_hexagons( max_keys, num_features, field_indices.as_deref(), + (south, west, north, east), ); let t_total = t0.elapsed(); diff --git a/server-rs/src/routes/postcodes.rs b/server-rs/src/routes/postcodes.rs index 50b9980..7c68f3a 100644 --- a/server-rs/src/routes/postcodes.rs +++ b/server-rs/src/routes/postcodes.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use axum::extract::Query; +use axum::extract::{Path, Query}; use axum::http::StatusCode; use axum::response::Json; use 
rustc_hash::FxHashMap; @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; use tracing::info; -use crate::parsing::{parse_bounds, parse_filters, row_passes_filters}; +use crate::parsing::{bounds_intersect, parse_bounds, parse_filters, row_passes_filters}; use crate::state::AppState; #[derive(Serialize)] @@ -96,7 +96,7 @@ pub async fn get_postcodes( let filters_str = params.filters.clone(); let (parsed_filters, parsed_enum_filters) = parse_filters( params.filters.as_deref(), - &state.data.feature_names, + &state.feature_name_to_index, &state.data.enum_values, ); let num_filters = parsed_filters.len() + parsed_enum_filters.len(); @@ -113,11 +113,7 @@ pub async fn get_postcodes( if name.is_empty() { return None; } - state - .data - .feature_names - .iter() - .position(|feat| feat == name) + state.feature_name_to_index.get(name).copied() }) .collect() }); @@ -134,12 +130,6 @@ pub async fn get_postcodes( let has_selective = field_indices.is_some(); let sel_indices = field_indices.as_deref().unwrap_or(&[]); - // Step 1: Find postcodes within bounds using spatial grid on centroids - let postcode_indices: Vec = postcode_data.grid.query(south, west, north, east); - - // Step 2: For each postcode, aggregate properties - let mut postcode_aggs: FxHashMap = FxHashMap::default(); - // Build postcode -> rows mapping by iterating properties in bounds // and grouping by their postcode let mut postcode_rows: FxHashMap> = FxHashMap::default(); @@ -165,24 +155,23 @@ pub async fn get_postcodes( } }); - // Now aggregate for each postcode that's in bounds and has properties - for &pc_idx in &postcode_indices { - let idx = pc_idx as usize; - if let Some(rows) = postcode_rows.get(&idx) { - let agg = postcode_aggs - .entry(idx) - .or_insert_with(|| PostcodeAgg::new(num_features)); - for &row in rows { - if has_selective { - agg.add_row_selective(feature_data, row, num_features, sel_indices); - } else { - agg.add_row(feature_data, row, num_features); - } + // 
Aggregate for each postcode that has properties in bounds + // (polygon intersection check happens later when building response) + let mut postcode_aggs: FxHashMap = FxHashMap::default(); + for (&pc_idx, rows) in &postcode_rows { + let agg = postcode_aggs + .entry(pc_idx) + .or_insert_with(|| PostcodeAgg::new(num_features)); + for &row in rows { + if has_selective { + agg.add_row_selective(feature_data, row, num_features, sel_indices); + } else { + agg.add_row(feature_data, row, num_features); } } } - // Build response + // Build response, filtering postcodes to only those whose polygon intersects query bounds let mut features = Vec::with_capacity(postcode_aggs.len()); for (pc_idx, aggregation) in postcode_aggs { @@ -190,6 +179,23 @@ pub async fn get_postcodes( continue; } + // Compute postcode polygon bounding box and check intersection with query bounds + let vertices = &postcode_data.vertices[pc_idx]; + let (mut pc_south, mut pc_north) = (f64::INFINITY, f64::NEG_INFINITY); + let (mut pc_west, mut pc_east) = (f64::INFINITY, f64::NEG_INFINITY); + for &[lon, lat] in vertices { + let lon_f = lon as f64; + let lat_f = lat as f64; + if lat_f < pc_south { pc_south = lat_f; } + if lat_f > pc_north { pc_north = lat_f; } + if lon_f < pc_west { pc_west = lon_f; } + if lon_f > pc_east { pc_east = lon_f; } + } + + if !bounds_intersect(pc_south, pc_west, pc_north, pc_east, south, west, north, east) { + continue; + } + let mut map = Map::new(); map.insert( "postcode".into(), @@ -198,7 +204,7 @@ pub async fn get_postcodes( map.insert("count".into(), Value::Number(aggregation.count.into())); // Add vertices as array of [lon, lat] pairs - let vertices_array: Vec = postcode_data.vertices[pc_idx] + let vertices_array: Vec = vertices .iter() .map(|[lon, lat]| Value::Array(vec![Value::from(*lon as f64), Value::from(*lat as f64)])) .collect(); @@ -244,3 +250,44 @@ pub async fn get_postcodes( Ok(Json(response)) } + +#[derive(Serialize)] +pub struct PostcodeLookupResponse { + pub 
postcode: String, + pub latitude: f64, + pub longitude: f64, + /// Polygon vertices as [[lon, lat], ...] for rendering highlight + pub vertices: Vec<[f64; 2]>, +} + +/// Look up a single postcode and return its centroid coordinates and polygon. +pub async fn get_postcode_lookup( + state: Arc, + Path(postcode): Path, +) -> Result, StatusCode> { + // Normalize the postcode: uppercase, remove extra spaces, ensure single space + let normalized = postcode + .to_uppercase() + .split_whitespace() + .collect::>() + .join(" "); + + let postcode_data = &state.postcode_data; + + if let Some(&idx) = postcode_data.postcode_to_idx.get(&normalized) { + let (lat, lon) = postcode_data.centroids[idx]; + let vertices: Vec<[f64; 2]> = postcode_data.vertices[idx] + .iter() + .map(|[lo, la]| [*lo as f64, *la as f64]) + .collect(); + info!(postcode = %normalized, "GET /api/postcode/{postcode}"); + Ok(Json(PostcodeLookupResponse { + postcode: normalized, + latitude: lat as f64, + longitude: lon as f64, + vertices, + })) + } else { + Err(StatusCode::NOT_FOUND) + } +} diff --git a/server-rs/src/routes/properties.rs b/server-rs/src/routes/properties.rs index d851f1c..a250fdb 100644 --- a/server-rs/src/routes/properties.rs +++ b/server-rs/src/routes/properties.rs @@ -66,7 +66,7 @@ fn non_empty_string(text: &str) -> Option { /// Look up an enum feature value by trying multiple possible column names. /// Uses the unified feature model: enum values stored as f32 indices in feature_data. 
fn lookup_enum_value( - feature_names: &[String], + feature_name_to_index: &FxHashMap, feature_data: &[f32], num_features: usize, enum_values: &FxHashMap>, @@ -74,7 +74,7 @@ fn lookup_enum_value( names: &[&str], ) -> Option { for name in names { - if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == *name) { + if let Some(&feat_idx) = feature_name_to_index.get(*name) { if let Some(values) = enum_values.get(&feat_idx) { let value = feature_data[row * num_features + feat_idx]; if value.is_finite() { @@ -120,7 +120,7 @@ pub async fn get_hexagon_properties( let filters_str = params.filters.clone(); let (parsed_filters, parsed_enum_filters) = parse_filters( params.filters.as_deref(), - &state.data.feature_names, + &state.feature_name_to_index, &state.data.enum_values, ); let num_filters = parsed_filters.len() + parsed_enum_filters.len(); @@ -134,6 +134,7 @@ pub async fn get_hexagon_properties( let num_features = state.data.num_features; let feature_data = &state.data.feature_data; let feature_names = &state.data.feature_names; + let feature_name_to_index = &state.feature_name_to_index; let enum_values = &state.data.enum_values; let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001); @@ -199,7 +200,7 @@ pub async fn get_hexagon_properties( postcode: non_empty_string(state.data.postcode(row)), is_construction_date_approximate: Some(state.data.is_approx_build_date(row)), property_type: lookup_enum_value( - feature_names, + feature_name_to_index, feature_data, num_features, enum_values, @@ -207,7 +208,7 @@ pub async fn get_hexagon_properties( &["Property type", "epc_property_type", "pp_property_type"], ), built_form: lookup_enum_value( - feature_names, + feature_name_to_index, feature_data, num_features, enum_values, @@ -215,7 +216,7 @@ pub async fn get_hexagon_properties( &["Property type/built form", "built_form"], ), duration: lookup_enum_value( - feature_names, + feature_name_to_index, feature_data, num_features, enum_values, @@ 
-223,7 +224,7 @@ pub async fn get_hexagon_properties( &["Leashold/Freehold", "duration"], ), current_energy_rating: lookup_enum_value( - feature_names, + feature_name_to_index, feature_data, num_features, enum_values, @@ -231,7 +232,7 @@ pub async fn get_hexagon_properties( &["Current energy rating", "current_energy_rating"], ), potential_energy_rating: lookup_enum_value( - feature_names, + feature_name_to_index, feature_data, num_features, enum_values, diff --git a/server-rs/src/routes/tiles.rs b/server-rs/src/routes/tiles.rs index ef868e3..252d9cd 100644 --- a/server-rs/src/routes/tiles.rs +++ b/server-rs/src/routes/tiles.rs @@ -106,18 +106,7 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s "paint": { "background-color": bg_color } })]; - // Water layer - if layer_ids.contains(&"water") { - style_layers.push(serde_json::json!({ - "id": "water", - "type": "fill", - "source": "protomaps", - "source-layer": "water", - "paint": { "fill-color": water_color } - })); - } - - // Land/earth layer + // Land/earth layer (must come before water so rivers render on top) if layer_ids.contains(&"earth") { style_layers.push(serde_json::json!({ "id": "earth", @@ -128,9 +117,9 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s })); } - // Landuse + // Landuse (parks, forests) - render before water if layer_ids.contains(&"landuse") { - let landuse_color = if is_dark { "#1f2d1f" } else { "#d8e8c8" }; + let park_color = if is_dark { "#2d4a2d" } else { "#c8e6c8" }; style_layers.push(serde_json::json!({ "id": "landuse-park", "type": "fill", @@ -141,7 +130,18 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s ["==", ["get", "pmap:kind"], "nature_reserve"], ["==", ["get", "pmap:kind"], "forest"] ], - "paint": { "fill-color": landuse_color, "fill-opacity": 0.5 } + "paint": { "fill-color": park_color, "fill-opacity": 0.7 } + })); + } + + // Water layer (after earth so rivers show on top of 
land) + if layer_ids.contains(&"water") { + style_layers.push(serde_json::json!({ + "id": "water", + "type": "fill", + "source": "protomaps", + "source-layer": "water", + "paint": { "fill-color": water_color } })); } diff --git a/server-rs/src/semantic_tests.rs b/server-rs/src/semantic_tests.rs new file mode 100644 index 0000000..b2803f4 --- /dev/null +++ b/server-rs/src/semantic_tests.rs @@ -0,0 +1,974 @@ +//! Comprehensive semantic tests for the server. +//! +//! These tests validate the correctness of data processing, aggregation logic, +//! spatial queries, and filter semantics without requiring real data files. + +#[cfg(test)] +mod tests { + use rustc_hash::FxHashMap; + + use crate::data::{compute_feature_stats, Histogram}; + use crate::features::Bounds; + use crate::parsing::{ + bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters, + ParsedEnumFilter, ParsedFilter, + }; + use crate::utils::GridIndex; + + // ========================================================================= + // GridIndex Tests + // ========================================================================= + + mod grid_index { + use super::*; + + #[test] + fn empty_grid_returns_empty() { + let grid = GridIndex::build(&[], &[], 0.01); + assert!(grid.query(-90.0, -180.0, 90.0, 180.0).is_empty()); + } + + #[test] + fn single_point_inside_query() { + let lat = vec![51.5_f32]; + let lon = vec![-0.1_f32]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + // Query that contains the point + let result = grid.query(51.4, -0.2, 51.6, 0.0); + assert_eq!(result.len(), 1); + assert_eq!(result[0], 0); + } + + #[test] + fn single_point_outside_query() { + let lat = vec![51.5_f32]; + let lon = vec![-0.1_f32]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + // Query that doesn't contain the point + let result = grid.query(52.0, 0.0, 53.0, 1.0); + assert!(result.is_empty()); + } + + #[test] + fn multiple_points_partial_query() { + let lat = vec![51.5_f32, 51.6, 51.7, 
52.0]; + let lon = vec![-0.1_f32, -0.1, -0.1, -0.1]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + // Query that contains only points 0, 1, 2 + let result = grid.query(51.4, -0.2, 51.8, 0.0); + assert_eq!(result.len(), 3); + assert!(result.contains(&0)); + assert!(result.contains(&1)); + assert!(result.contains(&2)); + assert!(!result.contains(&3)); + } + + #[test] + fn query_at_grid_boundary() { + // Points at exactly cell boundaries + let lat = vec![51.0_f32, 51.01, 51.02]; + let lon = vec![0.0_f32, 0.01, 0.02]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + // Query just past the first cell + let result = grid.query(50.99, -0.01, 51.005, 0.005); + assert!(result.contains(&0)); + } + + #[test] + fn for_each_matches_query() { + let lat = vec![51.5_f32, 51.6, 51.7]; + let lon = vec![-0.1_f32, -0.2, -0.3]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let query_result = grid.query(51.4, -0.25, 51.65, 0.0); + + let mut foreach_result = Vec::new(); + grid.for_each_in_bounds(51.4, -0.25, 51.65, 0.0, |idx| { + foreach_result.push(idx); + }); + + // Both methods should return the same indices + assert_eq!(query_result.len(), foreach_result.len()); + for idx in &query_result { + assert!(foreach_result.contains(idx)); + } + } + + #[test] + fn negative_coordinates() { + let lat = vec![-33.9_f32, -33.8, -33.7]; + let lon = vec![151.2_f32, 151.3, 151.4]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + // Query: south=-34.0, north=-33.65 + // -33.9 is in range (between -34 and -33.65), lon 151.2 in range (151.1 to 151.5) ✓ + // -33.8 is in range, lon 151.3 in range ✓ + // -33.7 is in range, lon 151.4 in range ✓ + let result = grid.query(-34.0, 151.1, -33.65, 151.5); + assert_eq!(result.len(), 3); + } + + #[test] + fn query_bounds_completely_outside_grid() { + let lat = vec![51.5_f32]; + let lon = vec![-0.1_f32]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + // Query in a completely different area + let result = grid.query(0.0, 100.0, 10.0, 
110.0); + assert!(result.is_empty()); + } + + #[test] + fn very_small_cell_size() { + let lat = vec![51.5_f32, 51.5001, 51.5002]; + let lon = vec![-0.1_f32, -0.1001, -0.1002]; + let grid = GridIndex::build(&lat, &lon, 0.0001); + + let result = grid.query(51.4999, -0.1003, 51.5003, -0.0999); + assert_eq!(result.len(), 3); + } + } + + // ========================================================================= + // Filter Parsing Tests + // ========================================================================= + + mod filter_parsing { + use super::*; + + fn make_feature_name_to_index() -> FxHashMap { + [ + ("Price".into(), 0), + ("Area".into(), 1), + ("Rating".into(), 2), + ("Type".into(), 3), + ] + .into_iter() + .collect() + } + + fn make_enum_values() -> FxHashMap> { + let mut map = FxHashMap::default(); + // Feature index 3 (Type) is an enum + map.insert(3, vec!["Detached".into(), "Semi".into(), "Terraced".into(), "Flat".into()]); + map + } + + #[test] + fn parse_single_numeric_filter() { + let (numeric, enums) = parse_filters( + Some("Price:100000:500000"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert_eq!(numeric.len(), 1); + assert!(enums.is_empty()); + assert_eq!(numeric[0].feat_idx, 0); + assert_eq!(numeric[0].min, 100000.0); + assert_eq!(numeric[0].max, 500000.0); + } + + #[test] + fn parse_multiple_numeric_filters() { + let (numeric, _enums) = parse_filters( + Some("Price:100000:500000,Area:50:200"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert_eq!(numeric.len(), 2); + assert_eq!(numeric[0].feat_idx, 0); + assert_eq!(numeric[1].feat_idx, 1); + } + + #[test] + fn parse_single_enum_filter() { + let (numeric, enums) = parse_filters( + Some("Type:Detached|Flat"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert!(numeric.is_empty()); + assert_eq!(enums.len(), 1); + assert_eq!(enums[0].feat_idx, 3); + assert_eq!(enums[0].allowed, vec![0.0, 3.0]); // Detached=0, Flat=3 + } + + #[test] 
+ fn parse_mixed_filters() { + let (numeric, enums) = parse_filters( + Some("Price:100000:500000,Type:Semi|Terraced"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert_eq!(numeric.len(), 1); + assert_eq!(enums.len(), 1); + } + + #[test] + fn parse_unknown_feature_ignored() { + let (numeric, enums) = parse_filters( + Some("Unknown:100:200"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert!(numeric.is_empty()); + assert!(enums.is_empty()); + } + + #[test] + fn parse_invalid_numeric_format_ignored() { + let (numeric, enums) = parse_filters( + Some("Price:not_a_number:500000"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert!(numeric.is_empty()); + assert!(enums.is_empty()); + } + + #[test] + fn parse_enum_with_unknown_value() { + let (_numeric, enums) = parse_filters( + Some("Type:Detached|Unknown|Flat"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert_eq!(enums.len(), 1); + // Unknown is filtered out, only Detached and Flat remain + assert_eq!(enums[0].allowed, vec![0.0, 3.0]); + } + + #[test] + fn parse_empty_filter_string() { + let (numeric, enums) = parse_filters( + Some(""), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert!(numeric.is_empty()); + assert!(enums.is_empty()); + } + + #[test] + fn parse_none_filter() { + let (numeric, enums) = parse_filters( + None, + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert!(numeric.is_empty()); + assert!(enums.is_empty()); + } + + #[test] + fn parse_filter_with_whitespace() { + let (numeric, enums) = parse_filters( + Some("Price : 100000 : 500000 , Type : Detached | Flat"), + &make_feature_name_to_index(), + &make_enum_values(), + ); + + assert_eq!(numeric.len(), 1); + assert_eq!(enums.len(), 1); + } + } + + // ========================================================================= + // Filter Application Tests + // 
========================================================================= + + mod filter_application { + use super::*; + + #[test] + fn row_passes_no_filters() { + let feature_data = vec![100.0_f32, 50.0]; + assert!(row_passes_filters(0, &[], &[], &feature_data, 2)); + } + + #[test] + fn row_passes_numeric_filter_in_range() { + let feature_data = vec![150.0_f32]; + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + assert!(row_passes_filters(0, &filters, &[], &feature_data, 1)); + } + + #[test] + fn row_fails_numeric_filter_below_min() { + let feature_data = vec![50.0_f32]; + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1)); + } + + #[test] + fn row_fails_numeric_filter_above_max() { + let feature_data = vec![250.0_f32]; + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1)); + } + + #[test] + fn row_passes_numeric_filter_at_boundary() { + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + + // At min boundary + assert!(row_passes_filters(0, &filters, &[], &[100.0], 1)); + // At max boundary + assert!(row_passes_filters(0, &filters, &[], &[200.0], 1)); + } + + #[test] + fn row_fails_numeric_filter_with_nan() { + let feature_data = vec![f32::NAN]; + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1)); + } + + #[test] + fn row_passes_enum_filter_allowed_value() { + let feature_data = vec![1.0_f32]; // Index 1 + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed: vec![0.0, 1.0, 2.0], + }]; + assert!(row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); + } + + #[test] + fn row_fails_enum_filter_disallowed_value() { + let feature_data = vec![3.0_f32]; // Index 3 not in allowed + let 
enum_filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed: vec![0.0, 1.0, 2.0], + }]; + assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); + } + + #[test] + fn row_fails_enum_filter_with_nan() { + let feature_data = vec![f32::NAN]; + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed: vec![0.0, 1.0, 2.0], + }]; + assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); + } + + #[test] + fn row_fails_empty_enum_filter() { + let feature_data = vec![1.0_f32]; + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed: vec![], // Empty allowed list + }]; + // Empty allowed means nothing passes + assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); + } + + #[test] + fn multiple_filters_all_must_pass() { + // Row with two features: price=150, type=1 + let feature_data = vec![150.0_f32, 1.0]; + + let numeric_filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 1, + allowed: vec![1.0, 2.0], + }]; + + assert!(row_passes_filters(0, &numeric_filters, &enum_filters, &feature_data, 2)); + + // Change enum filter to not include 1.0 + let enum_filters_fail = vec![ParsedEnumFilter { + feat_idx: 1, + allowed: vec![0.0, 2.0], + }]; + assert!(!row_passes_filters(0, &numeric_filters, &enum_filters_fail, &feature_data, 2)); + } + + #[test] + fn row_major_layout_correct_indexing() { + // 3 rows, 2 features each + // Row 0: [100, 0], Row 1: [200, 1], Row 2: [300, 2] + let feature_data = vec![ + 100.0_f32, 0.0, // Row 0 + 200.0, 1.0, // Row 1 + 300.0, 2.0, // Row 2 + ]; + let num_features = 2; + + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 150.0, + max: 250.0, + }]; + + assert!(!row_passes_filters(0, &filters, &[], &feature_data, num_features)); // 100 not in range + assert!(row_passes_filters(1, &filters, &[], &feature_data, num_features)); // 200 in range + assert!(!row_passes_filters(2, &filters, &[], 
&feature_data, num_features)); // 300 not in range + } + } + + // ========================================================================= + // Bounds Parsing Tests + // ========================================================================= + + mod bounds_parsing { + use super::*; + + #[test] + fn parse_valid_bounds() { + let (south, west, north, east) = parse_bounds("51.0,-0.5,52.0,0.5").unwrap(); + assert_eq!(south, 51.0); + assert_eq!(west, -0.5); + assert_eq!(north, 52.0); + assert_eq!(east, 0.5); + } + + #[test] + fn parse_bounds_with_spaces() { + let (south, west, _north, _east) = parse_bounds("51.0, -0.5, 52.0, 0.5").unwrap(); + assert_eq!(south, 51.0); + assert_eq!(west, -0.5); + } + + #[test] + fn parse_bounds_negative_values() { + let (south, _west, north, _east) = parse_bounds("-51.5,-0.5,-50.0,0.5").unwrap(); + assert_eq!(south, -51.5); + assert_eq!(north, -50.0); + } + + #[test] + fn parse_bounds_invalid_too_few_parts() { + assert!(parse_bounds("51.0,-0.5,52.0").is_err()); + } + + #[test] + fn parse_bounds_invalid_too_many_parts() { + assert!(parse_bounds("51.0,-0.5,52.0,0.5,1.0").is_err()); + } + + #[test] + fn parse_bounds_invalid_non_numeric() { + assert!(parse_bounds("51.0,abc,52.0,0.5").is_err()); + } + + #[test] + fn parse_bounds_empty_string() { + assert!(parse_bounds("").is_err()); + } + } + + // ========================================================================= + // Bounds Intersection Tests + // ========================================================================= + + mod bounds_intersection { + use super::*; + + #[test] + fn overlapping_boxes_intersect() { + assert!(bounds_intersect( + 0.0, 0.0, 2.0, 2.0, // Box A + 1.0, 1.0, 3.0, 3.0 // Box B overlaps + )); + } + + #[test] + fn one_box_inside_other_intersects() { + assert!(bounds_intersect( + 0.0, 0.0, 10.0, 10.0, // Box A (large) + 2.0, 2.0, 5.0, 5.0 // Box B (inside A) + )); + } + + #[test] + fn touching_at_corner_intersects() { + assert!(bounds_intersect( + 0.0, 0.0, 1.0, 
1.0, // Box A + 1.0, 1.0, 2.0, 2.0 // Box B touches at (1,1) + )); + } + + #[test] + fn touching_at_edge_intersects() { + assert!(bounds_intersect( + 0.0, 0.0, 1.0, 1.0, // Box A + 1.0, 0.0, 2.0, 1.0 // Box B sits north of A and shares A's north edge (lat = 1.0) + )); + } + + #[test] + fn disjoint_horizontally_no_intersect() { + assert!(!bounds_intersect( + 0.0, 0.0, 1.0, 1.0, // Box A + 0.0, 2.0, 1.0, 3.0 // Box B to the right + )); + } + + #[test] + fn disjoint_vertically_no_intersect() { + assert!(!bounds_intersect( + 0.0, 0.0, 1.0, 1.0, // Box A + 2.0, 0.0, 3.0, 1.0 // Box B above + )); + } + + #[test] + fn disjoint_diagonally_no_intersect() { + assert!(!bounds_intersect( + 0.0, 0.0, 1.0, 1.0, // Box A + 2.0, 2.0, 3.0, 3.0 // Box B diagonally away + )); + } + + #[test] + fn negative_coordinates_intersect() { + assert!(bounds_intersect( + -2.0, -2.0, -1.0, -1.0, // Box A (negative coords) + -1.5, -1.5, -0.5, -0.5 // Box B overlaps + )); + } + } + + // ========================================================================= + // H3 Cell Bounds Tests + // ========================================================================= + + mod h3_bounds { + use super::*; + use std::str::FromStr; + + #[test] + fn h3_cell_bounds_zero_buffer() { + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + let (south, west, north, east) = h3_cell_bounds(cell, 0.0); + + // San Francisco area, should be roughly 37.77°N, -122.4°W + assert!(south < north, "south {} should be < north {}", south, north); + assert!(west < east, "west {} should be < east {}", west, east); + assert!(south > 30.0 && south < 45.0); + assert!(west < -100.0); + } + + #[test] + fn h3_cell_bounds_with_buffer() { + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + let (s0, w0, n0, e0) = h3_cell_bounds(cell, 0.0); + let (s1, w1, n1, e1) = h3_cell_bounds(cell, 0.1); + + // With buffer, bounds should be larger + assert!(s1 < s0, "south with buffer should be smaller"); + assert!(w1 < w0, "west with buffer should be 
smaller"); + assert!(n1 > n0, "north with buffer should be larger"); + assert!(e1 > e0, "east with buffer should be larger"); + + // Buffer should be exactly 0.1 degrees + assert!((s0 - s1 - 0.1).abs() < 1e-10); + assert!((w0 - w1 - 0.1).abs() < 1e-10); + } + + #[test] + fn h3_cell_bounds_different_resolutions() { + // Resolution 9 cell + let cell_high = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + // Get its resolution 5 parent + let res5 = h3o::Resolution::try_from(5).unwrap(); + let cell_low = cell_high.parent(res5).unwrap(); + + let (s_low, w_low, n_low, e_low) = h3_cell_bounds(cell_low, 0.0); + let (s_high, w_high, n_high, e_high) = h3_cell_bounds(cell_high, 0.0); + + // Lower resolution cell should have larger bounds + let area_low = (n_low - s_low) * (e_low - w_low); + let area_high = (n_high - s_high) * (e_high - w_high); + assert!(area_low > area_high, "Lower res should have larger area"); + } + } + + // ========================================================================= + // Histogram Computation Tests + // ========================================================================= + + mod histogram { + use super::*; + + fn make_fixed_bounds(min: f32, max: f32) -> Bounds { + Bounds::Fixed { min, max } + } + + fn make_percentile_bounds(low: f64, high: f64) -> Bounds { + Bounds::Percentile { low, high } + } + + #[test] + fn histogram_empty_data() { + let data: Vec = vec![]; + let bounds = make_fixed_bounds(0.0, 100.0); + let stats = compute_feature_stats(&data, &bounds); + + assert_eq!(stats.slider_min, 0.0); + assert_eq!(stats.slider_max, 100.0); + assert_eq!(stats.histogram.counts.iter().sum::(), 0); + } + + #[test] + fn histogram_single_value() { + let data = vec![50.0_f32]; + let bounds = make_fixed_bounds(0.0, 100.0); + let stats = compute_feature_stats(&data, &bounds); + + assert_eq!(stats.histogram.min, 50.0); + assert_eq!(stats.histogram.max, 50.0); + assert_eq!(stats.histogram.counts.iter().sum::(), 1); + } + + #[test] + fn 
histogram_uniform_distribution() { + // 100 values from 0 to 99 + let data: Vec = (0..100).map(|i| i as f32).collect(); + let bounds = make_fixed_bounds(0.0, 100.0); + let stats = compute_feature_stats(&data, &bounds); + + assert_eq!(stats.histogram.min, 0.0); + assert_eq!(stats.histogram.max, 99.0); + assert_eq!(stats.histogram.counts.iter().sum::(), 100); + } + + #[test] + fn histogram_with_nan_values() { + let data = vec![10.0_f32, f32::NAN, 20.0, f32::NAN, 30.0]; + let bounds = make_fixed_bounds(0.0, 100.0); + let stats = compute_feature_stats(&data, &bounds); + + // Only 3 non-NaN values + assert_eq!(stats.histogram.counts.iter().sum::(), 3); + assert_eq!(stats.histogram.min, 10.0); + assert_eq!(stats.histogram.max, 30.0); + } + + #[test] + fn histogram_all_nan() { + let data = vec![f32::NAN, f32::NAN, f32::NAN]; + let bounds = make_fixed_bounds(0.0, 100.0); + let stats = compute_feature_stats(&data, &bounds); + + assert_eq!(stats.histogram.counts.iter().sum::(), 0); + } + + #[test] + fn histogram_all_same_value() { + let data = vec![42.0_f32; 1000]; + let bounds = make_fixed_bounds(0.0, 100.0); + let stats = compute_feature_stats(&data, &bounds); + + assert_eq!(stats.histogram.min, 42.0); + assert_eq!(stats.histogram.max, 42.0); + assert_eq!(stats.histogram.p1, 42.0); + assert_eq!(stats.histogram.p99, 42.0); + assert_eq!(stats.histogram.counts.iter().sum::(), 1000); + } + + #[test] + fn histogram_percentile_bounds() { + // Data with outliers: 1 very low, 1 very high, 98 in middle + let mut data: Vec = vec![0.0]; // Low outlier + data.extend((1..99).map(|i| 50.0 + i as f32 * 0.01)); // Main data around 50 + data.push(1000.0); // High outlier + + let bounds = make_percentile_bounds(2.0, 98.0); + let stats = compute_feature_stats(&data, &bounds); + + // Slider should exclude outliers + assert!(stats.slider_min > 0.0); + assert!(stats.slider_max < 1000.0); + } + + #[test] + fn histogram_bin_for_value() { + let hist = Histogram { + min: 0.0, + max: 100.0, + p1: 
10.0, + p99: 90.0, + counts: vec![0; 10], // 10 bins + }; + + // Low outlier bin (bin 0) + assert_eq!(hist.bin_for_value(5.0), 0); + + // High outlier bin (bin 9) + assert_eq!(hist.bin_for_value(95.0), 9); + + // Middle bins (bins 1-8) + let mid_value = 50.0; + let bin = hist.bin_for_value(mid_value); + assert!(bin >= 1 && bin <= 8); + } + + #[test] + fn histogram_middle_bin_width() { + let hist = Histogram { + min: 0.0, + max: 100.0, + p1: 10.0, + p99: 90.0, + counts: vec![0; 10], // 10 bins + }; + + // Middle bins span p1 to p99 (80 units) across 8 bins (10 - 2 outlier bins) + let expected_width = (90.0 - 10.0) / 8.0; + assert!((hist.middle_bin_width() - expected_width).abs() < 0.001); + } + + #[test] + fn histogram_cardinality_caps_bins() { + // Only 3 unique values - should cap bins at 3 + let data = vec![1.0_f32, 1.0, 2.0, 2.0, 3.0, 3.0]; + let bounds = make_fixed_bounds(0.0, 100.0); + let stats = compute_feature_stats(&data, &bounds); + + // Bins should be capped at cardinality (3) + assert_eq!(stats.histogram.counts.len(), 3); + } + } + + // ========================================================================= + // Aggregation Semantics Tests + // ========================================================================= + + mod aggregation { + /// Test that min/max aggregation correctly handles NaN values + #[test] + fn min_max_skips_nan() { + let values = vec![10.0_f32, f32::NAN, 20.0, f32::NAN, 5.0]; + + let mut min = f32::INFINITY; + let mut max = f32::NEG_INFINITY; + for &v in &values { + if v.is_finite() { + if v < min { + min = v; + } + if v > max { + max = v; + } + } + } + + assert_eq!(min, 5.0); + assert_eq!(max, 20.0); + } + + /// Test that counting only counts non-NaN values + #[test] + fn count_skips_nan() { + let values = vec![1.0_f32, f32::NAN, 2.0, f32::NAN, 3.0]; + let count = values.iter().filter(|v| v.is_finite()).count(); + assert_eq!(count, 3); + } + + /// Test enum value counting with indices + #[test] + fn enum_value_counting() { + 
// Enum values: 0.0=Detached, 1.0=Semi, 2.0=Terraced, 3.0=Flat + let values = vec![0.0_f32, 1.0, 1.0, 2.0, f32::NAN, 3.0, 1.0]; + let enum_count = 4; + + let mut counts = vec![0u64; enum_count]; + for &v in &values { + if v.is_finite() { + let idx = v as usize; + if idx < enum_count { + counts[idx] += 1; + } + } + } + + assert_eq!(counts[0], 1); // Detached + assert_eq!(counts[1], 3); // Semi + assert_eq!(counts[2], 1); // Terraced + assert_eq!(counts[3], 1); // Flat + } + } + + // ========================================================================= + // H3 Resolution Tests + // ========================================================================= + + mod h3_resolution { + use std::str::FromStr; + + #[test] + fn parent_cell_at_lower_resolution() { + // Resolution 9 cell + let child = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + + // Get parent at resolution 7 + let parent_res = h3o::Resolution::try_from(7).unwrap(); + let parent = child.parent(parent_res).unwrap(); + + assert_eq!(parent.resolution(), parent_res); + + // Child should be contained in parent + assert!(parent.children(child.resolution()).any(|c| c == child)); + } + + #[test] + fn same_resolution_returns_self() { + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + let res = cell.resolution(); + + // Getting parent at same resolution should return the cell itself + let parent = cell.parent(res); + assert_eq!(parent, Some(cell)); + } + + #[test] + fn higher_resolution_parent_fails() { + // Resolution 9 cell + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + + // Try to get "parent" at higher resolution (impossible) + let higher_res = h3o::Resolution::try_from(10).unwrap(); + let parent = cell.parent(higher_res); + assert!(parent.is_none()); + } + } + + // ========================================================================= + // Edge Cases and Error Handling + // ========================================================================= + + mod 
edge_cases { + use super::*; + + #[test] + fn very_large_coordinates() { + let lat = vec![89.9_f32, -89.9]; + let lon = vec![179.9_f32, -179.9]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(-90.0, -180.0, 90.0, 180.0); + assert_eq!(result.len(), 2); + } + + #[test] + fn filter_at_float_precision_boundary() { + let value = 100.0_f32; // NOTE(review): f32::EPSILON (2^-23) is below half an ULP at 100.0, so both bounds below round to exactly 100.0 and this only exercises min == max == value + let filter = ParsedFilter { + feat_idx: 0, + min: 100.0 - f32::EPSILON, + max: 100.0 + f32::EPSILON, + }; + + assert!(row_passes_filters(0, &[filter], &[], &[value], 1)); + } + + #[test] + fn enum_filter_with_fractional_index() { + // What happens if the stored value isn't exactly an integer? + let feature_data = vec![1.5_f32]; // Not exactly 1.0 or 2.0 + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed: vec![1.0, 2.0], + }]; + + // 1.5 is not in the allowed list [1.0, 2.0] + assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); + } + + #[test] + // Inverted (south > north) bounds: accepted by parse_bounds, answered with an empty result by GridIndex::query. + fn bounds_with_inverted_min_max() { + // What if south > north? (Invalid input) + // The parse_bounds function doesn't validate this + let (south, _west, north, _east) = parse_bounds("52.0,-0.5,51.0,0.5").unwrap(); + assert_eq!(south, 52.0); + assert_eq!(north, 51.0); + // south > north is allowed by parsing but logically invalid + + // GridIndex should handle this gracefully + let lat = vec![51.5_f32]; + let lon = vec![-0.1_f32]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + // Query with inverted bounds returns empty (row_min > row_max is rejected) + let result = grid.query(52.0, -0.5, 51.0, 0.5); + assert!(result.is_empty(), "Inverted bounds should return empty"); + } + + #[test] + fn infinity_values_in_data() { + // NOTE: The current implementation uses !is_nan() not is_finite() + // So INFINITY values ARE included in min/max calculations. + // This documents current behavior - consider if this should be fixed. 
+ let data = vec![f32::INFINITY, f32::NEG_INFINITY, 50.0]; + let bounds = Bounds::Fixed { + min: 0.0, + max: 100.0, + }; + let stats = compute_feature_stats(&data, &bounds); + + // Current behavior: infinity is included (uses !is_nan()) + assert_eq!(stats.histogram.min, f32::NEG_INFINITY); + assert_eq!(stats.histogram.max, f32::INFINITY); + // All 3 values are counted (none are NaN) + assert_eq!(stats.histogram.counts.iter().sum::(), 3); + } + + #[test] + fn only_finite_values() { + // Test that normal finite values work correctly + let data = vec![10.0_f32, 20.0, 30.0]; + let bounds = Bounds::Fixed { + min: 0.0, + max: 100.0, + }; + let stats = compute_feature_stats(&data, &bounds); + + assert_eq!(stats.histogram.min, 10.0); + assert_eq!(stats.histogram.max, 30.0); + assert_eq!(stats.histogram.counts.iter().sum::(), 3); + } + } +} diff --git a/server-rs/src/state.rs b/server-rs/src/state.rs index 043b109..c8b3479 100644 --- a/server-rs/src/state.rs +++ b/server-rs/src/state.rs @@ -1,3 +1,5 @@ +use rustc_hash::FxHashMap; + use crate::data::{POICategoryGroup, POIData, PostcodeData, PropertyData}; use crate::routes::FeaturesResponse; use crate::utils::GridIndex; @@ -12,6 +14,8 @@ pub struct AppState { pub poi_grid: GridIndex, /// Postcode boundary data for high-zoom rendering pub postcode_data: PostcodeData, + /// O(1) lookup: feature name → index in feature_names/feature_data + pub feature_name_to_index: FxHashMap, /// Precomputed JSON key names: "min_{feature_name}" for each feature pub min_keys: Vec, /// Precomputed JSON key names: "max_{feature_name}" for each feature diff --git a/server-rs/src/utils/grid_index.rs b/server-rs/src/utils/grid_index.rs index 079e03a..242c481 100644 --- a/server-rs/src/utils/grid_index.rs +++ b/server-rs/src/utils/grid_index.rs @@ -33,24 +33,19 @@ impl GridIndex { }; } + // Bounds pass: compute min/max lat/lon only; items are counted per cell in a separate loop once the bounds are known let mut min_lat = f32::INFINITY; let mut max_lat = f32::NEG_INFINITY; let mut min_lon = 
f32::INFINITY; let mut max_lon = f32::NEG_INFINITY; for index in 0..lat.len() { - if lat[index] < min_lat { - min_lat = lat[index]; - } - if lat[index] > max_lat { - max_lat = lat[index]; - } - if lon[index] < min_lon { - min_lon = lon[index]; - } - if lon[index] > max_lon { - max_lon = lon[index]; - } + let la = lat[index]; + let lo = lon[index]; + if la < min_lat { min_lat = la; } + if la > max_lat { max_lat = la; } + if lo < min_lon { min_lon = lo; } + if lo > max_lon { max_lon = lo; } } min_lat -= cell_size; @@ -70,7 +65,7 @@ impl GridIndex { "Building grid index (CSR)" ); - // First pass: count items per cell + // Count items per cell (now we know the bounds) let mut counts = vec![0u32; num_cells]; for index in 0..lat.len() { let grid_row = ((lat[index] - min_lat) / cell_size) as usize;