Various fixes

This commit is contained in:
Andras Schmelczer 2026-02-04 22:29:42 +00:00
parent 34a4d0ba86
commit 55598aaaa0
14 changed files with 1250 additions and 130 deletions

View file

@ -4,4 +4,4 @@ mod property;
pub use poi::{POICategoryGroup, POIData};
pub use postcodes::PostcodeData;
pub use property::{precompute_h3, Histogram, PropertyData};
pub use property::{compute_feature_stats, precompute_h3, Histogram, PropertyData};

View file

@ -5,6 +5,8 @@ mod metrics;
mod og_middleware;
pub mod parsing;
mod routes;
#[cfg(test)]
mod semantic_tests;
mod state;
pub mod utils;
@ -147,6 +149,13 @@ async fn main() -> anyhow::Result<()> {
let tile_reader = Arc::new(routes::init_tile_reader(tiles_path).await?);
info!("PMTiles loaded successfully");
let feature_name_to_index: rustc_hash::FxHashMap<String, usize> = property_data
.feature_names
.iter()
.enumerate()
.map(|(idx, name)| (name.clone(), idx))
.collect();
let min_keys: Vec<String> = property_data
.feature_names
.iter()
@ -219,6 +228,7 @@ async fn main() -> anyhow::Result<()> {
poi_data,
poi_grid,
postcode_data,
feature_name_to_index,
min_keys,
max_keys,
poi_category_groups,
@ -237,6 +247,7 @@ async fn main() -> anyhow::Result<()> {
let state_features = state.clone();
let state_hexagons = state.clone();
let state_postcodes = state.clone();
let state_postcode_lookup = state.clone();
let state_pois = state.clone();
let state_poi_categories = state.clone();
let state_hexagon_properties = state.clone();
@ -257,6 +268,10 @@ async fn main() -> anyhow::Result<()> {
"/api/postcodes",
get(move |query| routes::get_postcodes(state_postcodes.clone(), query)),
)
.route(
"/api/postcode/{postcode}",
get(move |path| routes::get_postcode_lookup(state_postcode_lookup.clone(), path)),
)
.route(
"/api/pois",
get(move |query| routes::get_pois(state_pois.clone(), query)),

View file

@ -1,5 +1,5 @@
mod bounds;
mod filters;
pub use bounds::{h3_cell_bounds, parse_bounds};
pub use bounds::{bounds_intersect, h3_cell_bounds, parse_bounds};
pub use filters::{parse_filters, row_passes_filters, ParsedEnumFilter, ParsedFilter};

View file

@ -1,11 +1,29 @@
use axum::http::StatusCode;
/// Compute the lat/lon bounding box of an H3 cell, with a configurable buffer in degrees.
/// Report whether two axis-aligned latitude/longitude boxes overlap.
///
/// Each box is passed as four scalars in `(south, west, north, east)` order,
/// i.e. `(min_lat, min_lon, max_lat, max_lon)`. Every comparison is inclusive,
/// so boxes that only share an edge or a corner count as intersecting. If any
/// coordinate is NaN the result is `false`, because every coordinate takes part
/// in exactly one comparison and comparisons against NaN are false.
#[inline]
pub fn bounds_intersect(
    a_south: f64,
    a_west: f64,
    a_north: f64,
    a_east: f64,
    b_south: f64,
    b_west: f64,
    b_north: f64,
    b_east: f64,
) -> bool {
    // The boxes overlap only when their extents overlap on both axes.
    let lon_overlaps = a_west <= b_east && b_west <= a_east;
    let lat_overlaps = a_south <= b_north && b_south <= a_north;
    lon_overlaps && lat_overlaps
}
/// Compute the lat/lon bounding box of an H3 cell in degrees, with a configurable buffer in degrees.
/// Returns (south, west, north, east) / (min_lat, min_lon, max_lat, max_lon).
pub fn h3_cell_bounds(cell: h3o::CellIndex, buffer: f64) -> (f64, f64, f64, f64) {
let boundary = cell.boundary();
let (mut min_lat, mut max_lat) = (f64::INFINITY, f64::NEG_INFINITY);
let (mut min_lon, mut max_lon) = (f64::INFINITY, f64::NEG_INFINITY);
for vertex in boundary.iter() {
// h3o LatLng::lat()/lng() return degrees
let lat = vertex.lat();
let lon = vertex.lng();
if lat < min_lat {
@ -81,4 +99,42 @@ mod tests {
assert!((buf_max_lat - max_lat - 0.1).abs() < 1e-10);
assert!((buf_max_lon - max_lon - 0.1).abs() < 1e-10);
}
#[test]
fn h3_cell_bounds_returns_degrees_not_radians() {
    // "8928308280fffff" lies in the San Francisco area (about 37.77° latitude,
    // -122.4° longitude).
    let sf_cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
    let (south, west, north, east) = h3_cell_bounds(sf_cell, 0.0);

    // Radian values would all have a magnitude below π ≈ 3.14, so latitudes
    // around 37° can only be degrees.
    assert!(30.0 < south && south < 45.0, "min_lat {} should be ~37° (degrees)", south);
    assert!(30.0 < north && north < 45.0, "max_lat {} should be ~37° (degrees)", north);

    // The same argument holds for longitudes around -122°.
    assert!(west < -100.0, "min_lon {} should be ~-122° (degrees)", west);
    assert!(east < -100.0, "max_lon {} should be ~-122° (degrees)", east);
}
#[test]
fn bounds_intersect_overlapping() {
    // Each row is (box A, box B), both in (south, west, north, east) order.
    let intersecting = [
        // Partial overlap
        ((0.0, 0.0, 2.0, 2.0), (1.0, 1.0, 3.0, 3.0)),
        // B nested inside A
        ((0.0, 0.0, 10.0, 10.0), (2.0, 2.0, 5.0, 5.0)),
        // A nested inside B
        ((2.0, 2.0, 5.0, 5.0), (0.0, 0.0, 10.0, 10.0)),
        // Sharing an edge
        ((0.0, 0.0, 1.0, 1.0), (1.0, 0.0, 2.0, 1.0)),
    ];

    for (a, b) in intersecting {
        assert!(bounds_intersect(a.0, a.1, a.2, a.3, b.0, b.1, b.2, b.3));
    }
}
#[test]
fn bounds_intersect_non_overlapping() {
    // Each row is (box A, box B), both in (south, west, north, east) order.
    let disjoint = [
        // B lies entirely to the east of A
        ((0.0, 0.0, 1.0, 1.0), (0.0, 2.0, 1.0, 3.0)),
        // B lies entirely to the north of A
        ((0.0, 0.0, 1.0, 1.0), (2.0, 0.0, 3.0, 1.0)),
        // Separated on both axes
        ((0.0, 0.0, 1.0, 1.0), (5.0, 5.0, 6.0, 6.0)),
    ];

    for (a, b) in disjoint {
        assert!(!bounds_intersect(a.0, a.1, a.2, a.3, b.0, b.1, b.2, b.3));
    }
}
}

View file

@ -1,4 +1,4 @@
use rustc_hash::FxHashMap;
use rustc_hash::{FxHashMap, FxHashSet};
/// Filter for numeric features: value must be in [min, max] range.
pub struct ParsedFilter {
@ -8,9 +8,11 @@ pub struct ParsedFilter {
}
/// Filter for enum features: value must be one of the allowed indices.
/// Uses FxHashSet<u32> (f32 bits) for O(1) lookups instead of O(n) Vec::contains.
pub struct ParsedEnumFilter {
pub feat_idx: usize,
pub allowed: Vec<f32>,
/// Allowed enum indices stored as f32 bits for exact comparison
pub allowed: FxHashSet<u32>,
}
/// Parse comma-separated filter string into numeric and enum filters.
@ -18,7 +20,7 @@ pub struct ParsedEnumFilter {
/// Enum format: `name:val1|val2|val3` (pipe-separated string values)
pub fn parse_filters(
filter_str: Option<&str>,
feature_names: &[String],
feature_name_to_index: &FxHashMap<String, usize>,
enum_values: &FxHashMap<usize, Vec<String>>,
) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
let mut numeric = Vec::new();
@ -37,22 +39,22 @@ pub fn parse_filters(
let name = parts[0].trim();
let rest = parts[1].trim();
// Find feature index by name
let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) else {
// Find feature index by name (O(1) lookup)
let Some(&feat_idx) = feature_name_to_index.get(name) else {
continue;
};
// Check if this is an enum feature
if let Some(values) = enum_values.get(&feat_idx) {
// Enum filter: convert string values to f32 indices
let allowed: Vec<f32> = rest
// Enum filter: convert string values to f32 indices (stored as bits for O(1) lookup)
let allowed: FxHashSet<u32> = rest
.split('|')
.filter_map(|value| {
let value = value.trim();
values
.iter()
.position(|existing| existing == value)
.map(|position| position as f32)
.map(|position| (position as f32).to_bits())
})
.collect();
enums.push(ParsedEnumFilter { feat_idx, allowed });
@ -93,7 +95,8 @@ pub fn row_passes_filters(
value.is_finite() && value >= filter.min && value <= filter.max
}) && enum_filters.iter().all(|filter| {
let value = feature_data[base + filter.feat_idx];
value.is_finite() && filter.allowed.contains(&value)
// O(1) lookup using f32 bits as key
value.is_finite() && filter.allowed.contains(&value.to_bits())
})
}
@ -101,8 +104,10 @@ pub fn row_passes_filters(
mod tests {
use super::*;
fn feature_names() -> Vec<String> {
vec!["price".into(), "area".into(), "rating".into()]
fn feature_name_to_index() -> FxHashMap<String, usize> {
[("price".into(), 0), ("area".into(), 1), ("rating".into(), 2)]
.into_iter()
.collect()
}
fn enum_values() -> FxHashMap<usize, Vec<String>> {
@ -113,7 +118,7 @@ mod tests {
#[test]
fn parse_filters_numeric() {
let (numeric, enums) = parse_filters(Some("price:100:500"), &feature_names(), &enum_values());
let (numeric, enums) = parse_filters(Some("price:100:500"), &feature_name_to_index(), &enum_values());
assert_eq!(numeric.len(), 1);
assert_eq!(numeric[0].feat_idx, 0);
assert_eq!(numeric[0].min, 100.0);
@ -123,22 +128,25 @@ mod tests {
#[test]
fn parse_filters_enum() {
let (numeric, enums) = parse_filters(Some("rating:A|C"), &feature_names(), &enum_values());
let (numeric, enums) = parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values());
assert!(numeric.is_empty());
assert_eq!(enums.len(), 1);
assert_eq!(enums[0].feat_idx, 2);
assert_eq!(enums[0].allowed, vec![0.0, 2.0]);
// Allowed values are stored as f32 bits
assert!(enums[0].allowed.contains(&(0.0_f32).to_bits())); // A = index 0
assert!(enums[0].allowed.contains(&(2.0_f32).to_bits())); // C = index 2
assert_eq!(enums[0].allowed.len(), 2);
}
#[test]
fn parse_filters_empty_and_invalid() {
let (n, e) = parse_filters(None, &feature_names(), &enum_values());
let (n, e) = parse_filters(None, &feature_name_to_index(), &enum_values());
assert!(n.is_empty() && e.is_empty());
let (n, e) = parse_filters(Some(""), &feature_names(), &enum_values());
let (n, e) = parse_filters(Some(""), &feature_name_to_index(), &enum_values());
assert!(n.is_empty() && e.is_empty());
let (n, e) = parse_filters(Some("unknown:1:2"), &feature_names(), &enum_values());
let (n, e) = parse_filters(Some("unknown:1:2"), &feature_name_to_index(), &enum_values());
assert!(n.is_empty() && e.is_empty());
}

View file

@ -12,6 +12,6 @@ pub use hexagon_stats::get_hexagon_stats;
pub use hexagons::get_hexagons;
pub use og_image::get_og_image;
pub use pois::{get_poi_categories, get_pois};
pub use postcodes::get_postcodes;
pub use postcodes::{get_postcode_lookup, get_postcodes};
pub use properties::get_hexagon_properties;
pub use tiles::{get_style, get_tile, init_tile_reader};

View file

@ -8,7 +8,7 @@ use axum::response::Json;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
@ -87,7 +87,7 @@ pub async fn get_hexagon_stats(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -188,15 +188,17 @@ pub async fn get_hexagon_stats(
let global_hist = &state.data.feature_stats[feature_index].histogram;
let p1 = global_hist.p1;
let p99 = global_hist.p99;
// Use same bin count as global histogram for consistency
let num_bins = global_hist.counts.len();
let mut count = 0usize;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64;
let mut bins = vec![0u64; HISTOGRAM_BINS];
let mut bins = vec![0u64; num_bins];
// Compute middle bin width (between p1 and p99)
let middle_bins = HISTOGRAM_BINS.saturating_sub(2);
let middle_bins = num_bins.saturating_sub(2);
let middle_width = if middle_bins > 0 && p99 > p1 {
(p99 - p1) / middle_bins as f32
} else {
@ -219,13 +221,13 @@ pub async fn get_hexagon_stats(
let bin = if value < p1 {
0 // Low outlier bin
} else if value >= p99 {
HISTOGRAM_BINS - 1 // High outlier bin
num_bins - 1 // High outlier bin
} else if middle_width > 0.0 {
// Middle bins (1 to n-2)
let middle_bin = ((value - p1) / middle_width) as usize;
(1 + middle_bin).min(HISTOGRAM_BINS - 2)
(1 + middle_bin).min(num_bins - 2)
} else {
HISTOGRAM_BINS / 2 // Fallback if p1 == p99
num_bins / 2 // Fallback if p1 == p99
};
bins[bin] += 1;
}

View file

@ -9,7 +9,7 @@ use serde_json::{Map, Value};
use tracing::{info, warn};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
use crate::parsing::{bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
#[derive(Serialize)]
@ -92,21 +92,29 @@ impl CellAgg {
}
}
/// Build feature maps from aggregated cell data.
/// Build feature maps from aggregated cell data, filtering to only cells that intersect the query bounds.
fn build_feature_maps(
groups: &FxHashMap<u64, CellAgg>,
min_keys: &[String],
max_keys: &[String],
num_features: usize,
indices: Option<&[usize]>,
query_bounds: (f64, f64, f64, f64), // (south, west, north, east)
) -> Vec<Map<String, Value>> {
let mut features = Vec::with_capacity(groups.len());
let (q_south, q_west, q_north, q_east) = query_bounds;
for (&cell_id, aggregation) in groups {
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
continue;
};
// Filter out cells that don't intersect the query bounds
let (c_south, c_west, c_north, c_east) = h3_cell_bounds(cell, 0.0);
if !bounds_intersect(c_south, c_west, c_north, c_east, q_south, q_west, q_north, q_east) {
continue;
}
let mut map = Map::new();
map.insert("h3".into(), Value::String(cell.to_string()));
map.insert("count".into(), Value::Number(aggregation.count.into()));
@ -166,7 +174,7 @@ pub async fn get_hexagons(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -185,11 +193,7 @@ pub async fn get_hexagons(
if name.is_empty() {
return None;
}
state
.data
.feature_names
.iter()
.position(|feat| feat == name)
state.feature_name_to_index.get(name).copied()
})
.collect()
});
@ -209,20 +213,6 @@ pub async fn get_hexagons(
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
let has_selective = field_indices.is_some();
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
let aggregate_row = |groups: &mut FxHashMap<u64, CellAgg>, cell_id: u64, row: usize| {
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
if has_selective {
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
} else {
aggregation.add_row(feature_data, row, num_features);
}
};
let cell_for_row = |row: usize| -> u64 {
let max_cell = precomputed[row];
if !need_parent || max_cell == 0 {
@ -235,21 +225,48 @@ pub async fn get_hexagons(
.unwrap_or(0)
};
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
aggregate_row(&mut groups, cell_for_row(row), row);
});
// Hoist has_selective branch outside the hot loop to avoid per-row branching
if let Some(sel_indices) = field_indices.as_deref() {
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
let cell_id = cell_for_row(row);
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
});
} else {
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
let cell_id = cell_for_row(row);
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features));
aggregation.add_row(feature_data, row, num_features);
});
}
let t_agg = t0.elapsed();
@ -259,6 +276,7 @@ pub async fn get_hexagons(
max_keys,
num_features,
field_indices.as_deref(),
(south, west, north, east),
);
let t_total = t0.elapsed();

View file

@ -1,6 +1,6 @@
use std::sync::Arc;
use axum::extract::Query;
use axum::extract::{Path, Query};
use axum::http::StatusCode;
use axum::response::Json;
use rustc_hash::FxHashMap;
@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::info;
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
use crate::parsing::{bounds_intersect, parse_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
#[derive(Serialize)]
@ -96,7 +96,7 @@ pub async fn get_postcodes(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -113,11 +113,7 @@ pub async fn get_postcodes(
if name.is_empty() {
return None;
}
state
.data
.feature_names
.iter()
.position(|feat| feat == name)
state.feature_name_to_index.get(name).copied()
})
.collect()
});
@ -134,12 +130,6 @@ pub async fn get_postcodes(
let has_selective = field_indices.is_some();
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
// Step 1: Find postcodes within bounds using spatial grid on centroids
let postcode_indices: Vec<u32> = postcode_data.grid.query(south, west, north, east);
// Step 2: For each postcode, aggregate properties
let mut postcode_aggs: FxHashMap<usize, PostcodeAgg> = FxHashMap::default();
// Build postcode -> rows mapping by iterating properties in bounds
// and grouping by their postcode
let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
@ -165,24 +155,23 @@ pub async fn get_postcodes(
}
});
// Now aggregate for each postcode that's in bounds and has properties
for &pc_idx in &postcode_indices {
let idx = pc_idx as usize;
if let Some(rows) = postcode_rows.get(&idx) {
let agg = postcode_aggs
.entry(idx)
.or_insert_with(|| PostcodeAgg::new(num_features));
for &row in rows {
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices);
} else {
agg.add_row(feature_data, row, num_features);
}
// Aggregate for each postcode that has properties in bounds
// (polygon intersection check happens later when building response)
let mut postcode_aggs: FxHashMap<usize, PostcodeAgg> = FxHashMap::default();
for (&pc_idx, rows) in &postcode_rows {
let agg = postcode_aggs
.entry(pc_idx)
.or_insert_with(|| PostcodeAgg::new(num_features));
for &row in rows {
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices);
} else {
agg.add_row(feature_data, row, num_features);
}
}
}
// Build response
// Build response, filtering postcodes to only those whose polygon intersects query bounds
let mut features = Vec::with_capacity(postcode_aggs.len());
for (pc_idx, aggregation) in postcode_aggs {
@ -190,6 +179,23 @@ pub async fn get_postcodes(
continue;
}
// Compute postcode polygon bounding box and check intersection with query bounds
let vertices = &postcode_data.vertices[pc_idx];
let (mut pc_south, mut pc_north) = (f64::INFINITY, f64::NEG_INFINITY);
let (mut pc_west, mut pc_east) = (f64::INFINITY, f64::NEG_INFINITY);
for &[lon, lat] in vertices {
let lon_f = lon as f64;
let lat_f = lat as f64;
if lat_f < pc_south { pc_south = lat_f; }
if lat_f > pc_north { pc_north = lat_f; }
if lon_f < pc_west { pc_west = lon_f; }
if lon_f > pc_east { pc_east = lon_f; }
}
if !bounds_intersect(pc_south, pc_west, pc_north, pc_east, south, west, north, east) {
continue;
}
let mut map = Map::new();
map.insert(
"postcode".into(),
@ -198,7 +204,7 @@ pub async fn get_postcodes(
map.insert("count".into(), Value::Number(aggregation.count.into()));
// Add vertices as array of [lon, lat] pairs
let vertices_array: Vec<Value> = postcode_data.vertices[pc_idx]
let vertices_array: Vec<Value> = vertices
.iter()
.map(|[lon, lat]| Value::Array(vec![Value::from(*lon as f64), Value::from(*lat as f64)]))
.collect();
@ -244,3 +250,44 @@ pub async fn get_postcodes(
Ok(Json(response))
}
/// Response body returned by `get_postcode_lookup` for a single postcode.
#[derive(Serialize)]
pub struct PostcodeLookupResponse {
/// Postcode in its normalized form (upper-cased, single spaces between parts).
pub postcode: String,
/// Latitude of the postcode centroid, widened to `f64` from the stored value.
pub latitude: f64,
/// Longitude of the postcode centroid, widened to `f64` from the stored value.
pub longitude: f64,
/// Polygon vertices as [[lon, lat], ...] for rendering highlight
pub vertices: Vec<[f64; 2]>,
}
/// Resolve one postcode to its centroid coordinates and boundary polygon.
///
/// The path segment is normalized before the lookup: it is upper-cased, and
/// every run of whitespace is collapsed to a single space with leading and
/// trailing whitespace dropped. Returns `404 Not Found` when the normalized
/// postcode has no entry in `postcode_to_idx`.
pub async fn get_postcode_lookup(
    state: Arc<AppState>,
    Path(postcode): Path<String>,
) -> Result<Json<PostcodeLookupResponse>, StatusCode> {
    let upper = postcode.to_uppercase();
    let normalized = upper.split_whitespace().collect::<Vec<_>>().join(" ");

    let postcode_data = &state.postcode_data;
    let Some(&idx) = postcode_data.postcode_to_idx.get(&normalized) else {
        return Err(StatusCode::NOT_FOUND);
    };

    // Centroids are stored as (lat, lon); polygon vertices as [lon, lat].
    let (lat, lon) = postcode_data.centroids[idx];
    let vertices: Vec<[f64; 2]> = postcode_data.vertices[idx]
        .iter()
        .map(|&[lo, la]| [lo as f64, la as f64])
        .collect();

    info!(postcode = %normalized, "GET /api/postcode/{postcode}");

    let body = PostcodeLookupResponse {
        postcode: normalized,
        latitude: lat as f64,
        longitude: lon as f64,
        vertices,
    };
    Ok(Json(body))
}

View file

@ -66,7 +66,7 @@ fn non_empty_string(text: &str) -> Option<String> {
/// Look up an enum feature value by trying multiple possible column names.
/// Uses the unified feature model: enum values stored as f32 indices in feature_data.
fn lookup_enum_value(
feature_names: &[String],
feature_name_to_index: &FxHashMap<String, usize>,
feature_data: &[f32],
num_features: usize,
enum_values: &FxHashMap<usize, Vec<String>>,
@ -74,7 +74,7 @@ fn lookup_enum_value(
names: &[&str],
) -> Option<String> {
for name in names {
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == *name) {
if let Some(&feat_idx) = feature_name_to_index.get(*name) {
if let Some(values) = enum_values.get(&feat_idx) {
let value = feature_data[row * num_features + feat_idx];
if value.is_finite() {
@ -120,7 +120,7 @@ pub async fn get_hexagon_properties(
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
@ -134,6 +134,7 @@ pub async fn get_hexagon_properties(
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let feature_names = &state.data.feature_names;
let feature_name_to_index = &state.feature_name_to_index;
let enum_values = &state.data.enum_values;
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
@ -199,7 +200,7 @@ pub async fn get_hexagon_properties(
postcode: non_empty_string(state.data.postcode(row)),
is_construction_date_approximate: Some(state.data.is_approx_build_date(row)),
property_type: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -207,7 +208,7 @@ pub async fn get_hexagon_properties(
&["Property type", "epc_property_type", "pp_property_type"],
),
built_form: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -215,7 +216,7 @@ pub async fn get_hexagon_properties(
&["Property type/built form", "built_form"],
),
duration: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -223,7 +224,7 @@ pub async fn get_hexagon_properties(
&["Leashold/Freehold", "duration"],
),
current_energy_rating: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,
@ -231,7 +232,7 @@ pub async fn get_hexagon_properties(
&["Current energy rating", "current_energy_rating"],
),
potential_energy_rating: lookup_enum_value(
feature_names,
feature_name_to_index,
feature_data,
num_features,
enum_values,

View file

@ -106,18 +106,7 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
"paint": { "background-color": bg_color }
})];
// Water layer
if layer_ids.contains(&"water") {
style_layers.push(serde_json::json!({
"id": "water",
"type": "fill",
"source": "protomaps",
"source-layer": "water",
"paint": { "fill-color": water_color }
}));
}
// Land/earth layer
// Land/earth layer (must come before water so rivers render on top)
if layer_ids.contains(&"earth") {
style_layers.push(serde_json::json!({
"id": "earth",
@ -128,9 +117,9 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
}));
}
// Landuse
// Landuse (parks, forests) - render before water
if layer_ids.contains(&"landuse") {
let landuse_color = if is_dark { "#1f2d1f" } else { "#d8e8c8" };
let park_color = if is_dark { "#2d4a2d" } else { "#c8e6c8" };
style_layers.push(serde_json::json!({
"id": "landuse-park",
"type": "fill",
@ -141,7 +130,18 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
["==", ["get", "pmap:kind"], "nature_reserve"],
["==", ["get", "pmap:kind"], "forest"]
],
"paint": { "fill-color": landuse_color, "fill-opacity": 0.5 }
"paint": { "fill-color": park_color, "fill-opacity": 0.7 }
}));
}
// Water layer (after earth so rivers show on top of land)
if layer_ids.contains(&"water") {
style_layers.push(serde_json::json!({
"id": "water",
"type": "fill",
"source": "protomaps",
"source-layer": "water",
"paint": { "fill-color": water_color }
}));
}

View file

@ -0,0 +1,974 @@
//! Comprehensive semantic tests for the server.
//!
//! These tests validate the correctness of data processing, aggregation logic,
//! spatial queries, and filter semantics without requiring real data files.
#[cfg(test)]
mod tests {
use rustc_hash::FxHashMap;
use crate::data::{compute_feature_stats, Histogram};
use crate::features::Bounds;
use crate::parsing::{
bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters,
ParsedEnumFilter, ParsedFilter,
};
use crate::utils::GridIndex;
// =========================================================================
// GridIndex Tests
// =========================================================================
mod grid_index {
    use super::*;

    /// A grid built from zero points yields nothing, even for a whole-world query.
    #[test]
    fn empty_grid_returns_empty() {
        let grid = GridIndex::build(&[], &[], 0.01);
        let hits = grid.query(-90.0, -180.0, 90.0, 180.0);
        assert!(hits.is_empty());
    }

    /// A lone point is reported when the query box surrounds it.
    #[test]
    fn single_point_inside_query() {
        let lats = [51.5_f32];
        let lons = [-0.1_f32];
        let grid = GridIndex::build(&lats, &lons, 0.01);

        let hits = grid.query(51.4, -0.2, 51.6, 0.0);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0], 0);
    }

    /// A lone point is not reported when the query box lies elsewhere.
    #[test]
    fn single_point_outside_query() {
        let lats = [51.5_f32];
        let lons = [-0.1_f32];
        let grid = GridIndex::build(&lats, &lons, 0.01);

        let hits = grid.query(52.0, 0.0, 53.0, 1.0);
        assert!(hits.is_empty());
    }

    /// Only the points inside the query box are returned.
    #[test]
    fn multiple_points_partial_query() {
        let lats = [51.5_f32, 51.6, 51.7, 52.0];
        let lons = [-0.1_f32, -0.1, -0.1, -0.1];
        let grid = GridIndex::build(&lats, &lons, 0.01);

        // The box stops at latitude 51.8, so the point at 52.0 (index 3) is excluded.
        let hits = grid.query(51.4, -0.2, 51.8, 0.0);
        assert_eq!(hits.len(), 3);
        for expected in [0, 1, 2] {
            assert!(hits.contains(&expected));
        }
        assert!(!hits.contains(&3));
    }

    /// A point sitting exactly on a cell boundary is still found.
    #[test]
    fn query_at_grid_boundary() {
        let lats = [51.0_f32, 51.01, 51.02];
        let lons = [0.0_f32, 0.01, 0.02];
        let grid = GridIndex::build(&lats, &lons, 0.01);

        // The box reaches only slightly past the first point.
        let hits = grid.query(50.99, -0.01, 51.005, 0.005);
        assert!(hits.contains(&0));
    }

    /// `for_each_in_bounds` visits the same indices that `query` returns.
    #[test]
    fn for_each_matches_query() {
        let lats = [51.5_f32, 51.6, 51.7];
        let lons = [-0.1_f32, -0.2, -0.3];
        let grid = GridIndex::build(&lats, &lons, 0.01);

        let from_query = grid.query(51.4, -0.25, 51.65, 0.0);

        let mut from_callback = Vec::new();
        grid.for_each_in_bounds(51.4, -0.25, 51.65, 0.0, |idx| from_callback.push(idx));

        // Same number of hits, and every queried index was also visited.
        assert_eq!(from_query.len(), from_callback.len());
        assert!(from_query.iter().all(|idx| from_callback.contains(idx)));
    }

    /// Southern-hemisphere (negative latitude) points are indexed correctly.
    #[test]
    fn negative_coordinates() {
        let lats = [-33.9_f32, -33.8, -33.7];
        let lons = [151.2_f32, 151.3, 151.4];
        let grid = GridIndex::build(&lats, &lons, 0.01);

        // All three points fall inside latitude [-34.0, -33.65] and
        // longitude [151.1, 151.5].
        let hits = grid.query(-34.0, 151.1, -33.65, 151.5);
        assert_eq!(hits.len(), 3);
    }

    /// A query far away from every indexed point returns nothing.
    #[test]
    fn query_bounds_completely_outside_grid() {
        let lats = [51.5_f32];
        let lons = [-0.1_f32];
        let grid = GridIndex::build(&lats, &lons, 0.01);

        let hits = grid.query(0.0, 100.0, 10.0, 110.0);
        assert!(hits.is_empty());
    }

    /// Closely spaced points are all found when the cell size is very small.
    #[test]
    fn very_small_cell_size() {
        let lats = [51.5_f32, 51.5001, 51.5002];
        let lons = [-0.1_f32, -0.1001, -0.1002];
        let grid = GridIndex::build(&lats, &lons, 0.0001);

        let hits = grid.query(51.4999, -0.1003, 51.5003, -0.0999);
        assert_eq!(hits.len(), 3);
    }
}
// =========================================================================
// Filter Parsing Tests
// =========================================================================
mod filter_parsing {
use super::*;
fn make_feature_name_to_index() -> FxHashMap<String, usize> {
[
("Price".into(), 0),
("Area".into(), 1),
("Rating".into(), 2),
("Type".into(), 3),
]
.into_iter()
.collect()
}
fn make_enum_values() -> FxHashMap<usize, Vec<String>> {
let mut map = FxHashMap::default();
// Feature index 3 (Type) is an enum
map.insert(3, vec!["Detached".into(), "Semi".into(), "Terraced".into(), "Flat".into()]);
map
}
#[test]
fn parse_single_numeric_filter() {
let (numeric, enums) = parse_filters(
Some("Price:100000:500000"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert_eq!(numeric.len(), 1);
assert!(enums.is_empty());
assert_eq!(numeric[0].feat_idx, 0);
assert_eq!(numeric[0].min, 100000.0);
assert_eq!(numeric[0].max, 500000.0);
}
#[test]
fn parse_multiple_numeric_filters() {
let (numeric, _enums) = parse_filters(
Some("Price:100000:500000,Area:50:200"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert_eq!(numeric.len(), 2);
assert_eq!(numeric[0].feat_idx, 0);
assert_eq!(numeric[1].feat_idx, 1);
}
#[test]
fn parse_single_enum_filter() {
let (numeric, enums) = parse_filters(
Some("Type:Detached|Flat"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert!(numeric.is_empty());
assert_eq!(enums.len(), 1);
assert_eq!(enums[0].feat_idx, 3);
assert_eq!(enums[0].allowed, vec![0.0, 3.0]); // Detached=0, Flat=3
}
#[test]
fn parse_mixed_filters() {
let (numeric, enums) = parse_filters(
Some("Price:100000:500000,Type:Semi|Terraced"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert_eq!(numeric.len(), 1);
assert_eq!(enums.len(), 1);
}
#[test]
fn parse_unknown_feature_ignored() {
let (numeric, enums) = parse_filters(
Some("Unknown:100:200"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert!(numeric.is_empty());
assert!(enums.is_empty());
}
#[test]
fn parse_invalid_numeric_format_ignored() {
let (numeric, enums) = parse_filters(
Some("Price:not_a_number:500000"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert!(numeric.is_empty());
assert!(enums.is_empty());
}
#[test]
fn parse_enum_with_unknown_value() {
let (_numeric, enums) = parse_filters(
Some("Type:Detached|Unknown|Flat"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert_eq!(enums.len(), 1);
// Unknown is filtered out, only Detached and Flat remain
assert_eq!(enums[0].allowed, vec![0.0, 3.0]);
}
#[test]
fn parse_empty_filter_string() {
let (numeric, enums) = parse_filters(
Some(""),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert!(numeric.is_empty());
assert!(enums.is_empty());
}
#[test]
fn parse_none_filter() {
let (numeric, enums) = parse_filters(
None,
&make_feature_name_to_index(),
&make_enum_values(),
);
assert!(numeric.is_empty());
assert!(enums.is_empty());
}
#[test]
fn parse_filter_with_whitespace() {
let (numeric, enums) = parse_filters(
Some("Price : 100000 : 500000 , Type : Detached | Flat"),
&make_feature_name_to_index(),
&make_enum_values(),
);
assert_eq!(numeric.len(), 1);
assert_eq!(enums.len(), 1);
}
}
// =========================================================================
// Filter Application Tests
// =========================================================================
mod filter_application {
use super::*;
#[test]
fn row_passes_no_filters() {
    // With neither numeric nor enum filters, the row is accepted.
    let row_values = [100.0_f32, 50.0];
    let passes = row_passes_filters(0, &[], &[], &row_values, 2);
    assert!(passes);
}
#[test]
fn row_passes_numeric_filter_in_range() {
let feature_data = vec![150.0_f32];
let filters = vec![ParsedFilter {
feat_idx: 0,
min: 100.0,
max: 200.0,
}];
assert!(row_passes_filters(0, &filters, &[], &feature_data, 1));
}
#[test]
fn row_fails_numeric_filter_below_min() {
let feature_data = vec![50.0_f32];
let filters = vec![ParsedFilter {
feat_idx: 0,
min: 100.0,
max: 200.0,
}];
assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1));
}
#[test]
fn row_fails_numeric_filter_above_max() {
let feature_data = vec![250.0_f32];
let filters = vec![ParsedFilter {
feat_idx: 0,
min: 100.0,
max: 200.0,
}];
assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1));
}
#[test]
fn row_passes_numeric_filter_at_boundary() {
let filters = vec![ParsedFilter {
feat_idx: 0,
min: 100.0,
max: 200.0,
}];
// At min boundary
assert!(row_passes_filters(0, &filters, &[], &[100.0], 1));
// At max boundary
assert!(row_passes_filters(0, &filters, &[], &[200.0], 1));
}
#[test]
fn row_fails_numeric_filter_with_nan() {
let feature_data = vec![f32::NAN];
let filters = vec![ParsedFilter {
feat_idx: 0,
min: 100.0,
max: 200.0,
}];
assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1));
}
#[test]
fn row_passes_enum_filter_allowed_value() {
let feature_data = vec![1.0_f32]; // Index 1
let enum_filters = vec![ParsedEnumFilter {
feat_idx: 0,
allowed: vec![0.0, 1.0, 2.0],
}];
assert!(row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
}
#[test]
fn row_fails_enum_filter_disallowed_value() {
let feature_data = vec![3.0_f32]; // Index 3 not in allowed
let enum_filters = vec![ParsedEnumFilter {
feat_idx: 0,
allowed: vec![0.0, 1.0, 2.0],
}];
assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
}
#[test]
fn row_fails_enum_filter_with_nan() {
let feature_data = vec![f32::NAN];
let enum_filters = vec![ParsedEnumFilter {
feat_idx: 0,
allowed: vec![0.0, 1.0, 2.0],
}];
assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
}
#[test]
fn row_fails_empty_enum_filter() {
let feature_data = vec![1.0_f32];
let enum_filters = vec![ParsedEnumFilter {
feat_idx: 0,
allowed: vec![], // Empty allowed list
}];
// Empty allowed means nothing passes
assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
}
#[test]
fn multiple_filters_all_must_pass() {
// Row with two features: price=150, type=1
let feature_data = vec![150.0_f32, 1.0];
let numeric_filters = vec![ParsedFilter {
feat_idx: 0,
min: 100.0,
max: 200.0,
}];
let enum_filters = vec![ParsedEnumFilter {
feat_idx: 1,
allowed: vec![1.0, 2.0],
}];
assert!(row_passes_filters(0, &numeric_filters, &enum_filters, &feature_data, 2));
// Change enum filter to not include 1.0
let enum_filters_fail = vec![ParsedEnumFilter {
feat_idx: 1,
allowed: vec![0.0, 2.0],
}];
assert!(!row_passes_filters(0, &numeric_filters, &enum_filters_fail, &feature_data, 2));
}
#[test]
fn row_major_layout_correct_indexing() {
// 3 rows, 2 features each
// Row 0: [100, 0], Row 1: [200, 1], Row 2: [300, 2]
let feature_data = vec![
100.0_f32, 0.0, // Row 0
200.0, 1.0, // Row 1
300.0, 2.0, // Row 2
];
let num_features = 2;
let filters = vec![ParsedFilter {
feat_idx: 0,
min: 150.0,
max: 250.0,
}];
assert!(!row_passes_filters(0, &filters, &[], &feature_data, num_features)); // 100 not in range
assert!(row_passes_filters(1, &filters, &[], &feature_data, num_features)); // 200 in range
assert!(!row_passes_filters(2, &filters, &[], &feature_data, num_features)); // 300 not in range
}
}
// =========================================================================
// Bounds Parsing Tests
// =========================================================================
mod bounds_parsing {
    //! Checks `parse_bounds` on well-formed and malformed
    //! `south,west,north,east` strings.
    use super::*;

    /// Four comma-separated numbers come back as `(south, west, north, east)`.
    #[test]
    fn parse_valid_bounds() {
        let parsed = parse_bounds("51.0,-0.5,52.0,0.5").unwrap();
        assert_eq!(parsed, (51.0, -0.5, 52.0, 0.5));
    }

    /// Spaces after the commas are tolerated.
    #[test]
    fn parse_bounds_with_spaces() {
        let (south, west, ..) = parse_bounds("51.0, -0.5, 52.0, 0.5").unwrap();
        assert_eq!((south, west), (51.0, -0.5));
    }

    /// Negative latitudes are parsed with their sign intact.
    #[test]
    fn parse_bounds_negative_values() {
        let (south, _, north, _) = parse_bounds("-51.5,-0.5,-50.0,0.5").unwrap();
        assert_eq!((south, north), (-51.5, -50.0));
    }

    /// Three components are not enough.
    #[test]
    fn parse_bounds_invalid_too_few_parts() {
        let outcome = parse_bounds("51.0,-0.5,52.0");
        assert!(outcome.is_err());
    }

    /// Five components are too many.
    #[test]
    fn parse_bounds_invalid_too_many_parts() {
        let outcome = parse_bounds("51.0,-0.5,52.0,0.5,1.0");
        assert!(outcome.is_err());
    }

    /// A non-numeric component is an error.
    #[test]
    fn parse_bounds_invalid_non_numeric() {
        let outcome = parse_bounds("51.0,abc,52.0,0.5");
        assert!(outcome.is_err());
    }

    /// The empty string is an error.
    #[test]
    fn parse_bounds_empty_string() {
        let outcome = parse_bounds("");
        assert!(outcome.is_err());
    }
}
// =========================================================================
// Bounds Intersection Tests
// =========================================================================
mod bounds_intersection {
    //! Checks `bounds_intersect` on pairs of axis-aligned boxes. Each call
    //! passes the four coordinates of the first box followed by the four
    //! coordinates of the second.
    use super::*;

    /// Partially overlapping boxes intersect.
    #[test]
    fn overlapping_boxes_intersect() {
        let hit = bounds_intersect(
            0.0, 0.0, 2.0, 2.0, // first box
            1.0, 1.0, 3.0, 3.0, // second box, overlapping the first
        );
        assert!(hit);
    }

    /// A box fully contained in another intersects it.
    #[test]
    fn one_box_inside_other_intersects() {
        let hit = bounds_intersect(
            0.0, 0.0, 10.0, 10.0, // large outer box
            2.0, 2.0, 5.0, 5.0, // box nested inside it
        );
        assert!(hit);
    }

    /// Sharing a single corner point counts as intersecting.
    #[test]
    fn touching_at_corner_intersects() {
        let hit = bounds_intersect(
            0.0, 0.0, 1.0, 1.0, // first box
            1.0, 1.0, 2.0, 2.0, // second box, meeting the first at (1, 1)
        );
        assert!(hit);
    }

    /// Sharing an edge counts as intersecting.
    #[test]
    fn touching_at_edge_intersects() {
        let hit = bounds_intersect(
            0.0, 0.0, 1.0, 1.0, // first box
            1.0, 0.0, 2.0, 1.0, // second box, sharing one edge with the first
        );
        assert!(hit);
    }

    /// Boxes separated along the second coordinate axis do not intersect.
    #[test]
    fn disjoint_horizontally_no_intersect() {
        let hit = bounds_intersect(
            0.0, 0.0, 1.0, 1.0, // first box
            0.0, 2.0, 1.0, 3.0, // second box, off to the right
        );
        assert!(!hit);
    }

    /// Boxes separated along the first coordinate axis do not intersect.
    #[test]
    fn disjoint_vertically_no_intersect() {
        let hit = bounds_intersect(
            0.0, 0.0, 1.0, 1.0, // first box
            2.0, 0.0, 3.0, 1.0, // second box, above the first
        );
        assert!(!hit);
    }

    /// Boxes separated along both axes do not intersect.
    #[test]
    fn disjoint_diagonally_no_intersect() {
        let hit = bounds_intersect(
            0.0, 0.0, 1.0, 1.0, // first box
            2.0, 2.0, 3.0, 3.0, // second box, diagonally away
        );
        assert!(!hit);
    }

    /// Overlap is detected for boxes with negative coordinates too.
    #[test]
    fn negative_coordinates_intersect() {
        let hit = bounds_intersect(
            -2.0, -2.0, -1.0, -1.0, // first box, all-negative coordinates
            -1.5, -1.5, -0.5, -0.5, // second box, overlapping the first
        );
        assert!(hit);
    }
}
// =========================================================================
// H3 Cell Bounds Tests
// =========================================================================
mod h3_bounds {
use super::*;
use std::str::FromStr;
#[test]
fn h3_cell_bounds_zero_buffer() {
let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
let (south, west, north, east) = h3_cell_bounds(cell, 0.0);
// San Francisco area, should be roughly 37.77°N, -122.4°W
assert!(south < north, "south {} should be < north {}", south, north);
assert!(west < east, "west {} should be < east {}", west, east);
assert!(south > 30.0 && south < 45.0);
assert!(west < -100.0);
}
#[test]
fn h3_cell_bounds_with_buffer() {
let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
let (s0, w0, n0, e0) = h3_cell_bounds(cell, 0.0);
let (s1, w1, n1, e1) = h3_cell_bounds(cell, 0.1);
// With buffer, bounds should be larger
assert!(s1 < s0, "south with buffer should be smaller");
assert!(w1 < w0, "west with buffer should be smaller");
assert!(n1 > n0, "north with buffer should be larger");
assert!(e1 > e0, "east with buffer should be larger");
// Buffer should be exactly 0.1 degrees
assert!((s0 - s1 - 0.1).abs() < 1e-10);
assert!((w0 - w1 - 0.1).abs() < 1e-10);
}
#[test]
fn h3_cell_bounds_different_resolutions() {
// Resolution 9 cell
let cell_high = h3o::CellIndex::from_str("8928308280fffff").unwrap();
// Get its resolution 5 parent
let res5 = h3o::Resolution::try_from(5).unwrap();
let cell_low = cell_high.parent(res5).unwrap();
let (s_low, w_low, n_low, e_low) = h3_cell_bounds(cell_low, 0.0);
let (s_high, w_high, n_high, e_high) = h3_cell_bounds(cell_high, 0.0);
// Lower resolution cell should have larger bounds
let area_low = (n_low - s_low) * (e_low - w_low);
let area_high = (n_high - s_high) * (e_high - w_high);
assert!(area_low > area_high, "Lower res should have larger area");
}
}
// =========================================================================
// Histogram Computation Tests
// =========================================================================
mod histogram {
use super::*;
fn make_fixed_bounds(min: f32, max: f32) -> Bounds {
Bounds::Fixed { min, max }
}
fn make_percentile_bounds(low: f64, high: f64) -> Bounds {
Bounds::Percentile { low, high }
}
#[test]
fn histogram_empty_data() {
let data: Vec<f32> = vec![];
let bounds = make_fixed_bounds(0.0, 100.0);
let stats = compute_feature_stats(&data, &bounds);
assert_eq!(stats.slider_min, 0.0);
assert_eq!(stats.slider_max, 100.0);
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 0);
}
#[test]
fn histogram_single_value() {
let data = vec![50.0_f32];
let bounds = make_fixed_bounds(0.0, 100.0);
let stats = compute_feature_stats(&data, &bounds);
assert_eq!(stats.histogram.min, 50.0);
assert_eq!(stats.histogram.max, 50.0);
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 1);
}
#[test]
fn histogram_uniform_distribution() {
// 100 values from 0 to 99
let data: Vec<f32> = (0..100).map(|i| i as f32).collect();
let bounds = make_fixed_bounds(0.0, 100.0);
let stats = compute_feature_stats(&data, &bounds);
assert_eq!(stats.histogram.min, 0.0);
assert_eq!(stats.histogram.max, 99.0);
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 100);
}
#[test]
fn histogram_with_nan_values() {
let data = vec![10.0_f32, f32::NAN, 20.0, f32::NAN, 30.0];
let bounds = make_fixed_bounds(0.0, 100.0);
let stats = compute_feature_stats(&data, &bounds);
// Only 3 non-NaN values
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 3);
assert_eq!(stats.histogram.min, 10.0);
assert_eq!(stats.histogram.max, 30.0);
}
#[test]
fn histogram_all_nan() {
let data = vec![f32::NAN, f32::NAN, f32::NAN];
let bounds = make_fixed_bounds(0.0, 100.0);
let stats = compute_feature_stats(&data, &bounds);
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 0);
}
#[test]
fn histogram_all_same_value() {
let data = vec![42.0_f32; 1000];
let bounds = make_fixed_bounds(0.0, 100.0);
let stats = compute_feature_stats(&data, &bounds);
assert_eq!(stats.histogram.min, 42.0);
assert_eq!(stats.histogram.max, 42.0);
assert_eq!(stats.histogram.p1, 42.0);
assert_eq!(stats.histogram.p99, 42.0);
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 1000);
}
#[test]
fn histogram_percentile_bounds() {
// Data with outliers: 1 very low, 1 very high, 98 in middle
let mut data: Vec<f32> = vec![0.0]; // Low outlier
data.extend((1..99).map(|i| 50.0 + i as f32 * 0.01)); // Main data around 50
data.push(1000.0); // High outlier
let bounds = make_percentile_bounds(2.0, 98.0);
let stats = compute_feature_stats(&data, &bounds);
// Slider should exclude outliers
assert!(stats.slider_min > 0.0);
assert!(stats.slider_max < 1000.0);
}
#[test]
fn histogram_bin_for_value() {
let hist = Histogram {
min: 0.0,
max: 100.0,
p1: 10.0,
p99: 90.0,
counts: vec![0; 10], // 10 bins
};
// Low outlier bin (bin 0)
assert_eq!(hist.bin_for_value(5.0), 0);
// High outlier bin (bin 9)
assert_eq!(hist.bin_for_value(95.0), 9);
// Middle bins (bins 1-8)
let mid_value = 50.0;
let bin = hist.bin_for_value(mid_value);
assert!(bin >= 1 && bin <= 8);
}
#[test]
fn histogram_middle_bin_width() {
let hist = Histogram {
min: 0.0,
max: 100.0,
p1: 10.0,
p99: 90.0,
counts: vec![0; 10], // 10 bins
};
// Middle bins span p1 to p99 (80 units) across 8 bins (10 - 2 outlier bins)
let expected_width = (90.0 - 10.0) / 8.0;
assert!((hist.middle_bin_width() - expected_width).abs() < 0.001);
}
#[test]
fn histogram_cardinality_caps_bins() {
// Only 3 unique values - should cap bins at 3
let data = vec![1.0_f32, 1.0, 2.0, 2.0, 3.0, 3.0];
let bounds = make_fixed_bounds(0.0, 100.0);
let stats = compute_feature_stats(&data, &bounds);
// Bins should be capped at cardinality (3)
assert_eq!(stats.histogram.counts.len(), 3);
}
}
// =========================================================================
// Aggregation Semantics Tests
// =========================================================================
mod aggregation {
    //! Pins down the NaN-skipping aggregation semantics using plain standard
    //! library code. These tests restate the expected rules locally and do
    //! not call into the crate's own aggregation routines.

    /// Min/max over the finite values only: NaN entries are ignored.
    #[test]
    fn min_max_skips_nan() {
        let values = [10.0_f32, f32::NAN, 20.0, f32::NAN, 5.0];

        let finite = || values.iter().copied().filter(|v| v.is_finite());
        let lowest = finite().fold(f32::INFINITY, f32::min);
        let highest = finite().fold(f32::NEG_INFINITY, f32::max);

        assert_eq!(lowest, 5.0);
        assert_eq!(highest, 20.0);
    }

    /// Counting considers only finite values.
    #[test]
    fn count_skips_nan() {
        let values = [1.0_f32, f32::NAN, 2.0, f32::NAN, 3.0];

        let mut finite_total = 0;
        for v in values {
            if v.is_finite() {
                finite_total += 1;
            }
        }

        assert_eq!(finite_total, 3);
    }

    /// Enum values are stored as float indices; tallying them per index
    /// skips NaN and anything outside the enum's range.
    #[test]
    fn enum_value_counting() {
        // Index meaning: 0 = Detached, 1 = Semi, 2 = Terraced, 3 = Flat.
        let values = [0.0_f32, 1.0, 1.0, 2.0, f32::NAN, 3.0, 1.0];
        let enum_count = 4;
        let mut tally = vec![0u64; enum_count];

        let indices = values
            .iter()
            .filter(|v| v.is_finite())
            .map(|&v| v as usize);
        for idx in indices {
            // `get_mut` returns `None` for indices past the enum range.
            if let Some(slot) = tally.get_mut(idx) {
                *slot += 1;
            }
        }

        assert_eq!(tally, vec![1, 3, 1, 1]); // Detached, Semi, Terraced, Flat
    }
}
// =========================================================================
// H3 Resolution Tests
// =========================================================================
mod h3_resolution {
use std::str::FromStr;
#[test]
fn parent_cell_at_lower_resolution() {
// Resolution 9 cell
let child = h3o::CellIndex::from_str("8928308280fffff").unwrap();
// Get parent at resolution 7
let parent_res = h3o::Resolution::try_from(7).unwrap();
let parent = child.parent(parent_res).unwrap();
assert_eq!(parent.resolution(), parent_res);
// Child should be contained in parent
assert!(parent.children(child.resolution()).any(|c| c == child));
}
#[test]
fn same_resolution_returns_self() {
let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
let res = cell.resolution();
// Getting parent at same resolution should return the cell itself
let parent = cell.parent(res);
assert_eq!(parent, Some(cell));
}
#[test]
fn higher_resolution_parent_fails() {
// Resolution 9 cell
let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
// Try to get "parent" at higher resolution (impossible)
let higher_res = h3o::Resolution::try_from(10).unwrap();
let parent = cell.parent(higher_res);
assert!(parent.is_none());
}
}
// =========================================================================
// Edge Cases and Error Handling
// =========================================================================
mod edge_cases {
    //! Boundary and malformed-input scenarios spanning the grid index, the
    //! row filters, bounds parsing and feature statistics.
    use super::*;

    /// Points near the poles and the antimeridian are indexed and returned
    /// by a whole-globe query.
    #[test]
    fn very_large_coordinates() {
        let lat = vec![89.9_f32, -89.9];
        let lon = vec![179.9_f32, -179.9];
        let grid = GridIndex::build(&lat, &lon, 0.01);
        let result = grid.query(-90.0, -180.0, 90.0, 180.0);
        assert_eq!(result.len(), 2);
    }

    /// A value sitting exactly on a filter's bounds passes.
    #[test]
    fn filter_at_float_precision_boundary() {
        let value = 100.0_f32;
        // NOTE: the spacing between adjacent f32 values near 100 is about
        // 7.6e-6, far larger than f32::EPSILON (about 1.2e-7), so both bounds
        // round back to exactly 100.0. The test therefore exercises the
        // inclusive boundary rather than a genuinely wider window.
        let filter = ParsedFilter {
            feat_idx: 0,
            min: 100.0 - f32::EPSILON,
            max: 100.0 + f32::EPSILON,
        };
        assert!(row_passes_filters(0, &[filter], &[], &[value], 1));
    }

    /// A stored enum value that is not an exact allowed index is rejected.
    #[test]
    fn enum_filter_with_fractional_index() {
        // What happens if the stored value isn't exactly an integer?
        let feature_data = vec![1.5_f32]; // Not exactly 1.0 or 2.0
        let enum_filters = vec![ParsedEnumFilter {
            feat_idx: 0,
            allowed: vec![1.0, 2.0],
        }];
        // 1.5 is not in the allowed list [1.0, 2.0]
        assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
    }

    /// Inverted bounds (south > north) are accepted by the parser but make
    /// the grid query return nothing.
    #[test]
    fn bounds_with_inverted_min_max() {
        // What if south > north? (Invalid input)
        // The parse_bounds function doesn't validate this
        let (south, _west, north, _east) = parse_bounds("52.0,-0.5,51.0,0.5").unwrap();
        assert_eq!(south, 52.0);
        assert_eq!(north, 51.0);
        // south > north is allowed by parsing but logically invalid
        // GridIndex should handle this gracefully
        let lat = vec![51.5_f32];
        let lon = vec![-0.1_f32];
        let grid = GridIndex::build(&lat, &lon, 0.01);
        // Query with inverted bounds returns empty (row_min > row_max is rejected)
        let result = grid.query(52.0, -0.5, 51.0, 0.5);
        assert!(result.is_empty(), "Inverted bounds should return empty");
    }

    /// Documents that infinite samples currently take part in the statistics.
    #[test]
    fn infinity_values_in_data() {
        // NOTE: The current implementation uses !is_nan() not is_finite()
        // So INFINITY values ARE included in min/max calculations.
        // This documents current behavior - consider if this should be fixed.
        let data = vec![f32::INFINITY, f32::NEG_INFINITY, 50.0];
        let bounds = Bounds::Fixed {
            min: 0.0,
            max: 100.0,
        };
        let stats = compute_feature_stats(&data, &bounds);
        // Current behavior: infinity is included (uses !is_nan())
        assert_eq!(stats.histogram.min, f32::NEG_INFINITY);
        assert_eq!(stats.histogram.max, f32::INFINITY);
        // All 3 values are counted (none are NaN)
        assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 3);
    }

    /// Baseline for the infinity test: ordinary finite samples yield the
    /// expected min, max and total count.
    #[test]
    fn only_finite_values() {
        // Test that normal finite values work correctly
        let data = vec![10.0_f32, 20.0, 30.0];
        let bounds = Bounds::Fixed {
            min: 0.0,
            max: 100.0,
        };
        let stats = compute_feature_stats(&data, &bounds);
        assert_eq!(stats.histogram.min, 10.0);
        assert_eq!(stats.histogram.max, 30.0);
        assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 3);
    }
}
}

View file

@ -1,3 +1,5 @@
use rustc_hash::FxHashMap;
use crate::data::{POICategoryGroup, POIData, PostcodeData, PropertyData};
use crate::routes::FeaturesResponse;
use crate::utils::GridIndex;
@ -12,6 +14,8 @@ pub struct AppState {
pub poi_grid: GridIndex,
/// Postcode boundary data for high-zoom rendering
pub postcode_data: PostcodeData,
/// O(1) lookup: feature name → index in feature_names/feature_data
pub feature_name_to_index: FxHashMap<String, usize>,
/// Precomputed JSON key names: "min_{feature_name}" for each feature
pub min_keys: Vec<String>,
/// Precomputed JSON key names: "max_{feature_name}" for each feature

View file

@ -33,24 +33,19 @@ impl GridIndex {
};
}
// Single pass: compute min/max bounds and count items per cell
let mut min_lat = f32::INFINITY;
let mut max_lat = f32::NEG_INFINITY;
let mut min_lon = f32::INFINITY;
let mut max_lon = f32::NEG_INFINITY;
for index in 0..lat.len() {
if lat[index] < min_lat {
min_lat = lat[index];
}
if lat[index] > max_lat {
max_lat = lat[index];
}
if lon[index] < min_lon {
min_lon = lon[index];
}
if lon[index] > max_lon {
max_lon = lon[index];
}
let la = lat[index];
let lo = lon[index];
if la < min_lat { min_lat = la; }
if la > max_lat { max_lat = la; }
if lo < min_lon { min_lon = lo; }
if lo > max_lon { max_lon = lo; }
}
min_lat -= cell_size;
@ -70,7 +65,7 @@ impl GridIndex {
"Building grid index (CSR)"
);
// First pass: count items per cell
// Count items per cell (now we know the bounds)
let mut counts = vec![0u32; num_cells];
for index in 0..lat.len() {
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;