Various fixes
This commit is contained in:
parent
34a4d0ba86
commit
55598aaaa0
14 changed files with 1250 additions and 130 deletions
|
|
@ -4,4 +4,4 @@ mod property;
|
|||
|
||||
pub use poi::{POICategoryGroup, POIData};
|
||||
pub use postcodes::PostcodeData;
|
||||
pub use property::{precompute_h3, Histogram, PropertyData};
|
||||
pub use property::{compute_feature_stats, precompute_h3, Histogram, PropertyData};
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ mod metrics;
|
|||
mod og_middleware;
|
||||
pub mod parsing;
|
||||
mod routes;
|
||||
#[cfg(test)]
|
||||
mod semantic_tests;
|
||||
mod state;
|
||||
pub mod utils;
|
||||
|
||||
|
|
@ -147,6 +149,13 @@ async fn main() -> anyhow::Result<()> {
|
|||
let tile_reader = Arc::new(routes::init_tile_reader(tiles_path).await?);
|
||||
info!("PMTiles loaded successfully");
|
||||
|
||||
let feature_name_to_index: rustc_hash::FxHashMap<String, usize> = property_data
|
||||
.feature_names
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, name)| (name.clone(), idx))
|
||||
.collect();
|
||||
|
||||
let min_keys: Vec<String> = property_data
|
||||
.feature_names
|
||||
.iter()
|
||||
|
|
@ -219,6 +228,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
poi_data,
|
||||
poi_grid,
|
||||
postcode_data,
|
||||
feature_name_to_index,
|
||||
min_keys,
|
||||
max_keys,
|
||||
poi_category_groups,
|
||||
|
|
@ -237,6 +247,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
let state_features = state.clone();
|
||||
let state_hexagons = state.clone();
|
||||
let state_postcodes = state.clone();
|
||||
let state_postcode_lookup = state.clone();
|
||||
let state_pois = state.clone();
|
||||
let state_poi_categories = state.clone();
|
||||
let state_hexagon_properties = state.clone();
|
||||
|
|
@ -257,6 +268,10 @@ async fn main() -> anyhow::Result<()> {
|
|||
"/api/postcodes",
|
||||
get(move |query| routes::get_postcodes(state_postcodes.clone(), query)),
|
||||
)
|
||||
.route(
|
||||
"/api/postcode/{postcode}",
|
||||
get(move |path| routes::get_postcode_lookup(state_postcode_lookup.clone(), path)),
|
||||
)
|
||||
.route(
|
||||
"/api/pois",
|
||||
get(move |query| routes::get_pois(state_pois.clone(), query)),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
mod bounds;
|
||||
mod filters;
|
||||
|
||||
pub use bounds::{h3_cell_bounds, parse_bounds};
|
||||
pub use bounds::{bounds_intersect, h3_cell_bounds, parse_bounds};
|
||||
pub use filters::{parse_filters, row_passes_filters, ParsedEnumFilter, ParsedFilter};
|
||||
|
|
|
|||
|
|
@ -1,11 +1,29 @@
|
|||
use axum::http::StatusCode;
|
||||
|
||||
/// Compute the lat/lon bounding box of an H3 cell, with a configurable buffer in degrees.
|
||||
/// Report whether two axis-aligned lat/lon rectangles share at least one point.
///
/// Each rectangle is passed as (south, west, north, east), i.e.
/// (min_lat, min_lon, max_lat, max_lon). Edges are inclusive, so rectangles
/// that only touch along an edge or at a corner count as intersecting.
/// If any coordinate is NaN the result is `false`, because the comparison
/// involving it fails.
///
/// NOTE(review): the comparisons are plain min/max checks, so boxes are
/// presumably expected not to wrap the antimeridian (west <= east) — confirm
/// against callers.
#[inline]
pub fn bounds_intersect(
    a_south: f64,
    a_west: f64,
    a_north: f64,
    a_east: f64,
    b_south: f64,
    b_west: f64,
    b_north: f64,
    b_east: f64,
) -> bool {
    // Longitude spans overlap when each box starts no later than the other ends.
    let lon_overlap = b_east >= a_west && b_west <= a_east;
    // Same check along the latitude axis.
    let lat_overlap = b_north >= a_south && b_south <= a_north;
    lon_overlap && lat_overlap
}
|
||||
|
||||
/// Compute the lat/lon bounding box of an H3 cell in degrees, with a configurable buffer in degrees.
|
||||
/// Returns (south, west, north, east) / (min_lat, min_lon, max_lat, max_lon).
|
||||
pub fn h3_cell_bounds(cell: h3o::CellIndex, buffer: f64) -> (f64, f64, f64, f64) {
|
||||
let boundary = cell.boundary();
|
||||
let (mut min_lat, mut max_lat) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||
let (mut min_lon, mut max_lon) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||
for vertex in boundary.iter() {
|
||||
// h3o LatLng::lat()/lng() return degrees
|
||||
let lat = vertex.lat();
|
||||
let lon = vertex.lng();
|
||||
if lat < min_lat {
|
||||
|
|
@ -81,4 +99,42 @@ mod tests {
|
|||
assert!((buf_max_lat - max_lat - 0.1).abs() < 1e-10);
|
||||
assert!((buf_max_lon - max_lon - 0.1).abs() < 1e-10);
|
||||
}
|
||||
|
||||
#[test]
fn h3_cell_bounds_returns_degrees_not_radians() {
    // "8928308280fffff" lies in the San Francisco area (~37.77°N, ~122.4°W).
    let sf_cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
    let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(sf_cell, 0.0);

    // Radian output would have magnitude below π ≈ 3.14, so a latitude near 37
    // can only come from degree output.
    for (label, lat) in [("min_lat", min_lat), ("max_lat", max_lat)] {
        assert!(lat > 30.0 && lat < 45.0, "{} {} should be ~37° (degrees)", label, lat);
    }

    // The same reasoning applies to longitude: about -122 is far outside the radian range.
    for (label, lon) in [("min_lon", min_lon), ("max_lon", max_lon)] {
        assert!(lon < -100.0, "{} {} should be ~-122° (degrees)", label, lon);
    }
}
|
||||
|
||||
#[test]
fn bounds_intersect_overlapping() {
    // Each case is (box A, box B), both written as [south, west, north, east].
    let cases = [
        // Two partially overlapping boxes
        ([0.0, 0.0, 2.0, 2.0], [1.0, 1.0, 3.0, 3.0]),
        // Box B is inside box A
        ([0.0, 0.0, 10.0, 10.0], [2.0, 2.0, 5.0, 5.0]),
        // Box A is inside box B
        ([2.0, 2.0, 5.0, 5.0], [0.0, 0.0, 10.0, 10.0]),
        // Touching at an edge
        ([0.0, 0.0, 1.0, 1.0], [1.0, 0.0, 2.0, 1.0]),
    ];

    for (a, b) in cases {
        assert!(
            bounds_intersect(a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]),
            "expected {:?} and {:?} to intersect",
            a,
            b
        );
    }
}
|
||||
|
||||
#[test]
fn bounds_intersect_non_overlapping() {
    // Each case is (box A, box B), both written as [south, west, north, east].
    let cases = [
        // Box B is to the right of box A
        ([0.0, 0.0, 1.0, 1.0], [0.0, 2.0, 1.0, 3.0]),
        // Box B is above box A
        ([0.0, 0.0, 1.0, 1.0], [2.0, 0.0, 3.0, 1.0]),
        // Completely separate
        ([0.0, 0.0, 1.0, 1.0], [5.0, 5.0, 6.0, 6.0]),
    ];

    for (a, b) in cases {
        assert!(
            !bounds_intersect(a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]),
            "expected {:?} and {:?} to be disjoint",
            a,
            b
        );
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use rustc_hash::FxHashMap;
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
|
||||
/// Filter for numeric features: value must be in [min, max] range.
|
||||
pub struct ParsedFilter {
|
||||
|
|
@ -8,9 +8,11 @@ pub struct ParsedFilter {
|
|||
}
|
||||
|
||||
/// Filter for enum features: value must be one of the allowed indices.
|
||||
/// Uses FxHashSet<u32> (f32 bits) for O(1) lookups instead of O(n) Vec::contains.
|
||||
pub struct ParsedEnumFilter {
|
||||
pub feat_idx: usize,
|
||||
pub allowed: Vec<f32>,
|
||||
/// Allowed enum indices stored as f32 bits for exact comparison
|
||||
pub allowed: FxHashSet<u32>,
|
||||
}
|
||||
|
||||
/// Parse comma-separated filter string into numeric and enum filters.
|
||||
|
|
@ -18,7 +20,7 @@ pub struct ParsedEnumFilter {
|
|||
/// Enum format: `name:val1|val2|val3` (pipe-separated string values)
|
||||
pub fn parse_filters(
|
||||
filter_str: Option<&str>,
|
||||
feature_names: &[String],
|
||||
feature_name_to_index: &FxHashMap<String, usize>,
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
|
||||
let mut numeric = Vec::new();
|
||||
|
|
@ -37,22 +39,22 @@ pub fn parse_filters(
|
|||
let name = parts[0].trim();
|
||||
let rest = parts[1].trim();
|
||||
|
||||
// Find feature index by name
|
||||
let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) else {
|
||||
// Find feature index by name (O(1) lookup)
|
||||
let Some(&feat_idx) = feature_name_to_index.get(name) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// Check if this is an enum feature
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
// Enum filter: convert string values to f32 indices
|
||||
let allowed: Vec<f32> = rest
|
||||
// Enum filter: convert string values to f32 indices (stored as bits for O(1) lookup)
|
||||
let allowed: FxHashSet<u32> = rest
|
||||
.split('|')
|
||||
.filter_map(|value| {
|
||||
let value = value.trim();
|
||||
values
|
||||
.iter()
|
||||
.position(|existing| existing == value)
|
||||
.map(|position| position as f32)
|
||||
.map(|position| (position as f32).to_bits())
|
||||
})
|
||||
.collect();
|
||||
enums.push(ParsedEnumFilter { feat_idx, allowed });
|
||||
|
|
@ -93,7 +95,8 @@ pub fn row_passes_filters(
|
|||
value.is_finite() && value >= filter.min && value <= filter.max
|
||||
}) && enum_filters.iter().all(|filter| {
|
||||
let value = feature_data[base + filter.feat_idx];
|
||||
value.is_finite() && filter.allowed.contains(&value)
|
||||
// O(1) lookup using f32 bits as key
|
||||
value.is_finite() && filter.allowed.contains(&value.to_bits())
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -101,8 +104,10 @@ pub fn row_passes_filters(
|
|||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn feature_names() -> Vec<String> {
|
||||
vec!["price".into(), "area".into(), "rating".into()]
|
||||
fn feature_name_to_index() -> FxHashMap<String, usize> {
|
||||
[("price".into(), 0), ("area".into(), 1), ("rating".into(), 2)]
|
||||
.into_iter()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn enum_values() -> FxHashMap<usize, Vec<String>> {
|
||||
|
|
@ -113,7 +118,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn parse_filters_numeric() {
|
||||
let (numeric, enums) = parse_filters(Some("price:100:500"), &feature_names(), &enum_values());
|
||||
let (numeric, enums) = parse_filters(Some("price:100:500"), &feature_name_to_index(), &enum_values());
|
||||
assert_eq!(numeric.len(), 1);
|
||||
assert_eq!(numeric[0].feat_idx, 0);
|
||||
assert_eq!(numeric[0].min, 100.0);
|
||||
|
|
@ -123,22 +128,25 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn parse_filters_enum() {
|
||||
let (numeric, enums) = parse_filters(Some("rating:A|C"), &feature_names(), &enum_values());
|
||||
let (numeric, enums) = parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values());
|
||||
assert!(numeric.is_empty());
|
||||
assert_eq!(enums.len(), 1);
|
||||
assert_eq!(enums[0].feat_idx, 2);
|
||||
assert_eq!(enums[0].allowed, vec![0.0, 2.0]);
|
||||
// Allowed values are stored as f32 bits
|
||||
assert!(enums[0].allowed.contains(&(0.0_f32).to_bits())); // A = index 0
|
||||
assert!(enums[0].allowed.contains(&(2.0_f32).to_bits())); // C = index 2
|
||||
assert_eq!(enums[0].allowed.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_filters_empty_and_invalid() {
|
||||
let (n, e) = parse_filters(None, &feature_names(), &enum_values());
|
||||
let (n, e) = parse_filters(None, &feature_name_to_index(), &enum_values());
|
||||
assert!(n.is_empty() && e.is_empty());
|
||||
|
||||
let (n, e) = parse_filters(Some(""), &feature_names(), &enum_values());
|
||||
let (n, e) = parse_filters(Some(""), &feature_name_to_index(), &enum_values());
|
||||
assert!(n.is_empty() && e.is_empty());
|
||||
|
||||
let (n, e) = parse_filters(Some("unknown:1:2"), &feature_names(), &enum_values());
|
||||
let (n, e) = parse_filters(Some("unknown:1:2"), &feature_name_to_index(), &enum_values());
|
||||
assert!(n.is_empty() && e.is_empty());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,6 @@ pub use hexagon_stats::get_hexagon_stats;
|
|||
pub use hexagons::get_hexagons;
|
||||
pub use og_image::get_og_image;
|
||||
pub use pois::{get_poi_categories, get_pois};
|
||||
pub use postcodes::get_postcodes;
|
||||
pub use postcodes::{get_postcode_lookup, get_postcodes};
|
||||
pub use properties::get_hexagon_properties;
|
||||
pub use tiles::{get_style, get_tile, init_tile_reader};
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use axum::response::Json;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, HISTOGRAM_BINS};
|
||||
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
|
||||
use crate::parsing::{h3_cell_bounds, parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ pub async fn get_hexagon_stats(
|
|||
let filters_str = params.filters.clone();
|
||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
|
@ -188,15 +188,17 @@ pub async fn get_hexagon_stats(
|
|||
let global_hist = &state.data.feature_stats[feature_index].histogram;
|
||||
let p1 = global_hist.p1;
|
||||
let p99 = global_hist.p99;
|
||||
// Use same bin count as global histogram for consistency
|
||||
let num_bins = global_hist.counts.len();
|
||||
|
||||
let mut count = 0usize;
|
||||
let mut min_value = f32::INFINITY;
|
||||
let mut max_value = f32::NEG_INFINITY;
|
||||
let mut sum = 0.0f64;
|
||||
let mut bins = vec![0u64; HISTOGRAM_BINS];
|
||||
let mut bins = vec![0u64; num_bins];
|
||||
|
||||
// Compute middle bin width (between p1 and p99)
|
||||
let middle_bins = HISTOGRAM_BINS.saturating_sub(2);
|
||||
let middle_bins = num_bins.saturating_sub(2);
|
||||
let middle_width = if middle_bins > 0 && p99 > p1 {
|
||||
(p99 - p1) / middle_bins as f32
|
||||
} else {
|
||||
|
|
@ -219,13 +221,13 @@ pub async fn get_hexagon_stats(
|
|||
let bin = if value < p1 {
|
||||
0 // Low outlier bin
|
||||
} else if value >= p99 {
|
||||
HISTOGRAM_BINS - 1 // High outlier bin
|
||||
num_bins - 1 // High outlier bin
|
||||
} else if middle_width > 0.0 {
|
||||
// Middle bins (1 to n-2)
|
||||
let middle_bin = ((value - p1) / middle_width) as usize;
|
||||
(1 + middle_bin).min(HISTOGRAM_BINS - 2)
|
||||
(1 + middle_bin).min(num_bins - 2)
|
||||
} else {
|
||||
HISTOGRAM_BINS / 2 // Fallback if p1 == p99
|
||||
num_bins / 2 // Fallback if p1 == p99
|
||||
};
|
||||
bins[bin] += 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use serde_json::{Map, Value};
|
|||
use tracing::{info, warn};
|
||||
|
||||
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
|
||||
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
|
||||
use crate::parsing::{bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -92,21 +92,29 @@ impl CellAgg {
|
|||
}
|
||||
}
|
||||
|
||||
/// Build feature maps from aggregated cell data.
|
||||
/// Build feature maps from aggregated cell data, filtering to only cells that intersect the query bounds.
|
||||
fn build_feature_maps(
|
||||
groups: &FxHashMap<u64, CellAgg>,
|
||||
min_keys: &[String],
|
||||
max_keys: &[String],
|
||||
num_features: usize,
|
||||
indices: Option<&[usize]>,
|
||||
query_bounds: (f64, f64, f64, f64), // (south, west, north, east)
|
||||
) -> Vec<Map<String, Value>> {
|
||||
let mut features = Vec::with_capacity(groups.len());
|
||||
let (q_south, q_west, q_north, q_east) = query_bounds;
|
||||
|
||||
for (&cell_id, aggregation) in groups {
|
||||
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// Filter out cells that don't intersect the query bounds
|
||||
let (c_south, c_west, c_north, c_east) = h3_cell_bounds(cell, 0.0);
|
||||
if !bounds_intersect(c_south, c_west, c_north, c_east, q_south, q_west, q_north, q_east) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut map = Map::new();
|
||||
map.insert("h3".into(), Value::String(cell.to_string()));
|
||||
map.insert("count".into(), Value::Number(aggregation.count.into()));
|
||||
|
|
@ -166,7 +174,7 @@ pub async fn get_hexagons(
|
|||
let filters_str = params.filters.clone();
|
||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
|
@ -185,11 +193,7 @@ pub async fn get_hexagons(
|
|||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
state
|
||||
.data
|
||||
.feature_names
|
||||
.iter()
|
||||
.position(|feat| feat == name)
|
||||
state.feature_name_to_index.get(name).copied()
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
|
@ -209,20 +213,6 @@ pub async fn get_hexagons(
|
|||
|
||||
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
|
||||
|
||||
let has_selective = field_indices.is_some();
|
||||
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
|
||||
|
||||
let aggregate_row = |groups: &mut FxHashMap<u64, CellAgg>, cell_id: u64, row: usize| {
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features));
|
||||
if has_selective {
|
||||
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
|
||||
} else {
|
||||
aggregation.add_row(feature_data, row, num_features);
|
||||
}
|
||||
};
|
||||
|
||||
let cell_for_row = |row: usize| -> u64 {
|
||||
let max_cell = precomputed[row];
|
||||
if !need_parent || max_cell == 0 {
|
||||
|
|
@ -235,21 +225,48 @@ pub async fn get_hexagons(
|
|||
.unwrap_or(0)
|
||||
};
|
||||
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
aggregate_row(&mut groups, cell_for_row(row), row);
|
||||
});
|
||||
// Hoist has_selective branch outside the hot loop to avoid per-row branching
|
||||
if let Some(sel_indices) = field_indices.as_deref() {
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
let cell_id = cell_for_row(row);
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features));
|
||||
aggregation.add_row_selective(feature_data, row, num_features, sel_indices);
|
||||
});
|
||||
} else {
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
let cell_id = cell_for_row(row);
|
||||
let aggregation = groups
|
||||
.entry(cell_id)
|
||||
.or_insert_with(|| CellAgg::new(num_features));
|
||||
aggregation.add_row(feature_data, row, num_features);
|
||||
});
|
||||
}
|
||||
|
||||
let t_agg = t0.elapsed();
|
||||
|
||||
|
|
@ -259,6 +276,7 @@ pub async fn get_hexagons(
|
|||
max_keys,
|
||||
num_features,
|
||||
field_indices.as_deref(),
|
||||
(south, west, north, east),
|
||||
);
|
||||
|
||||
let t_total = t0.elapsed();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Query;
|
||||
use axum::extract::{Path, Query};
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::Json;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
|
@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
|
|||
use serde_json::{Map, Value};
|
||||
use tracing::info;
|
||||
|
||||
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
|
||||
use crate::parsing::{bounds_intersect, parse_bounds, parse_filters, row_passes_filters};
|
||||
use crate::state::AppState;
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -96,7 +96,7 @@ pub async fn get_postcodes(
|
|||
let filters_str = params.filters.clone();
|
||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
|
@ -113,11 +113,7 @@ pub async fn get_postcodes(
|
|||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
state
|
||||
.data
|
||||
.feature_names
|
||||
.iter()
|
||||
.position(|feat| feat == name)
|
||||
state.feature_name_to_index.get(name).copied()
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
|
@ -134,12 +130,6 @@ pub async fn get_postcodes(
|
|||
let has_selective = field_indices.is_some();
|
||||
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
|
||||
|
||||
// Step 1: Find postcodes within bounds using spatial grid on centroids
|
||||
let postcode_indices: Vec<u32> = postcode_data.grid.query(south, west, north, east);
|
||||
|
||||
// Step 2: For each postcode, aggregate properties
|
||||
let mut postcode_aggs: FxHashMap<usize, PostcodeAgg> = FxHashMap::default();
|
||||
|
||||
// Build postcode -> rows mapping by iterating properties in bounds
|
||||
// and grouping by their postcode
|
||||
let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
|
||||
|
|
@ -165,24 +155,23 @@ pub async fn get_postcodes(
|
|||
}
|
||||
});
|
||||
|
||||
// Now aggregate for each postcode that's in bounds and has properties
|
||||
for &pc_idx in &postcode_indices {
|
||||
let idx = pc_idx as usize;
|
||||
if let Some(rows) = postcode_rows.get(&idx) {
|
||||
let agg = postcode_aggs
|
||||
.entry(idx)
|
||||
.or_insert_with(|| PostcodeAgg::new(num_features));
|
||||
for &row in rows {
|
||||
if has_selective {
|
||||
agg.add_row_selective(feature_data, row, num_features, sel_indices);
|
||||
} else {
|
||||
agg.add_row(feature_data, row, num_features);
|
||||
}
|
||||
// Aggregate for each postcode that has properties in bounds
|
||||
// (polygon intersection check happens later when building response)
|
||||
let mut postcode_aggs: FxHashMap<usize, PostcodeAgg> = FxHashMap::default();
|
||||
for (&pc_idx, rows) in &postcode_rows {
|
||||
let agg = postcode_aggs
|
||||
.entry(pc_idx)
|
||||
.or_insert_with(|| PostcodeAgg::new(num_features));
|
||||
for &row in rows {
|
||||
if has_selective {
|
||||
agg.add_row_selective(feature_data, row, num_features, sel_indices);
|
||||
} else {
|
||||
agg.add_row(feature_data, row, num_features);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build response
|
||||
// Build response, filtering postcodes to only those whose polygon intersects query bounds
|
||||
let mut features = Vec::with_capacity(postcode_aggs.len());
|
||||
|
||||
for (pc_idx, aggregation) in postcode_aggs {
|
||||
|
|
@ -190,6 +179,23 @@ pub async fn get_postcodes(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Compute postcode polygon bounding box and check intersection with query bounds
|
||||
let vertices = &postcode_data.vertices[pc_idx];
|
||||
let (mut pc_south, mut pc_north) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||
let (mut pc_west, mut pc_east) = (f64::INFINITY, f64::NEG_INFINITY);
|
||||
for &[lon, lat] in vertices {
|
||||
let lon_f = lon as f64;
|
||||
let lat_f = lat as f64;
|
||||
if lat_f < pc_south { pc_south = lat_f; }
|
||||
if lat_f > pc_north { pc_north = lat_f; }
|
||||
if lon_f < pc_west { pc_west = lon_f; }
|
||||
if lon_f > pc_east { pc_east = lon_f; }
|
||||
}
|
||||
|
||||
if !bounds_intersect(pc_south, pc_west, pc_north, pc_east, south, west, north, east) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut map = Map::new();
|
||||
map.insert(
|
||||
"postcode".into(),
|
||||
|
|
@ -198,7 +204,7 @@ pub async fn get_postcodes(
|
|||
map.insert("count".into(), Value::Number(aggregation.count.into()));
|
||||
|
||||
// Add vertices as array of [lon, lat] pairs
|
||||
let vertices_array: Vec<Value> = postcode_data.vertices[pc_idx]
|
||||
let vertices_array: Vec<Value> = vertices
|
||||
.iter()
|
||||
.map(|[lon, lat]| Value::Array(vec![Value::from(*lon as f64), Value::from(*lat as f64)]))
|
||||
.collect();
|
||||
|
|
@ -244,3 +250,44 @@ pub async fn get_postcodes(
|
|||
|
||||
Ok(Json(response))
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct PostcodeLookupResponse {
|
||||
pub postcode: String,
|
||||
pub latitude: f64,
|
||||
pub longitude: f64,
|
||||
/// Polygon vertices as [[lon, lat], ...] for rendering highlight
|
||||
pub vertices: Vec<[f64; 2]>,
|
||||
}
|
||||
|
||||
/// Look up a single postcode and return its centroid coordinates and polygon.
|
||||
pub async fn get_postcode_lookup(
|
||||
state: Arc<AppState>,
|
||||
Path(postcode): Path<String>,
|
||||
) -> Result<Json<PostcodeLookupResponse>, StatusCode> {
|
||||
// Normalize the postcode: uppercase, remove extra spaces, ensure single space
|
||||
let normalized = postcode
|
||||
.to_uppercase()
|
||||
.split_whitespace()
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
let postcode_data = &state.postcode_data;
|
||||
|
||||
if let Some(&idx) = postcode_data.postcode_to_idx.get(&normalized) {
|
||||
let (lat, lon) = postcode_data.centroids[idx];
|
||||
let vertices: Vec<[f64; 2]> = postcode_data.vertices[idx]
|
||||
.iter()
|
||||
.map(|[lo, la]| [*lo as f64, *la as f64])
|
||||
.collect();
|
||||
info!(postcode = %normalized, "GET /api/postcode/{postcode}");
|
||||
Ok(Json(PostcodeLookupResponse {
|
||||
postcode: normalized,
|
||||
latitude: lat as f64,
|
||||
longitude: lon as f64,
|
||||
vertices,
|
||||
}))
|
||||
} else {
|
||||
Err(StatusCode::NOT_FOUND)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ fn non_empty_string(text: &str) -> Option<String> {
|
|||
/// Look up an enum feature value by trying multiple possible column names.
|
||||
/// Uses the unified feature model: enum values stored as f32 indices in feature_data.
|
||||
fn lookup_enum_value(
|
||||
feature_names: &[String],
|
||||
feature_name_to_index: &FxHashMap<String, usize>,
|
||||
feature_data: &[f32],
|
||||
num_features: usize,
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
|
|
@ -74,7 +74,7 @@ fn lookup_enum_value(
|
|||
names: &[&str],
|
||||
) -> Option<String> {
|
||||
for name in names {
|
||||
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == *name) {
|
||||
if let Some(&feat_idx) = feature_name_to_index.get(*name) {
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
let value = feature_data[row * num_features + feat_idx];
|
||||
if value.is_finite() {
|
||||
|
|
@ -120,7 +120,7 @@ pub async fn get_hexagon_properties(
|
|||
let filters_str = params.filters.clone();
|
||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.data.feature_names,
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
|
@ -134,6 +134,7 @@ pub async fn get_hexagon_properties(
|
|||
let num_features = state.data.num_features;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let feature_names = &state.data.feature_names;
|
||||
let feature_name_to_index = &state.feature_name_to_index;
|
||||
let enum_values = &state.data.enum_values;
|
||||
|
||||
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
||||
|
|
@ -199,7 +200,7 @@ pub async fn get_hexagon_properties(
|
|||
postcode: non_empty_string(state.data.postcode(row)),
|
||||
is_construction_date_approximate: Some(state.data.is_approx_build_date(row)),
|
||||
property_type: lookup_enum_value(
|
||||
feature_names,
|
||||
feature_name_to_index,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
|
|
@ -207,7 +208,7 @@ pub async fn get_hexagon_properties(
|
|||
&["Property type", "epc_property_type", "pp_property_type"],
|
||||
),
|
||||
built_form: lookup_enum_value(
|
||||
feature_names,
|
||||
feature_name_to_index,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
|
|
@ -215,7 +216,7 @@ pub async fn get_hexagon_properties(
|
|||
&["Property type/built form", "built_form"],
|
||||
),
|
||||
duration: lookup_enum_value(
|
||||
feature_names,
|
||||
feature_name_to_index,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
|
|
@ -223,7 +224,7 @@ pub async fn get_hexagon_properties(
|
|||
&["Leashold/Freehold", "duration"],
|
||||
),
|
||||
current_energy_rating: lookup_enum_value(
|
||||
feature_names,
|
||||
feature_name_to_index,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
|
|
@ -231,7 +232,7 @@ pub async fn get_hexagon_properties(
|
|||
&["Current energy rating", "current_energy_rating"],
|
||||
),
|
||||
potential_energy_rating: lookup_enum_value(
|
||||
feature_names,
|
||||
feature_name_to_index,
|
||||
feature_data,
|
||||
num_features,
|
||||
enum_values,
|
||||
|
|
|
|||
|
|
@ -106,18 +106,7 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
|
|||
"paint": { "background-color": bg_color }
|
||||
})];
|
||||
|
||||
// Water layer
|
||||
if layer_ids.contains(&"water") {
|
||||
style_layers.push(serde_json::json!({
|
||||
"id": "water",
|
||||
"type": "fill",
|
||||
"source": "protomaps",
|
||||
"source-layer": "water",
|
||||
"paint": { "fill-color": water_color }
|
||||
}));
|
||||
}
|
||||
|
||||
// Land/earth layer
|
||||
// Land/earth layer (must come before water so rivers render on top)
|
||||
if layer_ids.contains(&"earth") {
|
||||
style_layers.push(serde_json::json!({
|
||||
"id": "earth",
|
||||
|
|
@ -128,9 +117,9 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
|
|||
}));
|
||||
}
|
||||
|
||||
// Landuse
|
||||
// Landuse (parks, forests) - render before water
|
||||
if layer_ids.contains(&"landuse") {
|
||||
let landuse_color = if is_dark { "#1f2d1f" } else { "#d8e8c8" };
|
||||
let park_color = if is_dark { "#2d4a2d" } else { "#c8e6c8" };
|
||||
style_layers.push(serde_json::json!({
|
||||
"id": "landuse-park",
|
||||
"type": "fill",
|
||||
|
|
@ -141,7 +130,18 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
|
|||
["==", ["get", "pmap:kind"], "nature_reserve"],
|
||||
["==", ["get", "pmap:kind"], "forest"]
|
||||
],
|
||||
"paint": { "fill-color": landuse_color, "fill-opacity": 0.5 }
|
||||
"paint": { "fill-color": park_color, "fill-opacity": 0.7 }
|
||||
}));
|
||||
}
|
||||
|
||||
// Water layer (after earth so rivers show on top of land)
|
||||
if layer_ids.contains(&"water") {
|
||||
style_layers.push(serde_json::json!({
|
||||
"id": "water",
|
||||
"type": "fill",
|
||||
"source": "protomaps",
|
||||
"source-layer": "water",
|
||||
"paint": { "fill-color": water_color }
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
|
|||
974
server-rs/src/semantic_tests.rs
Normal file
974
server-rs/src/semantic_tests.rs
Normal file
|
|
@ -0,0 +1,974 @@
|
|||
//! Comprehensive semantic tests for the server.
|
||||
//!
|
||||
//! These tests validate the correctness of data processing, aggregation logic,
|
||||
//! spatial queries, and filter semantics without requiring real data files.
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::data::{compute_feature_stats, Histogram};
|
||||
use crate::features::Bounds;
|
||||
use crate::parsing::{
|
||||
bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters,
|
||||
ParsedEnumFilter, ParsedFilter,
|
||||
};
|
||||
use crate::utils::GridIndex;
|
||||
|
||||
// =========================================================================
|
||||
// GridIndex Tests
|
||||
// =========================================================================
|
||||
|
||||
mod grid_index {
|
||||
use super::*;
|
||||
|
||||
#[test]
fn empty_grid_returns_empty() {
    // An index built from no points must yield nothing, even for a whole-world query.
    let empty_index = GridIndex::build(&[], &[], 0.01);
    let hits = empty_index.query(-90.0, -180.0, 90.0, 180.0);
    assert!(hits.is_empty());
}
|
||||
|
||||
#[test]
fn single_point_inside_query() {
    let lats = vec![51.5_f32];
    let lons = vec![-0.1_f32];
    let index = GridIndex::build(&lats, &lons, 0.01);

    // The query rectangle surrounds the only point, so exactly row 0 comes back.
    let hits = index.query(51.4, -0.2, 51.6, 0.0);
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0], 0);
}
|
||||
|
||||
#[test]
fn single_point_outside_query() {
    let lats = vec![51.5_f32];
    let lons = vec![-0.1_f32];
    let index = GridIndex::build(&lats, &lons, 0.01);

    // The query rectangle lies entirely north-east of the point.
    let hits = index.query(52.0, 0.0, 53.0, 1.0);
    assert!(hits.is_empty());
}
|
||||
|
||||
#[test]
|
||||
fn multiple_points_partial_query() {
|
||||
let lat = vec![51.5_f32, 51.6, 51.7, 52.0];
|
||||
let lon = vec![-0.1_f32, -0.1, -0.1, -0.1];
|
||||
let grid = GridIndex::build(&lat, &lon, 0.01);
|
||||
|
||||
// Query that contains only points 0, 1, 2
|
||||
let result = grid.query(51.4, -0.2, 51.8, 0.0);
|
||||
assert_eq!(result.len(), 3);
|
||||
assert!(result.contains(&0));
|
||||
assert!(result.contains(&1));
|
||||
assert!(result.contains(&2));
|
||||
assert!(!result.contains(&3));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_at_grid_boundary() {
|
||||
// Points at exactly cell boundaries
|
||||
let lat = vec![51.0_f32, 51.01, 51.02];
|
||||
let lon = vec![0.0_f32, 0.01, 0.02];
|
||||
let grid = GridIndex::build(&lat, &lon, 0.01);
|
||||
|
||||
// Query just past the first cell
|
||||
let result = grid.query(50.99, -0.01, 51.005, 0.005);
|
||||
assert!(result.contains(&0));
|
||||
}
|
||||
|
||||
/// `for_each_in_bounds` must visit exactly the indices that `query` returns
/// for the same bounding box.
#[test]
fn for_each_matches_query() {
    let lat = vec![51.5_f32, 51.6, 51.7];
    let lon = vec![-0.1_f32, -0.2, -0.3];
    let grid = GridIndex::build(&lat, &lon, 0.01);

    let query_result = grid.query(51.4, -0.25, 51.65, 0.0);

    let mut foreach_result = Vec::new();
    grid.for_each_in_bounds(51.4, -0.25, 51.65, 0.0, |idx| {
        foreach_result.push(idx);
    });

    // Compare as sorted lists: visiting order is irrelevant, but a missing,
    // extra or duplicated index on either side must fail the test. Checking
    // only the length plus one-way containment would let duplicates hide a
    // mismatch.
    let mut expected: Vec<_> = query_result.iter().copied().collect();
    expected.sort_unstable();
    foreach_result.sort_unstable();
    assert_eq!(expected, foreach_result);
}
|
||||
|
||||
#[test]
|
||||
fn negative_coordinates() {
|
||||
let lat = vec![-33.9_f32, -33.8, -33.7];
|
||||
let lon = vec![151.2_f32, 151.3, 151.4];
|
||||
let grid = GridIndex::build(&lat, &lon, 0.01);
|
||||
|
||||
// Query: south=-34.0, north=-33.65
|
||||
// -33.9 is in range (between -34 and -33.65), lon 151.2 in range (151.1 to 151.5) ✓
|
||||
// -33.8 is in range, lon 151.3 in range ✓
|
||||
// -33.7 is in range, lon 151.4 in range ✓
|
||||
let result = grid.query(-34.0, 151.1, -33.65, 151.5);
|
||||
assert_eq!(result.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn query_bounds_completely_outside_grid() {
|
||||
let lat = vec![51.5_f32];
|
||||
let lon = vec![-0.1_f32];
|
||||
let grid = GridIndex::build(&lat, &lon, 0.01);
|
||||
|
||||
// Query in a completely different area
|
||||
let result = grid.query(0.0, 100.0, 10.0, 110.0);
|
||||
assert!(result.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn very_small_cell_size() {
|
||||
let lat = vec![51.5_f32, 51.5001, 51.5002];
|
||||
let lon = vec![-0.1_f32, -0.1001, -0.1002];
|
||||
let grid = GridIndex::build(&lat, &lon, 0.0001);
|
||||
|
||||
let result = grid.query(51.4999, -0.1003, 51.5003, -0.0999);
|
||||
assert_eq!(result.len(), 3);
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Filter Parsing Tests
|
||||
// =========================================================================
|
||||
|
||||
mod filter_parsing {
|
||||
use super::*;
|
||||
|
||||
fn make_feature_name_to_index() -> FxHashMap<String, usize> {
|
||||
[
|
||||
("Price".into(), 0),
|
||||
("Area".into(), 1),
|
||||
("Rating".into(), 2),
|
||||
("Type".into(), 3),
|
||||
]
|
||||
.into_iter()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn make_enum_values() -> FxHashMap<usize, Vec<String>> {
|
||||
let mut map = FxHashMap::default();
|
||||
// Feature index 3 (Type) is an enum
|
||||
map.insert(3, vec!["Detached".into(), "Semi".into(), "Terraced".into(), "Flat".into()]);
|
||||
map
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_single_numeric_filter() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
Some("Price:100000:500000"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert_eq!(numeric.len(), 1);
|
||||
assert!(enums.is_empty());
|
||||
assert_eq!(numeric[0].feat_idx, 0);
|
||||
assert_eq!(numeric[0].min, 100000.0);
|
||||
assert_eq!(numeric[0].max, 500000.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_multiple_numeric_filters() {
|
||||
let (numeric, _enums) = parse_filters(
|
||||
Some("Price:100000:500000,Area:50:200"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert_eq!(numeric.len(), 2);
|
||||
assert_eq!(numeric[0].feat_idx, 0);
|
||||
assert_eq!(numeric[1].feat_idx, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_single_enum_filter() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
Some("Type:Detached|Flat"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert!(numeric.is_empty());
|
||||
assert_eq!(enums.len(), 1);
|
||||
assert_eq!(enums[0].feat_idx, 3);
|
||||
assert_eq!(enums[0].allowed, vec![0.0, 3.0]); // Detached=0, Flat=3
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_mixed_filters() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
Some("Price:100000:500000,Type:Semi|Terraced"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert_eq!(numeric.len(), 1);
|
||||
assert_eq!(enums.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_unknown_feature_ignored() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
Some("Unknown:100:200"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert!(numeric.is_empty());
|
||||
assert!(enums.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_invalid_numeric_format_ignored() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
Some("Price:not_a_number:500000"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert!(numeric.is_empty());
|
||||
assert!(enums.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_enum_with_unknown_value() {
|
||||
let (_numeric, enums) = parse_filters(
|
||||
Some("Type:Detached|Unknown|Flat"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert_eq!(enums.len(), 1);
|
||||
// Unknown is filtered out, only Detached and Flat remain
|
||||
assert_eq!(enums[0].allowed, vec![0.0, 3.0]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_empty_filter_string() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
Some(""),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert!(numeric.is_empty());
|
||||
assert!(enums.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_none_filter() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
None,
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert!(numeric.is_empty());
|
||||
assert!(enums.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_filter_with_whitespace() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
Some("Price : 100000 : 500000 , Type : Detached | Flat"),
|
||||
&make_feature_name_to_index(),
|
||||
&make_enum_values(),
|
||||
);
|
||||
|
||||
assert_eq!(numeric.len(), 1);
|
||||
assert_eq!(enums.len(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Filter Application Tests
|
||||
// =========================================================================
|
||||
|
||||
mod filter_application {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn row_passes_no_filters() {
|
||||
let feature_data = vec![100.0_f32, 50.0];
|
||||
assert!(row_passes_filters(0, &[], &[], &feature_data, 2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_passes_numeric_filter_in_range() {
|
||||
let feature_data = vec![150.0_f32];
|
||||
let filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
min: 100.0,
|
||||
max: 200.0,
|
||||
}];
|
||||
assert!(row_passes_filters(0, &filters, &[], &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_fails_numeric_filter_below_min() {
|
||||
let feature_data = vec![50.0_f32];
|
||||
let filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
min: 100.0,
|
||||
max: 200.0,
|
||||
}];
|
||||
assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_fails_numeric_filter_above_max() {
|
||||
let feature_data = vec![250.0_f32];
|
||||
let filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
min: 100.0,
|
||||
max: 200.0,
|
||||
}];
|
||||
assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_passes_numeric_filter_at_boundary() {
|
||||
let filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
min: 100.0,
|
||||
max: 200.0,
|
||||
}];
|
||||
|
||||
// At min boundary
|
||||
assert!(row_passes_filters(0, &filters, &[], &[100.0], 1));
|
||||
// At max boundary
|
||||
assert!(row_passes_filters(0, &filters, &[], &[200.0], 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_fails_numeric_filter_with_nan() {
|
||||
let feature_data = vec![f32::NAN];
|
||||
let filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
min: 100.0,
|
||||
max: 200.0,
|
||||
}];
|
||||
assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_passes_enum_filter_allowed_value() {
|
||||
let feature_data = vec![1.0_f32]; // Index 1
|
||||
let enum_filters = vec![ParsedEnumFilter {
|
||||
feat_idx: 0,
|
||||
allowed: vec![0.0, 1.0, 2.0],
|
||||
}];
|
||||
assert!(row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_fails_enum_filter_disallowed_value() {
|
||||
let feature_data = vec![3.0_f32]; // Index 3 not in allowed
|
||||
let enum_filters = vec![ParsedEnumFilter {
|
||||
feat_idx: 0,
|
||||
allowed: vec![0.0, 1.0, 2.0],
|
||||
}];
|
||||
assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_fails_enum_filter_with_nan() {
|
||||
let feature_data = vec![f32::NAN];
|
||||
let enum_filters = vec![ParsedEnumFilter {
|
||||
feat_idx: 0,
|
||||
allowed: vec![0.0, 1.0, 2.0],
|
||||
}];
|
||||
assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_fails_empty_enum_filter() {
|
||||
let feature_data = vec![1.0_f32];
|
||||
let enum_filters = vec![ParsedEnumFilter {
|
||||
feat_idx: 0,
|
||||
allowed: vec![], // Empty allowed list
|
||||
}];
|
||||
// Empty allowed means nothing passes
|
||||
assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_filters_all_must_pass() {
|
||||
// Row with two features: price=150, type=1
|
||||
let feature_data = vec![150.0_f32, 1.0];
|
||||
|
||||
let numeric_filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
min: 100.0,
|
||||
max: 200.0,
|
||||
}];
|
||||
let enum_filters = vec![ParsedEnumFilter {
|
||||
feat_idx: 1,
|
||||
allowed: vec![1.0, 2.0],
|
||||
}];
|
||||
|
||||
assert!(row_passes_filters(0, &numeric_filters, &enum_filters, &feature_data, 2));
|
||||
|
||||
// Change enum filter to not include 1.0
|
||||
let enum_filters_fail = vec![ParsedEnumFilter {
|
||||
feat_idx: 1,
|
||||
allowed: vec![0.0, 2.0],
|
||||
}];
|
||||
assert!(!row_passes_filters(0, &numeric_filters, &enum_filters_fail, &feature_data, 2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn row_major_layout_correct_indexing() {
|
||||
// 3 rows, 2 features each
|
||||
// Row 0: [100, 0], Row 1: [200, 1], Row 2: [300, 2]
|
||||
let feature_data = vec![
|
||||
100.0_f32, 0.0, // Row 0
|
||||
200.0, 1.0, // Row 1
|
||||
300.0, 2.0, // Row 2
|
||||
];
|
||||
let num_features = 2;
|
||||
|
||||
let filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
min: 150.0,
|
||||
max: 250.0,
|
||||
}];
|
||||
|
||||
assert!(!row_passes_filters(0, &filters, &[], &feature_data, num_features)); // 100 not in range
|
||||
assert!(row_passes_filters(1, &filters, &[], &feature_data, num_features)); // 200 in range
|
||||
assert!(!row_passes_filters(2, &filters, &[], &feature_data, num_features)); // 300 not in range
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Bounds Parsing Tests
|
||||
// =========================================================================
|
||||
|
||||
mod bounds_parsing {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn parse_valid_bounds() {
|
||||
let (south, west, north, east) = parse_bounds("51.0,-0.5,52.0,0.5").unwrap();
|
||||
assert_eq!(south, 51.0);
|
||||
assert_eq!(west, -0.5);
|
||||
assert_eq!(north, 52.0);
|
||||
assert_eq!(east, 0.5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_with_spaces() {
|
||||
let (south, west, _north, _east) = parse_bounds("51.0, -0.5, 52.0, 0.5").unwrap();
|
||||
assert_eq!(south, 51.0);
|
||||
assert_eq!(west, -0.5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_negative_values() {
|
||||
let (south, _west, north, _east) = parse_bounds("-51.5,-0.5,-50.0,0.5").unwrap();
|
||||
assert_eq!(south, -51.5);
|
||||
assert_eq!(north, -50.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_invalid_too_few_parts() {
|
||||
assert!(parse_bounds("51.0,-0.5,52.0").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_invalid_too_many_parts() {
|
||||
assert!(parse_bounds("51.0,-0.5,52.0,0.5,1.0").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_invalid_non_numeric() {
|
||||
assert!(parse_bounds("51.0,abc,52.0,0.5").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_bounds_empty_string() {
|
||||
assert!(parse_bounds("").is_err());
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Bounds Intersection Tests
|
||||
// =========================================================================
|
||||
|
||||
mod bounds_intersection {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn overlapping_boxes_intersect() {
|
||||
assert!(bounds_intersect(
|
||||
0.0, 0.0, 2.0, 2.0, // Box A
|
||||
1.0, 1.0, 3.0, 3.0 // Box B overlaps
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one_box_inside_other_intersects() {
|
||||
assert!(bounds_intersect(
|
||||
0.0, 0.0, 10.0, 10.0, // Box A (large)
|
||||
2.0, 2.0, 5.0, 5.0 // Box B (inside A)
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn touching_at_corner_intersects() {
|
||||
assert!(bounds_intersect(
|
||||
0.0, 0.0, 1.0, 1.0, // Box A
|
||||
1.0, 1.0, 2.0, 2.0 // Box B touches at (1,1)
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn touching_at_edge_intersects() {
|
||||
assert!(bounds_intersect(
|
||||
0.0, 0.0, 1.0, 1.0, // Box A
|
||||
1.0, 0.0, 2.0, 1.0 // Box B touches along right edge
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn disjoint_horizontally_no_intersect() {
|
||||
assert!(!bounds_intersect(
|
||||
0.0, 0.0, 1.0, 1.0, // Box A
|
||||
0.0, 2.0, 1.0, 3.0 // Box B to the right
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn disjoint_vertically_no_intersect() {
|
||||
assert!(!bounds_intersect(
|
||||
0.0, 0.0, 1.0, 1.0, // Box A
|
||||
2.0, 0.0, 3.0, 1.0 // Box B above
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn disjoint_diagonally_no_intersect() {
|
||||
assert!(!bounds_intersect(
|
||||
0.0, 0.0, 1.0, 1.0, // Box A
|
||||
2.0, 2.0, 3.0, 3.0 // Box B diagonally away
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn negative_coordinates_intersect() {
|
||||
assert!(bounds_intersect(
|
||||
-2.0, -2.0, -1.0, -1.0, // Box A (negative coords)
|
||||
-1.5, -1.5, -0.5, -0.5 // Box B overlaps
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// H3 Cell Bounds Tests
|
||||
// =========================================================================
|
||||
|
||||
mod h3_bounds {
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// With no buffer the bounding box of a cell must be well-formed and lie
/// where the cell is expected to be.
#[test]
fn h3_cell_bounds_zero_buffer() {
    let sf_cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
    let (south, west, north, east) = h3_cell_bounds(sf_cell, 0.0);

    // The box must not be inverted on either axis.
    assert!(south < north, "south {} should be < north {}", south, north);
    assert!(west < east, "west {} should be < east {}", west, east);

    // San Francisco area: latitude ≈ 37.77, longitude ≈ -122.4.
    assert!(south > 30.0 && south < 45.0);
    assert!(west < -100.0);
}
|
||||
|
||||
/// A non-zero buffer must push every edge of the bounding box outwards by
/// exactly the buffer amount.
#[test]
fn h3_cell_bounds_with_buffer() {
    let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
    let (s0, w0, n0, e0) = h3_cell_bounds(cell, 0.0);
    let (s1, w1, n1, e1) = h3_cell_bounds(cell, 0.1);

    // With buffer, bounds should be larger in every direction.
    assert!(s1 < s0, "south with buffer should be smaller");
    assert!(w1 < w0, "west with buffer should be smaller");
    assert!(n1 > n0, "north with buffer should be larger");
    assert!(e1 > e0, "east with buffer should be larger");

    // Each edge should move by exactly 0.1 degrees. All four edges are
    // checked so an asymmetric buffer cannot slip through.
    let shifts = [
        ("south", s0 - s1),
        ("west", w0 - w1),
        ("north", n1 - n0),
        ("east", e1 - e0),
    ];
    for (edge, shift) in shifts {
        assert!(
            (shift - 0.1).abs() < 1e-10,
            "{} edge moved by {}, expected 0.1",
            edge,
            shift
        );
    }
}
|
||||
|
||||
/// The bounding box of a coarse (resolution 5) ancestor must cover more
/// area than the box of its fine (resolution 9) descendant.
#[test]
fn h3_cell_bounds_different_resolutions() {
    // Area of the unbuffered bounding box, in square degrees.
    let bbox_area = |cell: h3o::CellIndex| {
        let (south, west, north, east) = h3_cell_bounds(cell, 0.0);
        (north - south) * (east - west)
    };

    // Resolution 9 cell and its resolution 5 parent.
    let fine = h3o::CellIndex::from_str("8928308280fffff").unwrap();
    let coarse = fine
        .parent(h3o::Resolution::try_from(5).unwrap())
        .unwrap();

    let area_low = bbox_area(coarse);
    let area_high = bbox_area(fine);
    assert!(area_low > area_high, "Lower res should have larger area");
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Histogram Computation Tests
|
||||
// =========================================================================
|
||||
|
||||
mod histogram {
|
||||
use super::*;
|
||||
|
||||
fn make_fixed_bounds(min: f32, max: f32) -> Bounds {
|
||||
Bounds::Fixed { min, max }
|
||||
}
|
||||
|
||||
fn make_percentile_bounds(low: f64, high: f64) -> Bounds {
|
||||
Bounds::Percentile { low, high }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_empty_data() {
|
||||
let data: Vec<f32> = vec![];
|
||||
let bounds = make_fixed_bounds(0.0, 100.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
assert_eq!(stats.slider_min, 0.0);
|
||||
assert_eq!(stats.slider_max, 100.0);
|
||||
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_single_value() {
|
||||
let data = vec![50.0_f32];
|
||||
let bounds = make_fixed_bounds(0.0, 100.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
assert_eq!(stats.histogram.min, 50.0);
|
||||
assert_eq!(stats.histogram.max, 50.0);
|
||||
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_uniform_distribution() {
|
||||
// 100 values from 0 to 99
|
||||
let data: Vec<f32> = (0..100).map(|i| i as f32).collect();
|
||||
let bounds = make_fixed_bounds(0.0, 100.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
assert_eq!(stats.histogram.min, 0.0);
|
||||
assert_eq!(stats.histogram.max, 99.0);
|
||||
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 100);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_with_nan_values() {
|
||||
let data = vec![10.0_f32, f32::NAN, 20.0, f32::NAN, 30.0];
|
||||
let bounds = make_fixed_bounds(0.0, 100.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
// Only 3 non-NaN values
|
||||
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 3);
|
||||
assert_eq!(stats.histogram.min, 10.0);
|
||||
assert_eq!(stats.histogram.max, 30.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_all_nan() {
|
||||
let data = vec![f32::NAN, f32::NAN, f32::NAN];
|
||||
let bounds = make_fixed_bounds(0.0, 100.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_all_same_value() {
|
||||
let data = vec![42.0_f32; 1000];
|
||||
let bounds = make_fixed_bounds(0.0, 100.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
assert_eq!(stats.histogram.min, 42.0);
|
||||
assert_eq!(stats.histogram.max, 42.0);
|
||||
assert_eq!(stats.histogram.p1, 42.0);
|
||||
assert_eq!(stats.histogram.p99, 42.0);
|
||||
assert_eq!(stats.histogram.counts.iter().sum::<u64>(), 1000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn histogram_percentile_bounds() {
|
||||
// Data with outliers: 1 very low, 1 very high, 98 in middle
|
||||
let mut data: Vec<f32> = vec![0.0]; // Low outlier
|
||||
data.extend((1..99).map(|i| 50.0 + i as f32 * 0.01)); // Main data around 50
|
||||
data.push(1000.0); // High outlier
|
||||
|
||||
let bounds = make_percentile_bounds(2.0, 98.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
// Slider should exclude outliers
|
||||
assert!(stats.slider_min > 0.0);
|
||||
assert!(stats.slider_max < 1000.0);
|
||||
}
|
||||
|
||||
/// Checks the bin layout of a 10-bin histogram with `p1` = 10 and `p99` = 90:
/// a value below `p1` lands in the first bin, a value above `p99` in the
/// last bin, and a value in between in one of the middle bins.
#[test]
fn histogram_bin_for_value() {
    let hist = Histogram {
        min: 0.0,
        max: 100.0,
        p1: 10.0,
        p99: 90.0,
        counts: vec![0; 10], // 10 bins
    };

    // Low outlier bin (bin 0)
    assert_eq!(hist.bin_for_value(5.0), 0);

    // High outlier bin (bin 9)
    assert_eq!(hist.bin_for_value(95.0), 9);

    // Middle bins (bins 1-8)
    let bin = hist.bin_for_value(50.0);
    assert!(
        (1..=8).contains(&bin),
        "50.0 should fall in a middle bin (1-8), got bin {}",
        bin
    );
}
|
||||
|
||||
/// The middle bins split the `p1`..`p99` span evenly once the two outlier
/// bins are set aside.
#[test]
fn histogram_middle_bin_width() {
    let hist = Histogram {
        counts: vec![0; 10], // 10 bins
        p1: 10.0,
        p99: 90.0,
        min: 0.0,
        max: 100.0,
    };

    // p1..p99 covers 80 units, shared by 8 bins (10 minus 2 outlier bins).
    let expected_width = 80.0 / 8.0;
    let deviation = (hist.middle_bin_width() - expected_width).abs();
    assert!(deviation < 0.001);
}
|
||||
|
||||
#[test]
|
||||
fn histogram_cardinality_caps_bins() {
|
||||
// Only 3 unique values - should cap bins at 3
|
||||
let data = vec![1.0_f32, 1.0, 2.0, 2.0, 3.0, 3.0];
|
||||
let bounds = make_fixed_bounds(0.0, 100.0);
|
||||
let stats = compute_feature_stats(&data, &bounds);
|
||||
|
||||
// Bins should be capped at cardinality (3)
|
||||
assert_eq!(stats.histogram.counts.len(), 3);
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Aggregation Semantics Tests
|
||||
// =========================================================================
|
||||
|
||||
mod aggregation {
|
||||
/// Min/max aggregation must ignore NaN entries instead of letting them
/// poison the result.
#[test]
fn min_max_skips_nan() {
    let values = vec![10.0_f32, f32::NAN, 20.0, f32::NAN, 5.0];

    // Fold over the finite samples only, starting from the identity
    // elements of min and max.
    let (lowest, highest) = values
        .iter()
        .copied()
        .filter(|sample| sample.is_finite())
        .fold(
            (f32::INFINITY, f32::NEG_INFINITY),
            |(lo, hi), sample| (lo.min(sample), hi.max(sample)),
        );

    assert_eq!(lowest, 5.0);
    assert_eq!(highest, 20.0);
}
|
||||
|
||||
/// Counting must only include entries that hold a real (finite) value.
#[test]
fn count_skips_nan() {
    let values = vec![1.0_f32, f32::NAN, 2.0, f32::NAN, 3.0];

    let mut finite_total = 0_usize;
    for sample in &values {
        if sample.is_finite() {
            finite_total += 1;
        }
    }

    assert_eq!(finite_total, 3);
}
|
||||
|
||||
/// Enum features are stored as float-encoded indices; counting them must
/// tally each index and ignore NaN (missing) entries.
#[test]
fn enum_value_counting() {
    // Enum values: 0.0=Detached, 1.0=Semi, 2.0=Terraced, 3.0=Flat
    let values = vec![0.0_f32, 1.0, 1.0, 2.0, f32::NAN, 3.0, 1.0];
    let enum_count = 4;

    let mut counts = vec![0u64; enum_count];
    values
        .iter()
        .filter(|code| code.is_finite())
        .map(|&code| code as usize)
        .for_each(|slot| {
            // Codes outside the known enum range are dropped.
            if let Some(tally) = counts.get_mut(slot) {
                *tally += 1;
            }
        });

    let expected = [
        ("Detached", 1),
        ("Semi", 3),
        ("Terraced", 1),
        ("Flat", 1),
    ];
    for (slot, (_label, tally)) in expected.into_iter().enumerate() {
        assert_eq!(counts[slot], tally);
    }
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// H3 Resolution Tests
|
||||
// =========================================================================
|
||||
|
||||
mod h3_resolution {
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Asking a resolution 9 cell for its resolution 7 parent yields a cell at
/// that resolution which lists the original cell among its children.
#[test]
fn parent_cell_at_lower_resolution() {
    let parent_res = h3o::Resolution::try_from(7).unwrap();

    // Resolution 9 cell and its ancestor at resolution 7.
    let child = h3o::CellIndex::from_str("8928308280fffff").unwrap();
    let parent = child.parent(parent_res).unwrap();
    assert_eq!(parent.resolution(), parent_res);

    // Walking back down from the parent must reach the child again.
    let mut descendants = parent.children(child.resolution());
    assert!(descendants.any(|candidate| candidate == child));
}
|
||||
|
||||
/// The "parent" of a cell at the cell's own resolution is the cell itself.
#[test]
fn same_resolution_returns_self() {
    let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();
    let own_resolution = cell.resolution();

    assert_eq!(cell.parent(own_resolution), Some(cell));
}
|
||||
|
||||
/// A parent cannot be finer than its child: requesting the resolution 10
/// "parent" of a resolution 9 cell must yield `None`.
#[test]
fn higher_resolution_parent_fails() {
    let higher_res = h3o::Resolution::try_from(10).unwrap();

    // Resolution 9 cell
    let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap();

    let lookup = cell.parent(higher_res);
    assert!(lookup.is_none());
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Edge Cases and Error Handling
|
||||
// =========================================================================
|
||||
|
||||
mod edge_cases {
|
||||
use super::*;
|
||||
|
||||
/// Points close to the poles and the antimeridian must still be indexed and
/// returned by a whole-world query.
#[test]
fn very_large_coordinates() {
    let lat = vec![89.9_f32, -89.9];
    let lon = vec![179.9_f32, -179.9];

    // NOTE(review): a coarse 1° cell is used on purpose. The two points span
    // nearly the whole globe, and the index appears to allocate per-cell
    // storage over the data's bounding box, so a 0.01° cell would presumably
    // mean hundreds of millions of cells for a two-point test — confirm
    // against `GridIndex::build`.
    let grid = GridIndex::build(&lat, &lon, 1.0);

    let result = grid.query(-90.0, -180.0, 90.0, 180.0);
    assert_eq!(result.len(), 2);
}
|
||||
|
||||
/// A value must pass a numeric filter whose bounds sit only a few
/// representable `f32` steps on either side of it.
#[test]
fn filter_at_float_precision_boundary() {
    let value = 100.0_f32;

    // `f32::EPSILON` is the gap between 1.0 and the next float. Around 100.0
    // the gap between neighbouring floats is larger, so `100.0 ± f32::EPSILON`
    // rounds straight back to 100.0 and the window collapses to a single
    // point. Scaling the epsilon by the value gives bounds that really differ.
    let min = value * (1.0 - f32::EPSILON);
    let max = value * (1.0 + f32::EPSILON);
    assert!(
        min < value && value < max,
        "bounds must strictly bracket the value"
    );

    let filter = ParsedFilter {
        feat_idx: 0,
        min,
        max,
    };

    assert!(row_passes_filters(0, &[filter], &[], &[value], 1));
}
|
||||
|
||||
/// A stored enum code that is not a whole number matches none of the allowed
/// codes, so the row is rejected.
#[test]
fn enum_filter_with_fractional_index() {
    let enum_filters = [ParsedEnumFilter {
        feat_idx: 0,
        allowed: vec![1.0, 2.0],
    }];

    // 1.5 sits between the allowed codes 1.0 and 2.0 and equals neither.
    let stored = [1.5_f32];
    let passes = row_passes_filters(0, &[], &enum_filters, &stored, 1);
    assert!(!passes);
}
|
||||
|
||||
/// Inverted bounds (south > north) are accepted by `parse_bounds` as-is, and
/// a grid query with such bounds is expected to return nothing.
#[test]
fn bounds_with_inverted_min_max() {
    // What if south > north? (Invalid input)
    // Parsing is expected to hand the values back unchanged, without
    // validating their order.
    let (south, _west, north, _east) = parse_bounds("52.0,-0.5,51.0,0.5").unwrap();
    assert_eq!(south, 52.0);
    assert_eq!(north, 51.0);

    // GridIndex should handle this gracefully
    let lat = vec![51.5_f32];
    let lon = vec![-0.1_f32];
    let grid = GridIndex::build(&lat, &lon, 0.01);

    // The point lies between the two latitudes, but because the bounds are
    // inverted the query must still come back empty.
    let result = grid.query(52.0, -0.5, 51.0, 0.5);
    assert!(result.is_empty(), "Inverted bounds should return empty");
}
|
||||
|
||||
/// Documents the current handling of infinite values: they are treated like
/// any other non-NaN sample.
///
/// NOTE: per the original author's note the implementation filters with
/// `!is_nan()` rather than `is_finite()`, so ±infinity take part in the
/// min/max and the counts. Consider whether that should be changed.
#[test]
fn infinity_values_in_data() {
    let stats = compute_feature_stats(
        &vec![f32::INFINITY, f32::NEG_INFINITY, 50.0],
        &Bounds::Fixed {
            min: 0.0,
            max: 100.0,
        },
    );
    let histogram = &stats.histogram;

    // Current behavior: the infinities become the histogram extremes.
    assert_eq!(histogram.min, f32::NEG_INFINITY);
    assert_eq!(histogram.max, f32::INFINITY);

    // None of the 3 values is NaN, so all of them are counted.
    let counted: u64 = histogram.counts.iter().sum();
    assert_eq!(counted, 3);
}
|
||||
|
||||
/// Baseline for the infinity test: plain finite samples produce the obvious
/// min, max and total count.
#[test]
fn only_finite_values() {
    let stats = compute_feature_stats(
        &vec![10.0_f32, 20.0, 30.0],
        &Bounds::Fixed {
            min: 0.0,
            max: 100.0,
        },
    );
    let histogram = &stats.histogram;

    assert_eq!(histogram.min, 10.0);
    assert_eq!(histogram.max, 30.0);

    let counted: u64 = histogram.counts.iter().sum();
    assert_eq!(counted, 3);
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::data::{POICategoryGroup, POIData, PostcodeData, PropertyData};
|
||||
use crate::routes::FeaturesResponse;
|
||||
use crate::utils::GridIndex;
|
||||
|
|
@ -12,6 +14,8 @@ pub struct AppState {
|
|||
pub poi_grid: GridIndex,
|
||||
/// Postcode boundary data for high-zoom rendering
|
||||
pub postcode_data: PostcodeData,
|
||||
/// O(1) lookup: feature name → index in feature_names/feature_data
|
||||
pub feature_name_to_index: FxHashMap<String, usize>,
|
||||
/// Precomputed JSON key names: "min_{feature_name}" for each feature
|
||||
pub min_keys: Vec<String>,
|
||||
/// Precomputed JSON key names: "max_{feature_name}" for each feature
|
||||
|
|
|
|||
|
|
@ -33,24 +33,19 @@ impl GridIndex {
|
|||
};
|
||||
}
|
||||
|
||||
// Single pass: compute min/max bounds and count items per cell
|
||||
let mut min_lat = f32::INFINITY;
|
||||
let mut max_lat = f32::NEG_INFINITY;
|
||||
let mut min_lon = f32::INFINITY;
|
||||
let mut max_lon = f32::NEG_INFINITY;
|
||||
|
||||
for index in 0..lat.len() {
|
||||
if lat[index] < min_lat {
|
||||
min_lat = lat[index];
|
||||
}
|
||||
if lat[index] > max_lat {
|
||||
max_lat = lat[index];
|
||||
}
|
||||
if lon[index] < min_lon {
|
||||
min_lon = lon[index];
|
||||
}
|
||||
if lon[index] > max_lon {
|
||||
max_lon = lon[index];
|
||||
}
|
||||
let la = lat[index];
|
||||
let lo = lon[index];
|
||||
if la < min_lat { min_lat = la; }
|
||||
if la > max_lat { max_lat = la; }
|
||||
if lo < min_lon { min_lon = lo; }
|
||||
if lo > max_lon { max_lon = lo; }
|
||||
}
|
||||
|
||||
min_lat -= cell_size;
|
||||
|
|
@ -70,7 +65,7 @@ impl GridIndex {
|
|||
"Building grid index (CSR)"
|
||||
);
|
||||
|
||||
// First pass: count items per cell
|
||||
// Count items per cell (now we know the bounds)
|
||||
let mut counts = vec![0u32; num_cells];
|
||||
for index in 0..lat.len() {
|
||||
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue