diff --git a/server-rs/Cargo.lock b/server-rs/Cargo.lock index 381d16f..02daae5 100644 --- a/server-rs/Cargo.lock +++ b/server-rs/Cargo.lock @@ -322,9 +322,9 @@ checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] @@ -348,9 +348,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" dependencies = [ "serde", ] @@ -411,9 +411,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.56" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75ca66430e33a14957acc24c5077b503e7d374151b2b4b3a10c83b4ceb4be0e" +checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" dependencies = [ "clap_builder", "clap_derive", @@ -421,9 +421,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.56" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793207c7fa6300a0608d1080b858e5fdbe713cdc1c8db9fb17777d8a13e63df0" +checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" dependencies = [ "anstream", "anstyle", @@ -749,9 +749,9 @@ checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1161,14 +1161,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64", "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -2600,9 +2599,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -2612,9 +2611,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -2623,9 +2622,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "reqwest" @@ -2946,9 +2945,9 @@ checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "slotmap" @@ -3130,9 +3129,9 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ "bitflags", "core-foundation 0.9.4", @@ -3649,9 +3648,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -4009,18 +4008,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.37" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7456cf00f0685ad319c5b1693f291a650eaf345e941d082fc4e03df8a03996ac" +checksum = "57cf3aa6855b23711ee9852dfc97dfaa51c45feaba5b645d0c777414d494a961" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.37" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1328722bbf2115db7e19d69ebcc15e795719e2d66b60827c6a69a117365e37a0" +checksum = "8a616990af1a287837c4fe6596ad77ef57948f787e46ce28e166facc0cc1cb75" dependencies = [ "proc-macro2", "quote", @@ -4089,15 +4088,15 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "a7948af682ccbc3342b6e9420e8c51c1fe5d7bf7756002b4a3c6cabfe96a7e3c" [[package]] name = "zmij" -version = "1.0.17" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" +checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" [[package]] name = "zstd" diff --git a/server-rs/src/data.rs b/server-rs/src/data.rs index 6f0e5a9..7a7c181 100644 --- a/server-rs/src/data.rs +++ b/server-rs/src/data.rs @@ -4,4 +4,4 @@ mod property; pub use poi::{POICategoryGroup, POIData}; pub use postcodes::PostcodeData; -pub use property::{compute_feature_stats, precompute_h3, Histogram, PropertyData}; +pub use property::{precompute_h3, Histogram, PropertyData}; diff --git a/server-rs/src/main.rs b/server-rs/src/main.rs index 1e3a7fd..577a23c 100644 --- a/server-rs/src/main.rs +++ b/server-rs/src/main.rs @@ -5,8 +5,6 @@ mod metrics; mod og_middleware; pub mod parsing; mod routes; -#[cfg(test)] -mod semantic_tests; mod state; pub mod utils; diff --git a/server-rs/src/parsing/bounds.rs b/server-rs/src/parsing/bounds.rs index 462891b..b825b3f 100644 --- a/server-rs/src/parsing/bounds.rs +++ b/server-rs/src/parsing/bounds.rs @@ -66,7 +66,23 @@ pub fn parse_bounds(bounds_str: &str) -> Result<(f64, f64, f64, f64), (StatusCod )); } - Ok((parts[0], parts[1], parts[2], parts[3])) + let (south, west, north, east) = (parts[0], parts[1], parts[2], parts[3]); + + // Validate that bounds are not inverted + if south > north { + return Err(( + StatusCode::BAD_REQUEST, + format!("Invalid bounds: south ({}) must be <= north ({})", south, north), + )); + } + if west > east { + return Err(( + StatusCode::BAD_REQUEST, + format!("Invalid bounds: west ({}) must be <= east ({})", west, east), + )); + } + + Ok((south, west, north, east)) } #[cfg(test)] @@ -76,8 +92,14 @@ mod tests { #[test] fn parse_bounds_valid() { - assert_eq!(parse_bounds("1.0,2.0,3.0,4.0").unwrap(), (1.0, 2.0, 3.0, 4.0)); - assert_eq!(parse_bounds("-51.5, -0.1, 51.6, 0.2").unwrap(), (-51.5, -0.1, 51.6, 0.2)); + assert_eq!( + parse_bounds("1.0,2.0,3.0,4.0").unwrap(), + (1.0, 2.0, 3.0, 4.0) + ); + assert_eq!( + parse_bounds("-51.5, -0.1, 51.6, 0.2").unwrap(), + (-51.5, -0.1, 51.6, 0.2) + ); } #[test] @@ -88,6 +110,14 @@ mod tests { assert!(parse_bounds("").is_err()); } + #[test] + fn parse_bounds_inverted_rejected() { + // south > north is rejected + assert!(parse_bounds("52.0,-0.5,51.0,0.5").is_err()); + // west > east is rejected + assert!(parse_bounds("51.0,0.5,52.0,-0.5").is_err()); + } + #[test] fn h3_cell_bounds_applies_buffer() { let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); @@ -102,39 +132,143 @@ mod tests { #[test] fn h3_cell_bounds_returns_degrees_not_radians() { - // Cell "8928308280fffff" is in San Francisco area (~37.77°N, ~-122.4°W) let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.0); - // If h3o returned radians, values would be < π ≈ 3.14 - // Latitude ~37.77° proves we're getting degrees, not radians - assert!(min_lat > 30.0 && min_lat < 45.0, "min_lat {} should be ~37° (degrees)", min_lat); - assert!(max_lat > 30.0 && max_lat < 45.0, "max_lat {} should be ~37° (degrees)", max_lat); - - // Longitude ~-122° also proves degrees (radians would be < π) - assert!(min_lon < -100.0, "min_lon {} should be ~-122° (degrees)", min_lon); - assert!(max_lon < -100.0, "max_lon {} should be ~-122° (degrees)", max_lon); + assert!( + min_lat > 30.0 && min_lat < 45.0, + "min_lat {} should be ~37° (degrees)", + min_lat + ); + assert!( + max_lat > 30.0 && max_lat < 45.0, + "max_lat {} should be ~37° (degrees)", + max_lat + ); + assert!( + min_lon < -100.0, + "min_lon {} should be ~-122° (degrees)", + min_lon + ); + assert!( + max_lon < -100.0, + "max_lon {} should be ~-122° (degrees)", + max_lon + ); } #[test] fn bounds_intersect_overlapping() { - // Two overlapping boxes assert!(bounds_intersect(0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0)); - // Box B is inside box A assert!(bounds_intersect(0.0, 0.0, 10.0, 10.0, 2.0, 2.0, 5.0, 5.0)); - // Box A is inside box B assert!(bounds_intersect(2.0, 2.0, 5.0, 5.0, 0.0, 0.0, 10.0, 10.0)); - // Touching at edge assert!(bounds_intersect(0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0)); } #[test] fn bounds_intersect_non_overlapping() { - // Box B is to the right of box A assert!(!bounds_intersect(0.0, 0.0, 1.0, 1.0, 0.0, 2.0, 1.0, 3.0)); - // Box B is above box A assert!(!bounds_intersect(0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 3.0, 1.0)); - // Completely separate assert!(!bounds_intersect(0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 6.0, 6.0)); } + + #[test] + fn parse_bounds_with_spaces() { + let (south, west, _north, _east) = parse_bounds("51.0, -0.5, 52.0, 0.5").unwrap(); + assert_eq!(south, 51.0); + assert_eq!(west, -0.5); + } + + #[test] + fn parse_bounds_negative_values() { + let (south, _west, north, _east) = parse_bounds("-51.5,-0.5,-50.0,0.5").unwrap(); + assert_eq!(south, -51.5); + assert_eq!(north, -50.0); + } + + #[test] + fn touching_at_corner_intersects() { + assert!(bounds_intersect( + 0.0, 0.0, 1.0, 1.0, // Box A + 1.0, 1.0, 2.0, 2.0 // Box B touches at (1,1) + )); + } + + #[test] + fn touching_at_edge_intersects() { + assert!(bounds_intersect( + 0.0, 0.0, 1.0, 1.0, // Box A + 1.0, 0.0, 2.0, 1.0 // Box B touches along right edge + )); + } + + #[test] + fn disjoint_diagonally_no_intersect() { + assert!(!bounds_intersect( + 0.0, 0.0, 1.0, 1.0, // Box A + 2.0, 2.0, 3.0, 3.0 // Box B diagonally away + )); + } + + #[test] + fn negative_coordinates_intersect() { + assert!(bounds_intersect( + -2.0, -2.0, -1.0, -1.0, // Box A (negative coords) + -1.5, -1.5, -0.5, -0.5 // Box B overlaps + )); + } + + #[test] + fn h3_cell_bounds_zero_buffer() { + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + let (south, west, north, east) = h3_cell_bounds(cell, 0.0); + + assert!(south < north, "south {} should be < north {}", south, north); + assert!(west < east, "west {} should be < east {}", west, east); + assert!(south > 30.0 && south < 45.0); + assert!(west < -100.0); + } + + #[test] + fn h3_cell_bounds_different_resolutions() { + let cell_high = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + let res5 = h3o::Resolution::try_from(5).unwrap(); + let cell_low = cell_high.parent(res5).unwrap(); + + let (s_low, w_low, n_low, e_low) = h3_cell_bounds(cell_low, 0.0); + let (s_high, w_high, n_high, e_high) = h3_cell_bounds(cell_high, 0.0); + + let area_low = (n_low - s_low) * (e_low - w_low); + let area_high = (n_high - s_high) * (e_high - w_high); + assert!(area_low > area_high, "Lower res should have larger area"); + } + + #[test] + fn parent_cell_at_lower_resolution() { + let child = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + + let parent_res = h3o::Resolution::try_from(7).unwrap(); + let parent = child.parent(parent_res).unwrap(); + + assert_eq!(parent.resolution(), parent_res); + assert!(parent.children(child.resolution()).any(|c| c == child)); + } + + #[test] + fn same_resolution_returns_self() { + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + let res = cell.resolution(); + + let parent = cell.parent(res); + assert_eq!(parent, Some(cell)); + } + + #[test] + fn higher_resolution_parent_fails() { + let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); + + let higher_res = h3o::Resolution::try_from(10).unwrap(); + let parent = cell.parent(higher_res); + assert!(parent.is_none()); + } } diff --git a/server-rs/src/parsing/filters.rs b/server-rs/src/parsing/filters.rs index e098636..118034c 100644 --- a/server-rs/src/parsing/filters.rs +++ b/server-rs/src/parsing/filters.rs @@ -116,9 +116,39 @@ mod tests { map } + fn extended_feature_map() -> FxHashMap { + [ + ("Price".into(), 0), + ("Area".into(), 1), + ("Rating".into(), 2), + ("Type".into(), 3), + ] + .into_iter() + .collect() + } + + fn extended_enum_values() -> FxHashMap> { + let mut map = FxHashMap::default(); + map.insert( + 3, + vec![ + "Detached".into(), + "Semi".into(), + "Terraced".into(), + "Flat".into(), + ], + ); + map + } + + fn allowed_set(values: &[f32]) -> FxHashSet { + values.iter().map(|v| v.to_bits()).collect() + } + #[test] fn parse_filters_numeric() { - let (numeric, enums) = parse_filters(Some("price:100:500"), &feature_name_to_index(), &enum_values()); + let (numeric, enums) = + parse_filters(Some("price:100:500"), &feature_name_to_index(), &enum_values()); assert_eq!(numeric.len(), 1); assert_eq!(numeric[0].feat_idx, 0); assert_eq!(numeric[0].min, 100.0); @@ -128,11 +158,11 @@ mod tests { #[test] fn parse_filters_enum() { - let (numeric, enums) = parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values()); + let (numeric, enums) = + parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values()); assert!(numeric.is_empty()); assert_eq!(enums.len(), 1); assert_eq!(enums[0].feat_idx, 2); - // Allowed values are stored as f32 bits assert!(enums[0].allowed.contains(&(0.0_f32).to_bits())); // A = index 0 assert!(enums[0].allowed.contains(&(2.0_f32).to_bits())); // C = index 2 assert_eq!(enums[0].allowed.len(), 2); @@ -152,7 +182,11 @@ mod tests { #[test] fn row_passes_numeric_filter() { - let filters = vec![ParsedFilter { feat_idx: 0, min: 10.0, max: 20.0 }]; + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 10.0, + max: 20.0, + }]; let data = vec![15.0, 5.0, f32::NAN]; assert!(row_passes_filters(0, &filters, &[], &data, 1)); @@ -162,8 +196,11 @@ mod tests { #[test] fn row_passes_enum_filter() { - let filters = vec![ParsedEnumFilter { feat_idx: 0, allowed: vec![0.0, 2.0] }]; - // Row 0: value 0.0 (allowed), Row 1: value 1.0 (not allowed), Row 2: value 2.0 (allowed), Row 3: NaN (fails) + let allowed: FxHashSet = [0.0_f32, 2.0].iter().map(|v| v.to_bits()).collect(); + let filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed, + }]; let data = vec![0.0, 1.0, 2.0, f32::NAN]; assert!(row_passes_filters(0, &[], &filters, &data, 1)); @@ -171,4 +208,173 @@ mod tests { assert!(row_passes_filters(2, &[], &filters, &data, 1)); assert!(!row_passes_filters(3, &[], &filters, &data, 1)); // NaN fails } + + #[test] + fn parse_multiple_numeric_filters() { + let (numeric, _enums) = parse_filters( + Some("Price:100000:500000,Area:50:200"), + &extended_feature_map(), + &extended_enum_values(), + ); + + assert_eq!(numeric.len(), 2); + assert_eq!(numeric[0].feat_idx, 0); + assert_eq!(numeric[1].feat_idx, 1); + } + + #[test] + fn parse_mixed_filters() { + let (numeric, enums) = parse_filters( + Some("Price:100000:500000,Type:Semi|Terraced"), + &extended_feature_map(), + &extended_enum_values(), + ); + + assert_eq!(numeric.len(), 1); + assert_eq!(enums.len(), 1); + } + + #[test] + fn parse_invalid_numeric_format_ignored() { + let (numeric, enums) = parse_filters( + Some("Price:not_a_number:500000"), + &extended_feature_map(), + &extended_enum_values(), + ); + + assert!(numeric.is_empty()); + assert!(enums.is_empty()); + } + + #[test] + fn parse_enum_with_unknown_value() { + let (_numeric, enums) = parse_filters( + Some("Type:Detached|Unknown|Flat"), + &extended_feature_map(), + &extended_enum_values(), + ); + + assert_eq!(enums.len(), 1); + assert!(enums[0].allowed.contains(&(0.0_f32).to_bits())); // Detached + assert!(enums[0].allowed.contains(&(3.0_f32).to_bits())); // Flat + assert_eq!(enums[0].allowed.len(), 2); + } + + #[test] + fn parse_filter_with_whitespace() { + let (numeric, enums) = parse_filters( + Some("Price : 100000 : 500000 , Type : Detached | Flat"), + &extended_feature_map(), + &extended_enum_values(), + ); + + assert_eq!(numeric.len(), 1); + assert_eq!(enums.len(), 1); + } + + #[test] + fn row_passes_no_filters() { + let feature_data = vec![100.0_f32, 50.0]; + assert!(row_passes_filters(0, &[], &[], &feature_data, 2)); + } + + #[test] + fn row_passes_numeric_filter_at_boundary() { + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + + assert!(row_passes_filters(0, &filters, &[], &[100.0], 1)); + assert!(row_passes_filters(0, &filters, &[], &[200.0], 1)); + } + + #[test] + fn row_fails_empty_enum_filter() { + let feature_data = vec![1.0_f32]; + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed: FxHashSet::default(), + }]; + assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); + } + + #[test] + fn multiple_filters_all_must_pass() { + let feature_data = vec![150.0_f32, 1.0]; + + let numeric_filters = vec![ParsedFilter { + feat_idx: 0, + min: 100.0, + max: 200.0, + }]; + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 1, + allowed: allowed_set(&[1.0, 2.0]), + }]; + + assert!(row_passes_filters( + 0, + &numeric_filters, + &enum_filters, + &feature_data, + 2 + )); + + let enum_filters_fail = vec![ParsedEnumFilter { + feat_idx: 1, + allowed: allowed_set(&[0.0, 2.0]), + }]; + assert!(!row_passes_filters( + 0, + &numeric_filters, + &enum_filters_fail, + &feature_data, + 2 + )); + } + + #[test] + fn row_major_layout_correct_indexing() { + let feature_data = vec![ + 100.0_f32, 0.0, // Row 0 + 200.0, 1.0, // Row 1 + 300.0, 2.0, // Row 2 + ]; + let num_features = 2; + + let filters = vec![ParsedFilter { + feat_idx: 0, + min: 150.0, + max: 250.0, + }]; + + assert!(!row_passes_filters(0, &filters, &[], &feature_data, num_features)); + assert!(row_passes_filters(1, &filters, &[], &feature_data, num_features)); + assert!(!row_passes_filters(2, &filters, &[], &feature_data, num_features)); + } + + #[test] + fn filter_at_float_precision_boundary() { + let value = 100.0_f32; + let filter = ParsedFilter { + feat_idx: 0, + min: 100.0 - f32::EPSILON, + max: 100.0 + f32::EPSILON, + }; + + assert!(row_passes_filters(0, &[filter], &[], &[value], 1)); + } + + #[test] + fn enum_filter_with_fractional_index() { + let feature_data = vec![1.5_f32]; // Not exactly 1.0 or 2.0 + let enum_filters = vec![ParsedEnumFilter { + feat_idx: 0, + allowed: allowed_set(&[1.0, 2.0]), + }]; + + assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); + } } diff --git a/server-rs/src/routes/hexagons.rs b/server-rs/src/routes/hexagons.rs index a8f64c3..47fd428 100644 --- a/server-rs/src/routes/hexagons.rs +++ b/server-rs/src/routes/hexagons.rs @@ -282,7 +282,9 @@ pub async fn get_hexagons( let t_total = t0.elapsed(); info!( resolution, - cells = groups.len(), + cells_before_filter = groups.len(), + cells_after_filter = features.len(), + bounds = format_args!("{:.4},{:.4},{:.4},{:.4}", south, west, north, east), filters = num_filters, filters_raw = filters_str.as_deref().unwrap_or("-"), agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0), diff --git a/server-rs/src/routes/postcodes.rs b/server-rs/src/routes/postcodes.rs index 7c68f3a..eb69428 100644 --- a/server-rs/src/routes/postcodes.rs +++ b/server-rs/src/routes/postcodes.rs @@ -173,6 +173,8 @@ pub async fn get_postcodes( // Build response, filtering postcodes to only those whose polygon intersects query bounds let mut features = Vec::with_capacity(postcode_aggs.len()); + let postcodes_before_filter = postcode_aggs.len(); + let mut filtered_out = 0usize; for (pc_idx, aggregation) in postcode_aggs { if aggregation.count == 0 { @@ -193,6 +195,7 @@ pub async fn get_postcodes( } if !bounds_intersect(pc_south, pc_west, pc_north, pc_east, south, west, north, east) { + filtered_out += 1; continue; } @@ -235,7 +238,10 @@ pub async fn get_postcodes( let t_total = t0.elapsed(); info!( - postcodes = features.len(), + postcodes_before_filter, + postcodes_after_filter = features.len(), + filtered_out, + bounds = format_args!("{:.6},{:.6},{:.6},{:.6}", south, west, north, east), filters = num_filters, filters_raw = filters_str.as_deref().unwrap_or("-"), total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0), diff --git a/server-rs/src/semantic_tests.rs b/server-rs/src/semantic_tests.rs deleted file mode 100644 index b2803f4..0000000 --- a/server-rs/src/semantic_tests.rs +++ /dev/null @@ -1,974 +0,0 @@ -//! Comprehensive semantic tests for the server. -//! -//! These tests validate the correctness of data processing, aggregation logic, -//! spatial queries, and filter semantics without requiring real data files. - -#[cfg(test)] -mod tests { - use rustc_hash::FxHashMap; - - use crate::data::{compute_feature_stats, Histogram}; - use crate::features::Bounds; - use crate::parsing::{ - bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters, - ParsedEnumFilter, ParsedFilter, - }; - use crate::utils::GridIndex; - - // ========================================================================= - // GridIndex Tests - // ========================================================================= - - mod grid_index { - use super::*; - - #[test] - fn empty_grid_returns_empty() { - let grid = GridIndex::build(&[], &[], 0.01); - assert!(grid.query(-90.0, -180.0, 90.0, 180.0).is_empty()); - } - - #[test] - fn single_point_inside_query() { - let lat = vec![51.5_f32]; - let lon = vec![-0.1_f32]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - // Query that contains the point - let result = grid.query(51.4, -0.2, 51.6, 0.0); - assert_eq!(result.len(), 1); - assert_eq!(result[0], 0); - } - - #[test] - fn single_point_outside_query() { - let lat = vec![51.5_f32]; - let lon = vec![-0.1_f32]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - // Query that doesn't contain the point - let result = grid.query(52.0, 0.0, 53.0, 1.0); - assert!(result.is_empty()); - } - - #[test] - fn multiple_points_partial_query() { - let lat = vec![51.5_f32, 51.6, 51.7, 52.0]; - let lon = vec![-0.1_f32, -0.1, -0.1, -0.1]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - // Query that contains only points 0, 1, 2 - let result = grid.query(51.4, -0.2, 51.8, 0.0); - assert_eq!(result.len(), 3); - assert!(result.contains(&0)); - assert!(result.contains(&1)); - assert!(result.contains(&2)); - assert!(!result.contains(&3)); - } - - #[test] - fn query_at_grid_boundary() { - // Points at exactly cell boundaries - let lat = vec![51.0_f32, 51.01, 51.02]; - let lon = vec![0.0_f32, 0.01, 0.02]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - // Query just past the first cell - let result = grid.query(50.99, -0.01, 51.005, 0.005); - assert!(result.contains(&0)); - } - - #[test] - fn for_each_matches_query() { - let lat = vec![51.5_f32, 51.6, 51.7]; - let lon = vec![-0.1_f32, -0.2, -0.3]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - let query_result = grid.query(51.4, -0.25, 51.65, 0.0); - - let mut foreach_result = Vec::new(); - grid.for_each_in_bounds(51.4, -0.25, 51.65, 0.0, |idx| { - foreach_result.push(idx); - }); - - // Both methods should return the same indices - assert_eq!(query_result.len(), foreach_result.len()); - for idx in &query_result { - assert!(foreach_result.contains(idx)); - } - } - - #[test] - fn negative_coordinates() { - let lat = vec![-33.9_f32, -33.8, -33.7]; - let lon = vec![151.2_f32, 151.3, 151.4]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - // Query: south=-34.0, north=-33.65 - // -33.9 is in range (between -34 and -33.65), lon 151.2 in range (151.1 to 151.5) ✓ - // -33.8 is in range, lon 151.3 in range ✓ - // -33.7 is in range, lon 151.4 in range ✓ - let result = grid.query(-34.0, 151.1, -33.65, 151.5); - assert_eq!(result.len(), 3); - } - - #[test] - fn query_bounds_completely_outside_grid() { - let lat = vec![51.5_f32]; - let lon = vec![-0.1_f32]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - // Query in a completely different area - let result = grid.query(0.0, 100.0, 10.0, 110.0); - assert!(result.is_empty()); - } - - #[test] - fn very_small_cell_size() { - let lat = vec![51.5_f32, 51.5001, 51.5002]; - let lon = vec![-0.1_f32, -0.1001, -0.1002]; - let grid = GridIndex::build(&lat, &lon, 0.0001); - - let result = grid.query(51.4999, -0.1003, 51.5003, -0.0999); - assert_eq!(result.len(), 3); - } - } - - // ========================================================================= - // Filter Parsing Tests - // ========================================================================= - - mod filter_parsing { - use super::*; - - fn make_feature_name_to_index() -> FxHashMap { - [ - ("Price".into(), 0), - ("Area".into(), 1), - ("Rating".into(), 2), - ("Type".into(), 3), - ] - .into_iter() - .collect() - } - - fn make_enum_values() -> FxHashMap> { - let mut map = FxHashMap::default(); - // Feature index 3 (Type) is an enum - map.insert(3, vec!["Detached".into(), "Semi".into(), "Terraced".into(), "Flat".into()]); - map - } - - #[test] - fn parse_single_numeric_filter() { - let (numeric, enums) = parse_filters( - Some("Price:100000:500000"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert_eq!(numeric.len(), 1); - assert!(enums.is_empty()); - assert_eq!(numeric[0].feat_idx, 0); - assert_eq!(numeric[0].min, 100000.0); - assert_eq!(numeric[0].max, 500000.0); - } - - #[test] - fn parse_multiple_numeric_filters() { - let (numeric, _enums) = parse_filters( - Some("Price:100000:500000,Area:50:200"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert_eq!(numeric.len(), 2); - assert_eq!(numeric[0].feat_idx, 0); - assert_eq!(numeric[1].feat_idx, 1); - } - - #[test] - fn parse_single_enum_filter() { - let (numeric, enums) = parse_filters( - Some("Type:Detached|Flat"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert!(numeric.is_empty()); - assert_eq!(enums.len(), 1); - assert_eq!(enums[0].feat_idx, 3); - assert_eq!(enums[0].allowed, vec![0.0, 3.0]); // Detached=0, Flat=3 - } - - #[test] - fn parse_mixed_filters() { - let (numeric, enums) = parse_filters( - Some("Price:100000:500000,Type:Semi|Terraced"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert_eq!(numeric.len(), 1); - assert_eq!(enums.len(), 1); - } - - #[test] - fn parse_unknown_feature_ignored() { - let (numeric, enums) = parse_filters( - Some("Unknown:100:200"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert!(numeric.is_empty()); - assert!(enums.is_empty()); - } - - #[test] - fn parse_invalid_numeric_format_ignored() { - let (numeric, enums) = parse_filters( - Some("Price:not_a_number:500000"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert!(numeric.is_empty()); - assert!(enums.is_empty()); - } - - #[test] - fn parse_enum_with_unknown_value() { - let (_numeric, enums) = parse_filters( - Some("Type:Detached|Unknown|Flat"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert_eq!(enums.len(), 1); - // Unknown is filtered out, only Detached and Flat remain - assert_eq!(enums[0].allowed, vec![0.0, 3.0]); - } - - #[test] - fn parse_empty_filter_string() { - let (numeric, enums) = parse_filters( - Some(""), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert!(numeric.is_empty()); - assert!(enums.is_empty()); - } - - #[test] - fn parse_none_filter() { - let (numeric, enums) = parse_filters( - None, - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert!(numeric.is_empty()); - assert!(enums.is_empty()); - } - - #[test] - fn parse_filter_with_whitespace() { - let (numeric, enums) = parse_filters( - Some("Price : 100000 : 500000 , Type : Detached | Flat"), - &make_feature_name_to_index(), - &make_enum_values(), - ); - - assert_eq!(numeric.len(), 1); - assert_eq!(enums.len(), 1); - } - } - - // ========================================================================= - // Filter Application Tests - // ========================================================================= - - mod filter_application { - use super::*; - - #[test] - fn row_passes_no_filters() { - let feature_data = vec![100.0_f32, 50.0]; - assert!(row_passes_filters(0, &[], &[], &feature_data, 2)); - } - - #[test] - fn row_passes_numeric_filter_in_range() { - let feature_data = vec![150.0_f32]; - let filters = vec![ParsedFilter { - feat_idx: 0, - min: 100.0, - max: 200.0, - }]; - assert!(row_passes_filters(0, &filters, &[], &feature_data, 1)); - } - - #[test] - fn row_fails_numeric_filter_below_min() { - let feature_data = vec![50.0_f32]; - let filters = vec![ParsedFilter { - feat_idx: 0, - min: 100.0, - max: 200.0, - }]; - assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1)); - } - - #[test] - fn row_fails_numeric_filter_above_max() { - let feature_data = vec![250.0_f32]; - let filters = vec![ParsedFilter { - feat_idx: 0, - min: 100.0, - max: 200.0, - }]; - assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1)); - } - - #[test] - fn row_passes_numeric_filter_at_boundary() { - let filters = vec![ParsedFilter { - feat_idx: 0, - min: 100.0, - max: 200.0, - }]; - - // At min boundary - assert!(row_passes_filters(0, &filters, &[], &[100.0], 1)); - // At max boundary - assert!(row_passes_filters(0, &filters, &[], &[200.0], 1)); - } - - #[test] - fn row_fails_numeric_filter_with_nan() { - let feature_data = vec![f32::NAN]; - let filters = vec![ParsedFilter { - feat_idx: 0, - min: 100.0, - max: 200.0, - }]; - assert!(!row_passes_filters(0, &filters, &[], &feature_data, 1)); - } - - #[test] - fn row_passes_enum_filter_allowed_value() { - let feature_data = vec![1.0_f32]; // Index 1 - let enum_filters = vec![ParsedEnumFilter { - feat_idx: 0, - allowed: vec![0.0, 1.0, 2.0], - }]; - assert!(row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); - } - - #[test] - fn row_fails_enum_filter_disallowed_value() { - let feature_data = vec![3.0_f32]; // Index 3 not in allowed - let enum_filters = vec![ParsedEnumFilter { - feat_idx: 0, - allowed: vec![0.0, 1.0, 2.0], - }]; - assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); - } - - #[test] - fn row_fails_enum_filter_with_nan() { - let feature_data = vec![f32::NAN]; - let enum_filters = vec![ParsedEnumFilter { - feat_idx: 0, - allowed: vec![0.0, 1.0, 2.0], - }]; - assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); - } - - #[test] - fn row_fails_empty_enum_filter() { - let feature_data = vec![1.0_f32]; - let enum_filters = vec![ParsedEnumFilter { - feat_idx: 0, - allowed: vec![], // Empty allowed list - }]; - // Empty allowed means nothing passes - assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); - } - - #[test] - fn multiple_filters_all_must_pass() { - // Row with two features: price=150, type=1 - let feature_data = vec![150.0_f32, 1.0]; - - let numeric_filters = vec![ParsedFilter { - feat_idx: 0, - min: 100.0, - max: 200.0, - }]; - let enum_filters = vec![ParsedEnumFilter { - feat_idx: 1, - allowed: vec![1.0, 2.0], - }]; - - assert!(row_passes_filters(0, &numeric_filters, &enum_filters, &feature_data, 2)); - - // Change enum filter to not include 1.0 - let enum_filters_fail = vec![ParsedEnumFilter { - feat_idx: 1, - allowed: vec![0.0, 2.0], - }]; - assert!(!row_passes_filters(0, &numeric_filters, &enum_filters_fail, &feature_data, 2)); - } - - #[test] - fn row_major_layout_correct_indexing() { - // 3 rows, 2 features each - // Row 0: [100, 0], Row 1: [200, 1], Row 2: [300, 2] - let feature_data = vec![ - 100.0_f32, 0.0, // Row 0 - 200.0, 1.0, // Row 1 - 300.0, 2.0, // Row 2 - ]; - let num_features = 2; - - let filters = vec![ParsedFilter { - feat_idx: 0, - min: 150.0, - max: 250.0, - }]; - - assert!(!row_passes_filters(0, &filters, &[], &feature_data, num_features)); // 100 not in range - assert!(row_passes_filters(1, &filters, &[], &feature_data, num_features)); // 200 in range - assert!(!row_passes_filters(2, &filters, &[], &feature_data, num_features)); // 300 not in range - } - } - - // ========================================================================= - // Bounds Parsing Tests - // ========================================================================= - - mod bounds_parsing { - use super::*; - - #[test] - fn parse_valid_bounds() { - let (south, west, north, east) = parse_bounds("51.0,-0.5,52.0,0.5").unwrap(); - assert_eq!(south, 51.0); - assert_eq!(west, -0.5); - assert_eq!(north, 52.0); - assert_eq!(east, 0.5); - } - - #[test] - fn parse_bounds_with_spaces() { - let (south, west, _north, _east) = parse_bounds("51.0, -0.5, 52.0, 0.5").unwrap(); - assert_eq!(south, 51.0); - assert_eq!(west, -0.5); - } - - #[test] - fn parse_bounds_negative_values() { - let (south, _west, north, _east) = parse_bounds("-51.5,-0.5,-50.0,0.5").unwrap(); - assert_eq!(south, -51.5); - assert_eq!(north, -50.0); - } - - #[test] - fn parse_bounds_invalid_too_few_parts() { - assert!(parse_bounds("51.0,-0.5,52.0").is_err()); - } - - #[test] - fn parse_bounds_invalid_too_many_parts() { - assert!(parse_bounds("51.0,-0.5,52.0,0.5,1.0").is_err()); - } - - #[test] - fn parse_bounds_invalid_non_numeric() { - assert!(parse_bounds("51.0,abc,52.0,0.5").is_err()); - } - - #[test] - fn parse_bounds_empty_string() { - assert!(parse_bounds("").is_err()); - } - } - - // ========================================================================= - // Bounds Intersection Tests - // ========================================================================= - - mod bounds_intersection { - use super::*; - - #[test] - fn overlapping_boxes_intersect() { - assert!(bounds_intersect( - 0.0, 0.0, 2.0, 2.0, // Box A - 1.0, 1.0, 3.0, 3.0 // Box B overlaps - )); - } - - #[test] - fn one_box_inside_other_intersects() { - assert!(bounds_intersect( - 0.0, 0.0, 10.0, 10.0, // Box A (large) - 2.0, 2.0, 5.0, 5.0 // Box B (inside A) - )); - } - - #[test] - fn touching_at_corner_intersects() { - assert!(bounds_intersect( - 0.0, 0.0, 1.0, 1.0, // Box A - 1.0, 1.0, 2.0, 2.0 // Box B touches at (1,1) - )); - } - - #[test] - fn touching_at_edge_intersects() { - assert!(bounds_intersect( - 0.0, 0.0, 1.0, 1.0, // Box A - 1.0, 0.0, 2.0, 1.0 // Box B touches along right edge - )); - } - - #[test] - fn disjoint_horizontally_no_intersect() { - assert!(!bounds_intersect( - 0.0, 0.0, 1.0, 1.0, // Box A - 0.0, 2.0, 1.0, 3.0 // Box B to the right - )); - } - - #[test] - fn disjoint_vertically_no_intersect() { - assert!(!bounds_intersect( - 0.0, 0.0, 1.0, 1.0, // Box A - 2.0, 0.0, 3.0, 1.0 // Box B above - )); - } - - #[test] - fn disjoint_diagonally_no_intersect() { - assert!(!bounds_intersect( - 0.0, 0.0, 1.0, 1.0, // Box A - 2.0, 2.0, 3.0, 3.0 // Box B diagonally away - )); - } - - #[test] - fn negative_coordinates_intersect() { - assert!(bounds_intersect( - -2.0, -2.0, -1.0, -1.0, // Box A (negative coords) - -1.5, -1.5, -0.5, -0.5 // Box B overlaps - )); - } - } - - // ========================================================================= - // H3 Cell Bounds Tests - // ========================================================================= - - mod h3_bounds { - use super::*; - use std::str::FromStr; - - #[test] - fn h3_cell_bounds_zero_buffer() { - let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); - let (south, west, north, east) = h3_cell_bounds(cell, 0.0); - - // San Francisco area, should be roughly 37.77°N, -122.4°W - assert!(south < north, "south {} should be < north {}", south, north); - assert!(west < east, "west {} should be < east {}", west, east); - assert!(south > 30.0 && south < 45.0); - assert!(west < -100.0); - } - - #[test] - fn h3_cell_bounds_with_buffer() { - let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); - let (s0, w0, n0, e0) = h3_cell_bounds(cell, 0.0); - let (s1, w1, n1, e1) = h3_cell_bounds(cell, 0.1); - - // With buffer, bounds should be larger - assert!(s1 < s0, "south with buffer should be smaller"); - assert!(w1 < w0, "west with buffer should be smaller"); - assert!(n1 > n0, "north with buffer should be larger"); - assert!(e1 > e0, "east with buffer should be larger"); - - // Buffer should be exactly 0.1 degrees - assert!((s0 - s1 - 0.1).abs() < 1e-10); - assert!((w0 - w1 - 0.1).abs() < 1e-10); - } - - #[test] - fn h3_cell_bounds_different_resolutions() { - // Resolution 9 cell - let cell_high = h3o::CellIndex::from_str("8928308280fffff").unwrap(); - // Get its resolution 5 parent - let res5 = h3o::Resolution::try_from(5).unwrap(); - let cell_low = cell_high.parent(res5).unwrap(); - - let (s_low, w_low, n_low, e_low) = h3_cell_bounds(cell_low, 0.0); - let (s_high, w_high, n_high, e_high) = h3_cell_bounds(cell_high, 0.0); - - // Lower resolution cell should have larger bounds - let area_low = (n_low - s_low) * (e_low - w_low); - let area_high = (n_high - s_high) * (e_high - w_high); - assert!(area_low > area_high, "Lower res should have larger area"); - } - } - - // ========================================================================= - // Histogram Computation Tests - // ========================================================================= - - mod histogram { - use super::*; - - fn make_fixed_bounds(min: f32, max: f32) -> Bounds { - Bounds::Fixed { min, max } - } - - fn make_percentile_bounds(low: f64, high: f64) -> Bounds { - Bounds::Percentile { low, high } - } - - #[test] - fn histogram_empty_data() { - let data: Vec = vec![]; - let bounds = make_fixed_bounds(0.0, 100.0); - let stats = compute_feature_stats(&data, &bounds); - - assert_eq!(stats.slider_min, 0.0); - assert_eq!(stats.slider_max, 100.0); - assert_eq!(stats.histogram.counts.iter().sum::(), 0); - } - - #[test] - fn histogram_single_value() { - let data = vec![50.0_f32]; - let bounds = make_fixed_bounds(0.0, 100.0); - let stats = compute_feature_stats(&data, &bounds); - - assert_eq!(stats.histogram.min, 50.0); - assert_eq!(stats.histogram.max, 50.0); - assert_eq!(stats.histogram.counts.iter().sum::(), 1); - } - - #[test] - fn histogram_uniform_distribution() { - // 100 values from 0 to 99 - let data: Vec = (0..100).map(|i| i as f32).collect(); - let bounds = make_fixed_bounds(0.0, 100.0); - let stats = compute_feature_stats(&data, &bounds); - - assert_eq!(stats.histogram.min, 0.0); - assert_eq!(stats.histogram.max, 99.0); - assert_eq!(stats.histogram.counts.iter().sum::(), 100); - } - - #[test] - fn histogram_with_nan_values() { - let data = vec![10.0_f32, f32::NAN, 20.0, f32::NAN, 30.0]; - let bounds = make_fixed_bounds(0.0, 100.0); - let stats = compute_feature_stats(&data, &bounds); - - // Only 3 non-NaN values - assert_eq!(stats.histogram.counts.iter().sum::(), 3); - assert_eq!(stats.histogram.min, 10.0); - assert_eq!(stats.histogram.max, 30.0); - } - - #[test] - fn histogram_all_nan() { - let data = vec![f32::NAN, f32::NAN, f32::NAN]; - let bounds = make_fixed_bounds(0.0, 100.0); - let stats = compute_feature_stats(&data, &bounds); - - assert_eq!(stats.histogram.counts.iter().sum::(), 0); - } - - #[test] - fn histogram_all_same_value() { - let data = vec![42.0_f32; 1000]; - let bounds = make_fixed_bounds(0.0, 100.0); - let stats = compute_feature_stats(&data, &bounds); - - assert_eq!(stats.histogram.min, 42.0); - assert_eq!(stats.histogram.max, 42.0); - assert_eq!(stats.histogram.p1, 42.0); - assert_eq!(stats.histogram.p99, 42.0); - assert_eq!(stats.histogram.counts.iter().sum::(), 1000); - } - - #[test] - fn histogram_percentile_bounds() { - // Data with outliers: 1 very low, 1 very high, 98 in middle - let mut data: Vec = vec![0.0]; // Low outlier - data.extend((1..99).map(|i| 50.0 + i as f32 * 0.01)); // Main data around 50 - data.push(1000.0); // High outlier - - let bounds = make_percentile_bounds(2.0, 98.0); - let stats = compute_feature_stats(&data, &bounds); - - // Slider should exclude outliers - assert!(stats.slider_min > 0.0); - assert!(stats.slider_max < 1000.0); - } - - #[test] - fn histogram_bin_for_value() { - let hist = Histogram { - min: 0.0, - max: 100.0, - p1: 10.0, - p99: 90.0, - counts: vec![0; 10], // 10 bins - }; - - // Low outlier bin (bin 0) - assert_eq!(hist.bin_for_value(5.0), 0); - - // High outlier bin (bin 9) - assert_eq!(hist.bin_for_value(95.0), 9); - - // Middle bins (bins 1-8) - let mid_value = 50.0; - let bin = hist.bin_for_value(mid_value); - assert!(bin >= 1 && bin <= 8); - } - - #[test] - fn histogram_middle_bin_width() { - let hist = Histogram { - min: 0.0, - max: 100.0, - p1: 10.0, - p99: 90.0, - counts: vec![0; 10], // 10 bins - }; - - // Middle bins span p1 to p99 (80 units) across 8 bins (10 - 2 outlier bins) - let expected_width = (90.0 - 10.0) / 8.0; - assert!((hist.middle_bin_width() - expected_width).abs() < 0.001); - } - - #[test] - fn histogram_cardinality_caps_bins() { - // Only 3 unique values - should cap bins at 3 - let data = vec![1.0_f32, 1.0, 2.0, 2.0, 3.0, 3.0]; - let bounds = make_fixed_bounds(0.0, 100.0); - let stats = compute_feature_stats(&data, &bounds); - - // Bins should be capped at cardinality (3) - assert_eq!(stats.histogram.counts.len(), 3); - } - } - - // ========================================================================= - // Aggregation Semantics Tests - // ========================================================================= - - mod aggregation { - /// Test that min/max aggregation correctly handles NaN values - #[test] - fn min_max_skips_nan() { - let values = vec![10.0_f32, f32::NAN, 20.0, f32::NAN, 5.0]; - - let mut min = f32::INFINITY; - let mut max = f32::NEG_INFINITY; - for &v in &values { - if v.is_finite() { - if v < min { - min = v; - } - if v > max { - max = v; - } - } - } - - assert_eq!(min, 5.0); - assert_eq!(max, 20.0); - } - - /// Test that counting only counts non-NaN values - #[test] - fn count_skips_nan() { - let values = vec![1.0_f32, f32::NAN, 2.0, f32::NAN, 3.0]; - let count = values.iter().filter(|v| v.is_finite()).count(); - assert_eq!(count, 3); - } - - /// Test enum value counting with indices - #[test] - fn enum_value_counting() { - // Enum values: 0.0=Detached, 1.0=Semi, 2.0=Terraced, 3.0=Flat - let values = vec![0.0_f32, 1.0, 1.0, 2.0, f32::NAN, 3.0, 1.0]; - let enum_count = 4; - - let mut counts = vec![0u64; enum_count]; - for &v in &values { - if v.is_finite() { - let idx = v as usize; - if idx < enum_count { - counts[idx] += 1; - } - } - } - - assert_eq!(counts[0], 1); // Detached - assert_eq!(counts[1], 3); // Semi - assert_eq!(counts[2], 1); // Terraced - assert_eq!(counts[3], 1); // Flat - } - } - - // ========================================================================= - // H3 Resolution Tests - // ========================================================================= - - mod h3_resolution { - use std::str::FromStr; - - #[test] - fn parent_cell_at_lower_resolution() { - // Resolution 9 cell - let child = h3o::CellIndex::from_str("8928308280fffff").unwrap(); - - // Get parent at resolution 7 - let parent_res = h3o::Resolution::try_from(7).unwrap(); - let parent = child.parent(parent_res).unwrap(); - - assert_eq!(parent.resolution(), parent_res); - - // Child should be contained in parent - assert!(parent.children(child.resolution()).any(|c| c == child)); - } - - #[test] - fn same_resolution_returns_self() { - let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); - let res = cell.resolution(); - - // Getting parent at same resolution should return the cell itself - let parent = cell.parent(res); - assert_eq!(parent, Some(cell)); - } - - #[test] - fn higher_resolution_parent_fails() { - // Resolution 9 cell - let cell = h3o::CellIndex::from_str("8928308280fffff").unwrap(); - - // Try to get "parent" at higher resolution (impossible) - let higher_res = h3o::Resolution::try_from(10).unwrap(); - let parent = cell.parent(higher_res); - assert!(parent.is_none()); - } - } - - // ========================================================================= - // Edge Cases and Error Handling - // ========================================================================= - - mod edge_cases { - use super::*; - - #[test] - fn very_large_coordinates() { - let lat = vec![89.9_f32, -89.9]; - let lon = vec![179.9_f32, -179.9]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - let result = grid.query(-90.0, -180.0, 90.0, 180.0); - assert_eq!(result.len(), 2); - } - - #[test] - fn filter_at_float_precision_boundary() { - let value = 100.0_f32; - let filter = ParsedFilter { - feat_idx: 0, - min: 100.0 - f32::EPSILON, - max: 100.0 + f32::EPSILON, - }; - - assert!(row_passes_filters(0, &[filter], &[], &[value], 1)); - } - - #[test] - fn enum_filter_with_fractional_index() { - // What happens if the stored value isn't exactly an integer? - let feature_data = vec![1.5_f32]; // Not exactly 1.0 or 2.0 - let enum_filters = vec![ParsedEnumFilter { - feat_idx: 0, - allowed: vec![1.0, 2.0], - }]; - - // 1.5 is not in the allowed list [1.0, 2.0] - assert!(!row_passes_filters(0, &[], &enum_filters, &feature_data, 1)); - } - - #[test] - #[test] - fn bounds_with_inverted_min_max() { - // What if south > north? (Invalid input) - // The parse_bounds function doesn't validate this - let (south, _west, north, _east) = parse_bounds("52.0,-0.5,51.0,0.5").unwrap(); - assert_eq!(south, 52.0); - assert_eq!(north, 51.0); - // south > north is allowed by parsing but logically invalid - - // GridIndex should handle this gracefully - let lat = vec![51.5_f32]; - let lon = vec![-0.1_f32]; - let grid = GridIndex::build(&lat, &lon, 0.01); - - // Query with inverted bounds returns empty (row_min > row_max is rejected) - let result = grid.query(52.0, -0.5, 51.0, 0.5); - assert!(result.is_empty(), "Inverted bounds should return empty"); - } - - #[test] - fn infinity_values_in_data() { - // NOTE: The current implementation uses !is_nan() not is_finite() - // So INFINITY values ARE included in min/max calculations. - // This documents current behavior - consider if this should be fixed. - let data = vec![f32::INFINITY, f32::NEG_INFINITY, 50.0]; - let bounds = Bounds::Fixed { - min: 0.0, - max: 100.0, - }; - let stats = compute_feature_stats(&data, &bounds); - - // Current behavior: infinity is included (uses !is_nan()) - assert_eq!(stats.histogram.min, f32::NEG_INFINITY); - assert_eq!(stats.histogram.max, f32::INFINITY); - // All 3 values are counted (none are NaN) - assert_eq!(stats.histogram.counts.iter().sum::(), 3); - } - - #[test] - fn only_finite_values() { - // Test that normal finite values work correctly - let data = vec![10.0_f32, 20.0, 30.0]; - let bounds = Bounds::Fixed { - min: 0.0, - max: 100.0, - }; - let stats = compute_feature_stats(&data, &bounds); - - assert_eq!(stats.histogram.min, 10.0); - assert_eq!(stats.histogram.max, 30.0); - assert_eq!(stats.histogram.counts.iter().sum::(), 3); - } - } -} diff --git a/server-rs/src/utils/grid_index.rs b/server-rs/src/utils/grid_index.rs index 242c481..99128e8 100644 --- a/server-rs/src/utils/grid_index.rs +++ b/server-rs/src/utils/grid_index.rs @@ -222,4 +222,108 @@ mod tests { let grid = GridIndex::build(&[], &[], 0.1); assert!(grid.query(-90.0, -180.0, 90.0, 180.0).is_empty()); } + + #[test] + fn single_point_inside_query() { + let lat = vec![51.5_f32]; + let lon = vec![-0.1_f32]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(51.4, -0.2, 51.6, 0.0); + assert_eq!(result.len(), 1); + assert_eq!(result[0], 0); + } + + #[test] + fn single_point_outside_query() { + let lat = vec![51.5_f32]; + let lon = vec![-0.1_f32]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(52.0, 0.0, 53.0, 1.0); + assert!(result.is_empty()); + } + + #[test] + fn multiple_points_partial_query() { + let lat = vec![51.5_f32, 51.6, 51.7, 52.0]; + let lon = vec![-0.1_f32, -0.1, -0.1, -0.1]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(51.4, -0.2, 51.8, 0.0); + assert_eq!(result.len(), 3); + assert!(result.contains(&0)); + assert!(result.contains(&1)); + assert!(result.contains(&2)); + assert!(!result.contains(&3)); + } + + #[test] + fn query_at_grid_boundary() { + let lat = vec![51.0_f32, 51.01, 51.02]; + let lon = vec![0.0_f32, 0.01, 0.02]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(50.99, -0.01, 51.005, 0.005); + assert!(result.contains(&0)); + } + + #[test] + fn for_each_matches_query() { + let lat = vec![51.5_f32, 51.6, 51.7]; + let lon = vec![-0.1_f32, -0.2, -0.3]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let query_result = grid.query(51.4, -0.25, 51.65, 0.0); + + let mut foreach_result = Vec::new(); + grid.for_each_in_bounds(51.4, -0.25, 51.65, 0.0, |idx| { + foreach_result.push(idx); + }); + + assert_eq!(query_result.len(), foreach_result.len()); + for idx in &query_result { + assert!(foreach_result.contains(idx)); + } + } + + #[test] + fn negative_coordinates() { + let lat = vec![-33.9_f32, -33.8, -33.7]; + let lon = vec![151.2_f32, 151.3, 151.4]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(-34.0, 151.1, -33.65, 151.5); + assert_eq!(result.len(), 3); + } + + #[test] + fn query_bounds_completely_outside_grid() { + let lat = vec![51.5_f32]; + let lon = vec![-0.1_f32]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(0.0, 100.0, 10.0, 110.0); + assert!(result.is_empty()); + } + + #[test] + fn very_small_cell_size() { + let lat = vec![51.5_f32, 51.5001, 51.5002]; + let lon = vec![-0.1_f32, -0.1001, -0.1002]; + let grid = GridIndex::build(&lat, &lon, 0.0001); + + let result = grid.query(51.4999, -0.1003, 51.5003, -0.0999); + assert_eq!(result.len(), 3); + } + + #[test] + fn very_large_coordinates() { + let lat = vec![89.9_f32, -89.9]; + let lon = vec![179.9_f32, -179.9]; + let grid = GridIndex::build(&lat, &lon, 0.01); + + let result = grid.query(-90.0, -180.0, 90.0, 180.0); + assert_eq!(result.len(), 2); + } }