diff --git a/server-rs/src/consts.rs b/server-rs/src/consts.rs index 7ccf49e..a72db94 100644 --- a/server-rs/src/consts.rs +++ b/server-rs/src/consts.rs @@ -9,3 +9,6 @@ pub const HISTOGRAM_BINS: usize = 100; /// H3 resolutions to precompute at startup (covers typical zoom levels) pub const H3_PRECOMPUTE_MIN: u8 = 4; pub const H3_PRECOMPUTE_MAX: u8 = 12; + +/// Columns to exclude from feature discovery +pub const EXCLUDED_COLUMNS: &[&str] = &["lat", "lon"]; diff --git a/server-rs/src/data.rs b/server-rs/src/data.rs index 6a3cb48..2fec011 100644 --- a/server-rs/src/data.rs +++ b/server-rs/src/data.rs @@ -1,18 +1,13 @@ -use polars::prelude::*; use polars::lazy::frame::LazyFrame; +use polars::prelude::*; use rayon::prelude::*; use serde::Serialize; use std::path::Path; -use crate::consts::{FEATURE_PERCENTILE_LOW, FEATURE_PERCENTILE_HIGH, HISTOGRAM_BINS, H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX}; - -/// Columns to exclude from feature discovery -const EXCLUDED_COLUMNS: &[&str] = &["lat", "lon"]; - -/// H3 valid resolution range (0-15) -pub const MIN_RESOLUTION: u8 = 0; -pub const MAX_RESOLUTION: u8 = 15; -pub const DEFAULT_RESOLUTION: u8 = 8; +use crate::consts::{ + EXCLUDED_COLUMNS, FEATURE_PERCENTILE_HIGH, FEATURE_PERCENTILE_LOW, H3_PRECOMPUTE_MAX, + H3_PRECOMPUTE_MIN, HISTOGRAM_BINS, +}; /// Returns true if the polars DataType is numeric (integer or float) fn is_numeric_dtype(dtype: &DataType) -> bool { @@ -76,7 +71,13 @@ pub struct PropertyData { /// Approximate a percentile from a histogram using linear interpolation. /// `p` is in [0, 100]. `total` is the sum of all bin counts. -fn percentile_from_histogram(counts: &[u64], min: f64, bin_width: f64, total: usize, p: f64) -> f64 { +fn percentile_from_histogram( + counts: &[u64], + min: f64, + bin_width: f64, + total: usize, + p: f64, +) -> f64 { let target = (p / 100.0) * (total as f64 - 1.0); let mut cumulative = 0u64; for (i, &c) in counts.iter().enumerate() { @@ -104,8 +105,12 @@ fn compute_feature_stats(vals: &[f64]) -> FeatureStats { let mut count = 0usize; for &v in vals { if !v.is_nan() { - if v < min { min = v; } - if v > max { max = v; } + if v < min { + min = v; + } + if v > max { + max = v; + } count += 1; } } @@ -222,8 +227,12 @@ impl PropertyData { // Add string columns (using actual column names from parquet) let string_cols = vec![ - "pp_address", "postcode", "pp_property_type", "built_form", - "current_energy_rating", "potential_energy_rating" + "pp_address", + "postcode", + "pp_property_type", + "built_form", + "current_energy_rating", + "potential_energy_rating", ]; // Build selection with proper casting @@ -256,10 +265,20 @@ impl PropertyData { // Extract lat/lon using bulk iterator let lat_series = df.column("lat").unwrap().cast(&DataType::Float64).unwrap(); - let lat: Vec = lat_series.f64().unwrap().into_iter().map(|v| v.unwrap_or(0.0)).collect(); + let lat: Vec = lat_series + .f64() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or(0.0)) + .collect(); let lon_series = df.column("lon").unwrap().cast(&DataType::Float64).unwrap(); - let lon: Vec = lon_series.f64().unwrap().into_iter().map(|v| v.unwrap_or(0.0)).collect(); + let lon: Vec = lon_series + .f64() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or(0.0)) + .collect(); // Extract feature columns (column-major, for cache-friendly histogram computation) eprintln!("Extracting feature columns..."); @@ -281,8 +300,10 @@ impl PropertyData { eprintln!( " {}: p{}={:.2}, p{}={:.2}, {} bins", feature_names[i], - FEATURE_PERCENTILE_LOW, stats.p_low, - FEATURE_PERCENTILE_HIGH, stats.p_high, + FEATURE_PERCENTILE_LOW, + stats.p_low, + FEATURE_PERCENTILE_HIGH, + stats.p_high, stats.histogram.counts.len() ); stats @@ -292,40 +313,66 @@ impl PropertyData { // Extract string columns (before permutation) eprintln!("Extracting string columns..."); let address_raw: Vec = if let Ok(col) = df.column("pp_address") { - col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + col.str() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or("").to_string()) + .collect() } else { vec![String::new(); row_count] }; let postcode_raw: Vec = if let Ok(col) = df.column("postcode") { - col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + col.str() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or("").to_string()) + .collect() } else { vec![String::new(); row_count] }; let property_type_raw: Vec = if let Ok(col) = df.column("pp_property_type") { - col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + col.str() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or("").to_string()) + .collect() } else { vec![String::new(); row_count] }; let built_form_raw: Vec = if let Ok(col) = df.column("built_form") { - col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() + col.str() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or("").to_string()) + .collect() } else { vec![String::new(); row_count] }; - let current_energy_rating_raw: Vec = if let Ok(col) = df.column("current_energy_rating") { - col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() - } else { - vec![String::new(); row_count] - }; + let current_energy_rating_raw: Vec = + if let Ok(col) = df.column("current_energy_rating") { + col.str() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or("").to_string()) + .collect() + } else { + vec![String::new(); row_count] + }; - let potential_energy_rating_raw: Vec = if let Ok(col) = df.column("potential_energy_rating") { - col.str().unwrap().into_iter().map(|v| v.unwrap_or("").to_string()).collect() - } else { - vec![String::new(); row_count] - }; + let potential_energy_rating_raw: Vec = + if let Ok(col) = df.column("potential_energy_rating") { + col.str() + .unwrap() + .into_iter() + .map(|v| v.unwrap_or("").to_string()) + .collect() + } else { + vec![String::new(); row_count] + }; // Sort all rows by spatial locality so that grid queries access // contiguous memory (sequential reads instead of random DRAM accesses). @@ -349,12 +396,30 @@ impl PropertyData { let lon: Vec = perm.iter().map(|&i| lon[i as usize]).collect(); // Apply permutation to string columns - let address: Vec = perm.iter().map(|&i| address_raw[i as usize].clone()).collect(); - let postcode: Vec = perm.iter().map(|&i| postcode_raw[i as usize].clone()).collect(); - let property_type: Vec = perm.iter().map(|&i| property_type_raw[i as usize].clone()).collect(); - let built_form: Vec = perm.iter().map(|&i| built_form_raw[i as usize].clone()).collect(); - let current_energy_rating: Vec = perm.iter().map(|&i| current_energy_rating_raw[i as usize].clone()).collect(); - let potential_energy_rating: Vec = perm.iter().map(|&i| potential_energy_rating_raw[i as usize].clone()).collect(); + let address: Vec = perm + .iter() + .map(|&i| address_raw[i as usize].clone()) + .collect(); + let postcode: Vec = perm + .iter() + .map(|&i| postcode_raw[i as usize].clone()) + .collect(); + let property_type: Vec = perm + .iter() + .map(|&i| property_type_raw[i as usize].clone()) + .collect(); + let built_form: Vec = perm + .iter() + .map(|&i| built_form_raw[i as usize].clone()) + .collect(); + let current_energy_rating: Vec = perm + .iter() + .map(|&i| current_energy_rating_raw[i as usize].clone()) + .collect(); + let potential_energy_rating: Vec = perm + .iter() + .map(|&i| potential_energy_rating_raw[i as usize].clone()) + .collect(); // Transpose to row-major AND apply spatial permutation in one pass. // Result: all features for one row are contiguous, and spatially @@ -422,7 +487,8 @@ impl POIData { eprintln!("Loaded {} POIs", row_count); // Extract columns - let id: Vec = df.column("id") + let id: Vec = df + .column("id") .unwrap() .str() .unwrap() @@ -430,7 +496,8 @@ impl POIData { .map(|v| v.unwrap_or("").to_string()) .collect(); - let name: Vec = df.column("name") + let name: Vec = df + .column("name") .unwrap() .str() .unwrap() @@ -438,7 +505,8 @@ impl POIData { .map(|v| v.unwrap_or("").to_string()) .collect(); - let category: Vec = df.column("category") + let category: Vec = df + .column("category") .unwrap() .str() .unwrap() @@ -446,7 +514,8 @@ impl POIData { .map(|v| v.unwrap_or("").to_string()) .collect(); - let lat: Vec = df.column("lat") + let lat: Vec = df + .column("lat") .unwrap() .f64() .unwrap() @@ -454,7 +523,8 @@ impl POIData { .map(|v| v.unwrap_or(0.0)) .collect(); - let lng: Vec = df.column("lng") + let lng: Vec = df + .column("lng") .unwrap() .f64() .unwrap() @@ -462,7 +532,8 @@ impl POIData { .map(|v| v.unwrap_or(0.0)) .collect(); - let emoji: Vec = df.column("emoji") + let emoji: Vec = df + .column("emoji") .unwrap() .str() .unwrap() diff --git a/server-rs/src/index.rs b/server-rs/src/index.rs index 5b705ed..ee13588 100644 --- a/server-rs/src/index.rs +++ b/server-rs/src/index.rs @@ -114,7 +114,13 @@ impl GridIndex { } } - fn clamp_bounds(&self, south: f64, west: f64, north: f64, east: f64) -> (usize, usize, usize, usize) { + fn clamp_bounds( + &self, + south: f64, + west: f64, + north: f64, + east: f64, + ) -> (usize, usize, usize, usize) { let r_min = ((south - self.min_lat) / self.cell_size) as isize; let r_max = ((north - self.min_lat) / self.cell_size) as isize; let c_min = ((west - self.min_lon) / self.cell_size) as isize; diff --git a/server-rs/src/main.rs b/server-rs/src/main.rs index 4756e66..ccd8831 100644 --- a/server-rs/src/main.rs +++ b/server-rs/src/main.rs @@ -42,7 +42,10 @@ async fn main() { let poi_data = if poi_path.exists() { data::POIData::load(&poi_path) } else { - eprintln!("Warning: {} not found. POI endpoints will be unavailable.", poi_path.display()); + eprintln!( + "Warning: {} not found. POI endpoints will be unavailable.", + poi_path.display() + ); data::POIData { id: Vec::new(), name: Vec::new(), @@ -93,7 +96,9 @@ async fn main() { ) .route( "/api/hexagon-properties", - get(move |query| routes::get_hexagon_properties(state_hexagon_properties.clone(), query)), + get(move |query| { + routes::get_hexagon_properties(state_hexagon_properties.clone(), query) + }), ); // Static file serving for frontend diff --git a/server-rs/src/routes.rs b/server-rs/src/routes.rs index 9f3d251..509f832 100644 --- a/server-rs/src/routes.rs +++ b/server-rs/src/routes.rs @@ -8,7 +8,8 @@ use axum::response::{IntoResponse, Json}; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; -use crate::data::{Histogram, PropertyData, POIData, POI, DEFAULT_RESOLUTION, MAX_RESOLUTION, MIN_RESOLUTION}; +use crate::consts::{H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN}; +use crate::data::{Histogram, POIData, PropertyData, POI}; use crate::index::GridIndex; /// Shared application state @@ -82,7 +83,7 @@ pub async fn get_features(state: Arc) -> Json { #[derive(Deserialize)] pub struct HexagonParams { - resolution: Option, + resolution: u8, bounds: Option, /// Comma-separated filters: `name:min:max,...` /// Rows must have non-NaN values within [min,max] for each filter. @@ -130,7 +131,6 @@ impl CellAgg { } } } - } /// Write the hexagons JSON response directly to a String buffer, @@ -172,20 +172,21 @@ pub async fn get_hexagons( state: Arc, Query(params): Query, ) -> Result { - let resolution = params.resolution.unwrap_or(DEFAULT_RESOLUTION); - if resolution > MAX_RESOLUTION { + let resolution = params.resolution; + if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX { return Err(( StatusCode::BAD_REQUEST, format!( "resolution must be between {} and {}", - MIN_RESOLUTION, MAX_RESOLUTION + H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX ), )); } - let bounds_str = params - .bounds - .ok_or((StatusCode::BAD_REQUEST, "bounds parameter is required".into()))?; + let bounds_str = params.bounds.ok_or(( + StatusCode::BAD_REQUEST, + "bounds parameter is required".into(), + ))?; let parts: Vec = bounds_str .split(',') @@ -286,46 +287,44 @@ pub async fn get_hexagons( if let Some(precomputed) = h3_cells_for_res { // Fast path: precomputed H3 + visitor pattern - state.grid.for_each_in_bounds(south, west, north, east, |row_idx| { - let row = row_idx as usize; - if !row_passes(row) { - return; - } - let cell_id = precomputed[row]; - groups - .entry(cell_id) - .or_insert_with(|| CellAgg::new(num_features)) - .add_row(feature_data, row, num_features); - }); + state + .grid + .for_each_in_bounds(south, west, north, east, |row_idx| { + let row = row_idx as usize; + if !row_passes(row) { + return; + } + let cell_id = precomputed[row]; + groups + .entry(cell_id) + .or_insert_with(|| CellAgg::new(num_features)) + .add_row(feature_data, row, num_features); + }); } else { // Fallback: compute H3 on-the-fly let h3_res = h3o::Resolution::try_from(resolution).unwrap(); - state.grid.for_each_in_bounds(south, west, north, east, |row_idx| { - let row = row_idx as usize; - if !row_passes(row) { - return; - } - let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row]) - .map(|c| u64::from(c.to_cell(h3_res))) - .unwrap_or(0); - groups - .entry(cell_id) - .or_insert_with(|| CellAgg::new(num_features)) - .add_row(feature_data, row, num_features); - }); + state + .grid + .for_each_in_bounds(south, west, north, east, |row_idx| { + let row = row_idx as usize; + if !row_passes(row) { + return; + } + let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row]) + .map(|c| u64::from(c.to_cell(h3_res))) + .unwrap_or(0); + groups + .entry(cell_id) + .or_insert_with(|| CellAgg::new(num_features)) + .add_row(feature_data, row, num_features); + }); } let t_agg = t0.elapsed(); // Write JSON directly (no serde_json::Value allocation overhead) let mut json_buf = String::with_capacity(groups.len() * 128); - write_hexagons_json( - &mut json_buf, - &groups, - &min_keys, - &max_keys, - num_features, - ); + write_hexagons_json(&mut json_buf, &groups, &min_keys, &max_keys, num_features); let t_total = t0.elapsed(); eprintln!( @@ -364,9 +363,10 @@ pub async fn get_pois( state: Arc, Query(params): Query, ) -> Result, (StatusCode, String)> { - let bounds_str = params - .bounds - .ok_or((StatusCode::BAD_REQUEST, "bounds parameter is required".into()))?; + let bounds_str = params.bounds.ok_or(( + StatusCode::BAD_REQUEST, + "bounds parameter is required".into(), + ))?; let parts: Vec = bounds_str .split(',') @@ -501,7 +501,12 @@ pub struct HexagonPropertiesResponse { } /// Helper function to check if a row passes all filters -fn row_passes_filters(row: usize, filters: &[ParsedFilter], feature_data: &[f64], num_features: usize) -> bool { +fn row_passes_filters( + row: usize, + filters: &[ParsedFilter], + feature_data: &[f64], + num_features: usize, +) -> bool { filters.iter().all(|f| { let v = feature_data[row * num_features + f.feat_idx]; v.is_finite() && v >= f.min && v <= f.max @@ -520,7 +525,10 @@ pub async fn get_hexagon_properties( // 2. Validate resolution let resolution = params.resolution as usize; if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() { - return Err((StatusCode::BAD_REQUEST, "Invalid or non-precomputed resolution".to_string())); + return Err(( + StatusCode::BAD_REQUEST, + "Invalid or non-precomputed resolution".to_string(), + )); } // 3. Parse filters (reuse existing filter parsing logic from get_hexagons) @@ -592,7 +600,11 @@ pub async fn get_hexagon_properties( // Helper to get non-empty string let get_string = |s: &str| -> Option { - if s.is_empty() { None } else { Some(s.to_string()) } + if s.is_empty() { + None + } else { + Some(s.to_string()) + } }; Property {