use std::fmt::Write; use std::str::FromStr; use std::sync::Arc; use axum::extract::Query; use axum::http::StatusCode; use axum::response::{IntoResponse, Json}; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use crate::consts::{H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN}; use crate::data::{Histogram, POIData, PropertyData, POI}; use crate::index::GridIndex; /// Shared application state pub struct AppState { pub data: PropertyData, pub grid: GridIndex, /// h3_cells[resolution][row_idx] = precomputed H3 cell ID. /// Empty Vec for resolutions not precomputed. pub h3_cells: Vec>, pub poi_data: POIData, pub poi_grid: GridIndex, } const BOUNDS_BUFFER_PERCENT: f64 = 0.2; // ── /api/features ── #[derive(Serialize)] pub struct FeatureInfo { name: String, min: f64, max: f64, label: String, histogram: Histogram, } #[derive(Serialize)] pub struct FeaturesResponse { features: Vec, } fn snake_to_label(name: &str) -> String { name.split('_') .map(|word| { let mut chars = word.chars(); match chars.next() { None => String::new(), Some(c) => { let mut s = c.to_uppercase().to_string(); s.extend(chars); s } } }) .collect::>() .join(" ") } pub async fn get_features(state: Arc) -> Json { let features = state .data .feature_names .iter() .enumerate() .map(|(i, name): (usize, &String)| { let stats = &state.data.feature_stats[i]; FeatureInfo { name: name.clone(), min: stats.p_low, max: stats.p_high, label: snake_to_label(name), histogram: stats.histogram.clone(), } }) .collect(); Json(FeaturesResponse { features }) } // ── /api/hexagons ── #[derive(Deserialize)] pub struct HexagonParams { resolution: u8, bounds: Option, /// Comma-separated filters: `name:min:max,...` /// Rows must have non-NaN values within [min,max] for each filter. filters: Option, } struct ParsedFilter { feat_idx: usize, min: f64, max: f64, } /// Per-cell accumulator for aggregating features struct CellAgg { count: u32, mins: Vec, maxs: Vec, } impl CellAgg { fn new(num_features: usize) -> Self { CellAgg { count: 0, mins: vec![f64::INFINITY; num_features], maxs: vec![f64::NEG_INFINITY; num_features], } } /// Add a row using row-major feature_data layout. /// feature_data[row * num_features + feat_idx] — all features for one row /// are contiguous, so this reads a single cache line per ~8 features. #[inline] fn add_row(&mut self, feature_data: &[f64], row: usize, num_features: usize) { self.count += 1; let base = row * num_features; let row_slice = &feature_data[base..base + num_features]; for (i, &v) in row_slice.iter().enumerate() { if v.is_finite() { if v < self.mins[i] { self.mins[i] = v; } if v > self.maxs[i] { self.maxs[i] = v; } } } } } /// Write the hexagons JSON response directly to a String buffer, /// avoiding serde_json::Value allocations entirely. fn write_hexagons_json( buf: &mut String, groups: &FxHashMap, min_keys: &[String], max_keys: &[String], num_features: usize, ) { buf.push_str("{\"features\":["); let mut first = true; for (&cell_id, agg) in groups { if !first { buf.push(','); } first = false; let cell = h3o::CellIndex::try_from(cell_id).unwrap(); write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, agg.count).unwrap(); for i in 0..num_features { if agg.mins[i] != f64::INFINITY { write!( buf, ",\"{}\":{},\"{}\":{}", min_keys[i], agg.mins[i], max_keys[i], agg.maxs[i] ) .unwrap(); } } buf.push('}'); } buf.push_str("]}"); } pub async fn get_hexagons( state: Arc, Query(params): Query, ) -> Result { let resolution = params.resolution; if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX { return Err(( StatusCode::BAD_REQUEST, format!( "resolution must be between {} and {}", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX ), )); } let bounds_str = params.bounds.ok_or(( StatusCode::BAD_REQUEST, "bounds parameter is required".into(), ))?; let parts: Vec = bounds_str .split(',') .map(|s| s.trim().parse::()) .collect::, _>>() .map_err(|_| { ( StatusCode::BAD_REQUEST, "Invalid bounds format. Use: south,west,north,east".into(), ) })?; if parts.len() != 4 { return Err(( StatusCode::BAD_REQUEST, "Invalid bounds format. Use: south,west,north,east".into(), )); } let (mut south, mut west, mut north, mut east) = (parts[0], parts[1], parts[2], parts[3]); // Apply bounds buffer (20%) let lat_range = north - south; let lng_range = east - west; south -= lat_range * BOUNDS_BUFFER_PERCENT; north += lat_range * BOUNDS_BUFFER_PERCENT; west -= lng_range * BOUNDS_BUFFER_PERCENT; east += lng_range * BOUNDS_BUFFER_PERCENT; // Quantize to 0.01 degree precision let precision = 0.01; south = (south / precision).floor() * precision; west = (west / precision).floor() * precision; north = (north / precision).ceil() * precision; east = (east / precision).ceil() * precision; // Parse filters: `name:min:max,...` let parsed_filters: Vec = params .filters .as_deref() .filter(|s| !s.is_empty()) .map(|s| { s.split(',') .filter_map(|entry| { let parts: Vec<&str> = entry.splitn(3, ':').collect(); if parts.len() != 3 { return None; } let name = parts[0].trim(); let min = parts[1].trim().parse::().ok()?; let max = parts[2].trim().parse::().ok()?; let feat_idx = state.data.feature_names.iter().position(|n| n == name)?; Some(ParsedFilter { feat_idx, min, max }) }) .collect() }) .unwrap_or_default(); // Move CPU-heavy work off the async executor let json_body = tokio::task::spawn_blocking(move || { let t0 = std::time::Instant::now(); let num_features = state.data.num_features; let feature_data = &state.data.feature_data; // Pre-compute JSON key strings once let min_keys: Vec = state .data .feature_names .iter() .map(|n| format!("min_{}", n)) .collect(); let max_keys: Vec = state .data .feature_names .iter() .map(|n| format!("max_{}", n)) .collect(); // Use precomputed H3 cells if available let h3_cells_for_res: Option<&[u64]> = state .h3_cells .get(resolution as usize) .filter(|v| !v.is_empty()) .map(|v| v.as_slice()); // Aggregate using FxHashMap (fast non-crypto hash for integer keys) // and grid visitor (no intermediate Vec allocation) let mut groups: FxHashMap = FxHashMap::default(); // Row-level filter check: value must be non-NaN and within [min, max] let row_passes = |row: usize| -> bool { parsed_filters.iter().all(|f| { let v = feature_data[row * num_features + f.feat_idx]; v.is_finite() && v >= f.min && v <= f.max }) }; if let Some(precomputed) = h3_cells_for_res { // Fast path: precomputed H3 + visitor pattern state .grid .for_each_in_bounds(south, west, north, east, |row_idx| { let row = row_idx as usize; if !row_passes(row) { return; } let cell_id = precomputed[row]; groups .entry(cell_id) .or_insert_with(|| CellAgg::new(num_features)) .add_row(feature_data, row, num_features); }); } else { // Fallback: compute H3 on-the-fly let h3_res = h3o::Resolution::try_from(resolution).unwrap(); state .grid .for_each_in_bounds(south, west, north, east, |row_idx| { let row = row_idx as usize; if !row_passes(row) { return; } let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row]) .map(|c| u64::from(c.to_cell(h3_res))) .unwrap_or(0); groups .entry(cell_id) .or_insert_with(|| CellAgg::new(num_features)) .add_row(feature_data, row, num_features); }); } let t_agg = t0.elapsed(); // Write JSON directly (no serde_json::Value allocation overhead) let mut json_buf = String::with_capacity(groups.len() * 128); write_hexagons_json(&mut json_buf, &groups, &min_keys, &max_keys, num_features); let t_total = t0.elapsed(); eprintln!( "hexagons: res={} cells={} agg={:?} json={:?} total={:?} bytes={}", resolution, groups.len(), t_agg, t_total - t_agg, t_total, json_buf.len() ); json_buf }) .await .unwrap(); Ok(([("content-type", "application/json")], json_body)) } // ── /api/pois ── #[derive(Deserialize)] pub struct POIParams { bounds: Option, /// Comma-separated list of categories to filter by categories: Option, } #[derive(Serialize)] pub struct POIsResponse { pois: Vec, } pub async fn get_pois( state: Arc, Query(params): Query, ) -> Result, (StatusCode, String)> { let bounds_str = params.bounds.ok_or(( StatusCode::BAD_REQUEST, "bounds parameter is required".into(), ))?; let parts: Vec = bounds_str .split(',') .map(|s| s.trim().parse::()) .collect::, _>>() .map_err(|_| { ( StatusCode::BAD_REQUEST, "Invalid bounds format. Use: south,west,north,east".into(), ) })?; if parts.len() != 4 { return Err(( StatusCode::BAD_REQUEST, "Invalid bounds format. Use: south,west,north,east".into(), )); } let (south, west, north, east) = (parts[0], parts[1], parts[2], parts[3]); // Parse category filter if provided let category_filter: Option> = params .categories .as_deref() .filter(|s| !s.is_empty()) .map(|s| s.split(',').map(|c| c.trim().to_string()).collect()); // Move CPU-heavy work off the async executor let result = tokio::task::spawn_blocking(move || { // Spatial query using grid index let row_indices = state.poi_grid.query(south, west, north, east); let pois: Vec = row_indices .iter() .filter_map(|&row_idx| { let row = row_idx as usize; // Apply category filter if specified if let Some(ref categories) = category_filter { if !categories.contains(&state.poi_data.category[row]) { return None; } } Some(POI { id: state.poi_data.id[row].clone(), name: state.poi_data.name[row].clone(), category: state.poi_data.category[row].clone(), lat: state.poi_data.lat[row], lng: state.poi_data.lng[row], emoji: state.poi_data.emoji[row].clone(), }) }) .take(5000) .collect(); POIsResponse { pois } }) .await .unwrap(); Ok(Json(result)) } // ── /api/poi-categories ── #[derive(Serialize)] pub struct POICategoriesResponse { categories: Vec, } pub async fn get_poi_categories(state: Arc) -> Json { // Compute unique categories let result = tokio::task::spawn_blocking(move || { let mut categories: Vec = state .poi_data .category .iter() .cloned() .collect::>() .into_iter() .collect(); categories.sort(); POICategoriesResponse { categories } }) .await .unwrap(); Json(result) } // ── /api/hexagon-properties ── #[derive(Deserialize)] pub struct HexagonPropertiesParams { pub h3: String, pub resolution: u8, pub filters: Option, pub limit: Option, pub offset: Option, } #[derive(Serialize)] pub struct Property { // String fields pub address: Option, pub postcode: Option, pub property_type: Option, pub built_form: Option, pub current_energy_rating: Option, pub potential_energy_rating: Option, // Numeric fields pub lat: f64, pub lon: f64, // All other numeric features stored as dynamic map #[serde(flatten)] pub features: FxHashMap, } #[derive(Serialize)] pub struct HexagonPropertiesResponse { pub properties: Vec, pub total: usize, pub limit: usize, pub offset: usize, pub truncated: bool, } /// Helper function to check if a row passes all filters fn row_passes_filters( row: usize, filters: &[ParsedFilter], feature_data: &[f64], num_features: usize, ) -> bool { filters.iter().all(|f| { let v = feature_data[row * num_features + f.feat_idx]; v.is_finite() && v >= f.min && v <= f.max }) } pub async fn get_hexagon_properties( state: Arc, Query(params): Query, ) -> Result, (StatusCode, String)> { // 1. Parse H3 cell ID let cell = h3o::CellIndex::from_str(¶ms.h3) .map_err(|e| (StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", e)))?; let cell_u64: u64 = cell.into(); // 2. Validate resolution let resolution = params.resolution as usize; if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() { return Err(( StatusCode::BAD_REQUEST, "Invalid or non-precomputed resolution".to_string(), )); } // 3. Parse filters (reuse existing filter parsing logic from get_hexagons) let parsed_filters: Vec = params .filters .as_deref() .filter(|s| !s.is_empty()) .map(|s| { s.split(',') .filter_map(|entry| { let parts: Vec<&str> = entry.splitn(3, ':').collect(); if parts.len() != 3 { return None; } let name = parts[0].trim(); let min = parts[1].trim().parse::().ok()?; let max = parts[2].trim().parse::().ok()?; let feat_idx = state.data.feature_names.iter().position(|n| n == name)?; Some(ParsedFilter { feat_idx, min, max }) }) .collect() }) .unwrap_or_default(); // Move CPU-heavy work off the async executor let result = tokio::task::spawn_blocking(move || { let h3_data = &state.h3_cells[resolution]; let num_features = state.data.num_features; let feature_data = &state.data.feature_data; // 4. Find all rows with matching H3 cell let matching_rows: Vec = h3_data .iter() .enumerate() .filter_map(|(idx, &h3_cell)| { if h3_cell == cell_u64 { // Apply feature filters if row_passes_filters(idx, &parsed_filters, feature_data, num_features) { Some(idx) } else { None } } else { None } }) .collect(); let total = matching_rows.len(); let limit = params.limit.unwrap_or(100).min(500); let offset = params.offset.unwrap_or(0); let truncated = total > offset + limit; // 5. Extract properties for paginated subset let properties: Vec = matching_rows .iter() .skip(offset) .take(limit) .map(|&row| { // Build dynamic features map let mut features = FxHashMap::default(); let base = row * num_features; for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() { let v = feature_data[base + feat_idx]; if v.is_finite() { features.insert(feat_name.clone(), v); } } // Helper to get non-empty string let get_string = |s: &str| -> Option { if s.is_empty() { None } else { Some(s.to_string()) } }; Property { address: get_string(&state.data.address[row]), postcode: get_string(&state.data.postcode[row]), property_type: get_string(&state.data.property_type[row]), built_form: get_string(&state.data.built_form[row]), current_energy_rating: get_string(&state.data.current_energy_rating[row]), potential_energy_rating: get_string(&state.data.potential_energy_rating[row]), lat: state.data.lat[row], lon: state.data.lon[row], features, } }) .collect(); HexagonPropertiesResponse { properties, total, limit, offset, truncated, } }) .await .unwrap(); Ok(Json(result)) }