use std::fmt::{self, Write}; use std::sync::Arc; use axum::extract::Query; use axum::http::StatusCode; use axum::response::IntoResponse; use rustc_hash::FxHashMap; use serde::Deserialize; use tracing::{info, warn}; use crate::consts::{ BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_REQUEST_MAX, H3_REQUEST_MIN, POSTCODE_MIN_RESOLUTION, }; use crate::filter::parse_filters; use crate::state::AppState; use super::parse::parse_bounds; struct HumanBytes(usize); impl fmt::Display for HumanBytes { fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { let bytes = self.0; if bytes >= 1_000_000 { write!(formatter, "{:.1} MB", bytes as f64 / 1_000_000.0) } else if bytes >= 1_000 { write!(formatter, "{:.1} KB", bytes as f64 / 1_000.0) } else { write!(formatter, "{} B", bytes) } } } #[derive(Deserialize)] pub struct HexagonParams { resolution: u8, bounds: Option, /// Comma-separated filters: `name:min:max,...` /// Rows must have non-NaN values within [min,max] for each filter. filters: Option, } /// Per-cell accumulator for aggregating features struct CellAgg { count: u32, mins: Vec, maxs: Vec, /// Min/max ordinal indices for enum features (255 = no data yet) enum_mins: Vec, enum_maxs: Vec, /// Most common postcode in this cell (only tracked at high resolutions) postcode: Option, postcode_count: u32, lat_sum: f64, lon_sum: f64, } impl CellAgg { fn new(num_features: usize, num_enums: usize) -> Self { CellAgg { count: 0, mins: vec![f32::INFINITY; num_features], maxs: vec![f32::NEG_INFINITY; num_features], enum_mins: vec![ENUM_NULL; num_enums], enum_maxs: vec![0; num_enums], postcode: None, postcode_count: 0, lat_sum: 0.0, lon_sum: 0.0, } } /// Add a row using row-major feature_data layout. /// feature_data[row * num_features + feat_idx] — all features for one row /// are contiguous, so this reads a single cache line per ~8 features. #[inline] fn add_row(&mut self, feature_data: &[f32], row: usize, num_features: usize) { self.count += 1; let base = row * num_features; let row_slice = &feature_data[base..base + num_features]; for (feat_index, &value) in row_slice.iter().enumerate() { if value.is_finite() { if value < self.mins[feat_index] { self.mins[feat_index] = value; } if value > self.maxs[feat_index] { self.maxs[feat_index] = value; } } } } /// Track min/max ordinal index for each enum feature in this cell. #[inline] fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) { for (enum_index, enum_feature) in enum_features.iter().enumerate() { let value = enum_feature.data[row]; if value != ENUM_NULL { if self.enum_mins[enum_index] == ENUM_NULL || value < self.enum_mins[enum_index] { self.enum_mins[enum_index] = value; } if value > self.enum_maxs[enum_index] { self.enum_maxs[enum_index] = value; } } } } /// Track postcode and centroid for high-resolution cells. /// Uses simple "first seen" approach — at res 11/12, most rows in a cell share a postcode. #[inline] fn add_postcode(&mut self, postcode: &str, lat: f32, lon: f32) { self.lat_sum += lat as f64; self.lon_sum += lon as f64; if postcode.is_empty() { return; } if self.postcode.is_none() { self.postcode = Some(postcode.to_string()); self.postcode_count = 1; } else if self.postcode.as_deref() == Some(postcode) { self.postcode_count += 1; } } } /// Escape a string for inclusion in a JSON string literal. pub(crate) fn write_json_escaped(buf: &mut String, text: &str) { for character in text.chars() { match character { '"' => buf.push_str("\\\""), '\\' => buf.push_str("\\\\"), '\n' => buf.push_str("\\n"), '\r' => buf.push_str("\\r"), '\t' => buf.push_str("\\t"), ctrl if ctrl < '\x20' => { let _ = write!(buf, "\\u{:04x}", ctrl as u32); } other => buf.push(other), } } } /// Write the hexagons JSON response directly to a String buffer, /// avoiding serde_json::Value allocations entirely. #[allow(clippy::too_many_arguments)] fn write_hexagons_json( buf: &mut String, groups: &FxHashMap, min_keys: &[String], max_keys: &[String], num_features: usize, enum_min_keys: &[String], enum_max_keys: &[String], num_enums: usize, include_postcode: bool, ) { buf.push_str("{\"features\":["); let mut first = true; for (&cell_id, aggregation) in groups { let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else { continue; }; if !first { buf.push(','); } first = false; let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count); for feat_index in 0..num_features { if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() { let _ = write!( buf, ",\"{}\":{},\"{}\":{}", min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index] ); } } for enum_index in 0..num_enums { if aggregation.enum_mins[enum_index] != ENUM_NULL { let _ = write!( buf, ",\"{}\":{},\"{}\":{}", enum_min_keys[enum_index], aggregation.enum_mins[enum_index], enum_max_keys[enum_index], aggregation.enum_maxs[enum_index] ); } } if include_postcode { if let Some(ref postcode) = aggregation.postcode { let total = aggregation.count as f64; let centroid_lat = aggregation.lat_sum / total; let centroid_lon = aggregation.lon_sum / total; if centroid_lat.is_finite() && centroid_lon.is_finite() { buf.push_str(",\"postcode\":\""); write_json_escaped(buf, postcode); let _ = write!(buf, "\",\"lat\":{},\"lon\":{}", centroid_lat, centroid_lon); } } } buf.push('}'); } buf.push_str("]}"); } pub async fn get_hexagons( state: Arc, Query(params): Query, ) -> Result { let resolution = params.resolution; if !(H3_REQUEST_MIN..=H3_REQUEST_MAX).contains(&resolution) { warn!( resolution, "Resolution out of range [{}, {}]", H3_REQUEST_MIN, H3_REQUEST_MAX ); return Err(( StatusCode::BAD_REQUEST, format!( "resolution must be between {} and {}", H3_REQUEST_MIN, H3_REQUEST_MAX ), )); } let bounds_str = params.bounds.ok_or(( StatusCode::BAD_REQUEST, "bounds parameter is required".into(), ))?; let (mut south, mut west, mut north, mut east) = parse_bounds(&bounds_str)?; let lat_range = north - south; let lng_range = east - west; south -= lat_range * BOUNDS_BUFFER_PERCENT; north += lat_range * BOUNDS_BUFFER_PERCENT; west -= lng_range * BOUNDS_BUFFER_PERCENT; east += lng_range * BOUNDS_BUFFER_PERCENT; south = (south / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION; west = (west / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION; north = (north / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION; east = (east / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION; let filters_str = params.filters.clone(); let (parsed_filters, parsed_enum_filters) = parse_filters( params.filters.as_deref(), &state.data.feature_names, &state.data.enum_features, ); let num_filters = parsed_filters.len() + parsed_enum_filters.len(); let json_body = tokio::task::spawn_blocking(move || -> Result { let t0 = std::time::Instant::now(); let num_features = state.data.num_features; let num_enums = state.data.enum_features.len(); let feature_data = &state.data.feature_data; let min_keys = &state.min_keys; let max_keys = &state.max_keys; let enum_min_keys = &state.enum_min_keys; let enum_max_keys = &state.enum_max_keys; let h3_cells_for_res: Option<&[u64]> = state .h3_cells .get(resolution as usize) .filter(|cells| !cells.is_empty()) .map(|cells| cells.as_slice()); let mut groups: FxHashMap = FxHashMap::default(); let enum_features = &state.data.enum_features; let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION; // Row-level filter check: numeric must be non-NaN and within [min, max], // enum must have value index in the allowed set let row_passes = |row: usize| -> bool { parsed_filters.iter().all(|filter| { let value = feature_data[row * num_features + filter.feat_idx]; value.is_finite() && value >= filter.min && value <= filter.max }) && parsed_enum_filters.iter().all(|enum_filter| { let value = enum_features[enum_filter.enum_idx].data[row]; value != ENUM_NULL && enum_filter.allowed.contains(&value) }) }; if let Some(precomputed) = h3_cells_for_res { state .grid .for_each_in_bounds(south, west, north, east, |row_idx| { let row = row_idx as usize; if !row_passes(row) { return; } let cell_id = precomputed[row]; let aggregation = groups .entry(cell_id) .or_insert_with(|| CellAgg::new(num_features, num_enums)); aggregation.add_row(feature_data, row, num_features); aggregation.add_enums(enum_features, row); if include_postcode { aggregation.add_postcode( state.data.postcode(row), state.data.lat[row], state.data.lon[row], ); } }); } else { let h3_res = h3o::Resolution::try_from(resolution) .map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?; state .grid .for_each_in_bounds(south, west, north, east, |row_idx| { let row = row_idx as usize; if !row_passes(row) { return; } let cell_id = h3o::LatLng::new(state.data.lat[row] as f64, state.data.lon[row] as f64) .map(|coord| u64::from(coord.to_cell(h3_res))) .unwrap_or(0); let aggregation = groups .entry(cell_id) .or_insert_with(|| CellAgg::new(num_features, num_enums)); aggregation.add_row(feature_data, row, num_features); aggregation.add_enums(enum_features, row); if include_postcode { aggregation.add_postcode( state.data.postcode(row), state.data.lat[row], state.data.lon[row], ); } }); } let t_agg = t0.elapsed(); let mut json_buf = String::with_capacity(groups.len() * 128); write_hexagons_json( &mut json_buf, &groups, min_keys, max_keys, num_features, enum_min_keys, enum_max_keys, num_enums, include_postcode, ); let t_total = t0.elapsed(); info!( resolution, cells = groups.len(), filters = num_filters, filters_raw = filters_str.as_deref().unwrap_or("-"), agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0), total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0), size = format_args!("{}", HumanBytes(json_buf.len())), "GET /api/hexagons" ); Ok(json_buf) }) .await .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))? .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?; Ok(([("content-type", "application/json")], json_body)) }