use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; use axum::extract::{Query, State}; use axum::http::StatusCode; use axum::response::{IntoResponse, Json}; use axum::Extension; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use tracing::{info, warn}; use crate::auth::OptionalUser; use crate::licensing::check_license_bounds; use crate::parsing::{ cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_set, parse_filters, row_passes_filters, validate_h3_resolution, }; use crate::state::SharedState; use super::stats; #[derive(Serialize)] pub struct HistogramStats { pub min: f64, pub max: f64, /// 1st percentile (left edge of main distribution) pub p1: f64, /// 99th percentile (right edge of main distribution) pub p99: f64, pub counts: Vec, } #[derive(Serialize)] pub struct NumericFeatureStats { pub name: String, pub count: usize, pub min: f64, pub max: f64, pub mean: f64, pub histogram: HistogramStats, } #[derive(Serialize)] pub struct EnumFeatureStats { pub name: String, pub counts: HashMap, } #[derive(Serialize)] pub struct PricePoint { pub year: f32, pub price: f32, } #[derive(Serialize)] pub struct HexagonStatsResponse { pub count: usize, pub numeric_features: Vec, pub enum_features: Vec, #[serde(skip_serializing_if = "Vec::is_empty")] pub price_history: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub central_postcode: Option, } #[derive(Deserialize)] pub struct HexagonStatsParams { pub h3: String, pub resolution: u8, pub filters: Option, /// Comma-separated feature names to include in stats response. /// Only listed features are computed; if absent or empty, no features are returned. pub fields: Option, /// When set (with journey_slug), pick central_postcode as the postcode with the /// shortest travel time for this mode+slug (so it has journey data). pub journey_mode: Option, pub journey_slug: Option, } pub async fn get_hexagon_stats( State(shared): State>, Extension(user): Extension, Query(params): Query, ) -> Result, axum::response::Response> { let state = shared.load_state(); let cell = h3o::CellIndex::from_str(¶ms.h3).map_err(|error| { warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index"); ( StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error), ) .into_response() })?; let cell_u64: u64 = cell.into(); let resolution = params.resolution; validate_h3_resolution(resolution).map_err(IntoResponse::into_response)?; // License check using H3 cell bounds let h3_bounds = h3_cell_bounds(cell, 0.0); check_license_bounds(&user.0, h3_bounds)?; let h3_str = params.h3; let quant = state.data.quant_ref(); let (parsed_filters, parsed_enum_filters) = parse_filters( params.filters.as_deref(), &state.feature_name_to_index, &state.data.enum_values, &quant, ) .map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?; let num_filters = parsed_filters.len() + parsed_enum_filters.len(); let filters_str = params.filters; let (fields_specified, field_set) = parse_field_set(params.fields.as_deref()); // Load travel time data for central_postcode selection (if requested) let journey_travel_data = match (¶ms.journey_mode, ¶ms.journey_slug) { (Some(mode), Some(slug)) if state.travel_time_store.has_destination(mode, slug) => { state.travel_time_store.get(mode, slug).ok() } _ => None, }; let response = tokio::task::spawn_blocking(move || { let start_time = std::time::Instant::now(); let precomputed = &state.h3_cells; let h3_res = h3o::Resolution::try_from(resolution) .map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?; let need_parent = needs_parent(resolution); let num_features = state.data.num_features; let feature_data = &state.data.feature_data; let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001); let mut h3_cache: FxHashMap = FxHashMap::default(); let mut matching_rows: Vec = Vec::new(); state .grid .for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| { let row = row_idx as usize; if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache) == cell_u64 && row_passes_filters( row, &parsed_filters, &parsed_enum_filters, feature_data, num_features, ) { matching_rows.push(row); } }); let total_count = matching_rows.len(); // Pick central_postcode: prefer the postcode with the shortest travel time // for the requested journey destination (so it has journey data). Fall back // to geographic proximity to the hexagon center. let central_postcode = if !matching_rows.is_empty() { if let Some(ref travel_data) = journey_travel_data { // Find the row with the shortest travel time in the travel data let best_row = matching_rows .iter() .copied() .filter_map(|row| { let pc = state.data.postcode(row); travel_data.get(pc).map(|td| (row, td.minutes)) }) .min_by_key(|&(_, mins)| mins) .map(|(row, _)| row); // Fall back to geographic center if no row has travel data let row = best_row.unwrap_or_else(|| { let center: h3o::LatLng = cell.into(); let center_lat = center.lat() as f32; let center_lon = center.lng() as f32; matching_rows .iter() .copied() .min_by(|&a, &b| { let da = (state.data.lat[a] - center_lat).powi(2) + (state.data.lon[a] - center_lon).powi(2); let db = (state.data.lat[b] - center_lat).powi(2) + (state.data.lon[b] - center_lon).powi(2); da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal) }) .expect("matching_rows is non-empty") }); Some(state.data.postcode(row).to_string()) } else { // No journey destination requested — use geographic center let center: h3o::LatLng = cell.into(); let center_lat = center.lat() as f32; let center_lon = center.lng() as f32; let closest_row = matching_rows .iter() .copied() .min_by(|&a, &b| { let da = (state.data.lat[a] - center_lat).powi(2) + (state.data.lon[a] - center_lon).powi(2); let db = (state.data.lat[b] - center_lat).powi(2) + (state.data.lon[b] - center_lon).powi(2); da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal) }) .expect("matching_rows is non-empty"); Some(state.data.postcode(closest_row).to_string()) } } else { None }; let price_history = stats::extract_price_history(&matching_rows, &state.data, &state.feature_name_to_index); let (numeric_features, enum_features_out) = stats::compute_feature_stats( &matching_rows, &state.data, &state.data.feature_names, &state.data.enum_values, &state.data.feature_stats, fields_specified, &field_set, ); let elapsed = start_time.elapsed(); info!( h3 = %h3_str, resolution, total_count, filters = num_filters, filters_raw = filters_str.as_deref().unwrap_or("-"), ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0), "GET /api/hexagon-stats" ); Ok(HexagonStatsResponse { count: total_count, numeric_features, enum_features: enum_features_out, price_history, central_postcode, }) }) .await .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()).into_response())? .map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error).into_response())?; Ok(Json(response)) }