use std::collections::{HashMap, HashSet}; use std::str::FromStr; use std::sync::Arc; use axum::extract::{Query, State}; use axum::http::StatusCode; use axum::response::{IntoResponse, Json}; use axum::Extension; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use tracing::{info, warn}; use crate::auth::OptionalUser; use crate::consts::NAN_U16; use crate::data::travel_time::TravelData; use crate::data::PropertyData; use crate::features::{Feature, FEATURE_GROUPS}; use crate::licensing::{check_license_bounds, resolve_share_code}; use crate::parsing::{ cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_set, parse_filters_with_poi, row_passes_filters, row_passes_poi_filters, validate_h3_resolution, ParsedEnumFilter, ParsedFilter, ParsedPoiFilter, }; use crate::state::SharedState; use super::stats; use super::travel_time::{ load_travel_data, parse_optional_travel, row_passes_travel_filters, TravelEntry, }; const AREA_STATS_EXCLUDED_GROUPS: &[&str] = &["Amenities"]; const MAX_FILTER_EXCLUSIONS: usize = 5; #[derive(Serialize)] pub struct HistogramStats { pub min: f64, pub max: f64, /// 1st percentile (left edge of main distribution) pub p1: f64, /// 99th percentile (right edge of main distribution) pub p99: f64, pub counts: Vec, } #[derive(Serialize)] pub struct NumericFeatureStats { pub name: String, pub count: usize, pub min: f64, pub max: f64, pub mean: f64, pub histogram: HistogramStats, } #[derive(Serialize)] pub struct EnumFeatureStats { pub name: String, pub counts: HashMap, } #[derive(Serialize)] pub struct PricePoint { pub year: f32, pub price: f32, } #[derive(Serialize)] pub struct FilterExclusion { pub name: String, pub kind: String, pub direction: String, #[serde(skip_serializing_if = "Option::is_none")] pub value: Option, #[serde(skip_serializing_if = "Option::is_none")] pub min: Option, #[serde(skip_serializing_if = "Option::is_none")] pub max: Option, #[serde(skip_serializing_if = "Option::is_none")] pub category: Option, pub relative_difference: f32, pub rejected_count: usize, } fn filter_exclusion_key(exclusion: &FilterExclusion) -> String { format!( "{}\u{1f}{}\u{1f}{}\u{1f}{}", exclusion.kind, exclusion.name, exclusion.direction, exclusion.category.as_deref().unwrap_or("") ) } fn missing_filter_exclusion(name: String, kind: &str) -> FilterExclusion { FilterExclusion { name, kind: kind.to_string(), direction: "missing_value".to_string(), value: None, min: None, max: None, category: None, relative_difference: 1.0, rejected_count: 0, } } #[derive(Serialize)] pub struct HexagonStatsResponse { pub count: usize, pub numeric_features: Vec, pub enum_features: Vec, #[serde(skip_serializing_if = "Vec::is_empty")] pub price_history: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub central_postcode: Option, #[serde(skip_serializing_if = "Vec::is_empty")] pub filter_exclusions: Vec, } #[derive(Deserialize)] pub struct HexagonStatsParams { pub h3: String, pub resolution: u8, pub filters: Option, /// `;;`-separated feature names to include in stats response. /// Only listed features are computed. If absent, area stats default to /// displayable groups; if empty, no feature stats are returned. pub fields: Option, /// When set (with journey_slug), pick central_postcode as the postcode with the /// shortest travel time for this mode+slug (so it has journey data). pub journey_mode: Option, pub journey_slug: Option, /// Pipe-separated travel time entries: `mode:slug|mode:slug:min:max`. /// Optional min:max applies as a filter (exclude properties outside range). pub travel: Option, /// Share-link code; grants bbox-scoped access for unlicensed users. pub share: Option, } fn default_area_stat_field_set() -> HashSet { FEATURE_GROUPS .iter() .filter(|group| !AREA_STATS_EXCLUDED_GROUPS.contains(&group.name)) .flat_map(|group| group.features.iter()) .map(|feature| match feature { Feature::Numeric(config) => config.name.to_string(), Feature::Enum(config) => config.name.to_string(), }) .collect() } pub(super) fn parse_area_stats_field_set(fields: Option<&str>) -> (bool, HashSet) { let (fields_specified, field_set) = parse_field_set(fields); if fields_specified { return (fields_specified, field_set); } (true, default_area_stat_field_set()) } #[inline] fn relative_difference(value: f32, min: f32, max: f32) -> Option<(String, f32)> { let distance = if value < min { min - value } else if value > max { value - max } else { return None; }; let range = (max - min).abs(); let denominator = if range.is_finite() && range > f32::EPSILON { range } else { min.abs().max(max.abs()).max(1.0) }; let direction = if value < min { "lower_min".to_string() } else { "raise_max".to_string() }; Some((direction, distance / denominator)) } pub(super) fn top_filter_exclusions( area_rows: &[usize], numeric_filters: &[ParsedFilter], enum_filters: &[ParsedEnumFilter], poi_filters: &[ParsedPoiFilter], travel_entries: &[TravelEntry], travel_data: &[TravelData], data: &PropertyData, ) -> Vec { if area_rows.is_empty() || (numeric_filters.is_empty() && enum_filters.is_empty() && poi_filters.is_empty() && !travel_entries .iter() .any(|entry| entry.filter_min.is_some() && entry.filter_max.is_some())) { return Vec::new(); } let feature_data = &data.feature_data; let num_features = data.num_features; let quant = data.quant_ref(); let poi_quant = data.poi_metrics.quant_ref(); let mut rejection_counts: HashMap = HashMap::new(); let mut best_path: Option> = None; for &row in area_rows { let mut path = Vec::new(); for filter in numeric_filters { let min = quant.decode(filter.feat_idx, filter.min_u16); let max = quant.decode(filter.feat_idx, filter.max_u16); let raw = feature_data[row * num_features + filter.feat_idx]; if raw == NAN_U16 { path.push(missing_filter_exclusion( data.feature_names[filter.feat_idx].clone(), "numeric", )); continue; } let value = quant.decode(filter.feat_idx, raw); let Some((direction, rel_diff)) = relative_difference(value, min, max) else { continue; }; path.push(FilterExclusion { name: data.feature_names[filter.feat_idx].clone(), kind: "numeric".to_string(), direction, value: Some(value), min: Some(min), max: Some(max), category: None, relative_difference: rel_diff, rejected_count: 0, }); } for filter in enum_filters { let raw = feature_data[row * num_features + filter.feat_idx]; if raw == NAN_U16 { path.push(missing_filter_exclusion( data.feature_names[filter.feat_idx].clone(), "enum", )); continue; } if filter.allowed.contains(&raw) { continue; } let Some(values) = data.enum_values.get(&filter.feat_idx) else { continue; }; let Some(category) = values.get(raw as usize) else { continue; }; path.push(FilterExclusion { name: data.feature_names[filter.feat_idx].clone(), kind: "enum".to_string(), direction: "allow_value".to_string(), value: None, min: None, max: None, category: Some(category.clone()), relative_difference: 1.0, rejected_count: 0, }); } for filter in poi_filters { let min = poi_quant.decode(filter.metric_idx, filter.min_u16); let max = poi_quant.decode(filter.metric_idx, filter.max_u16); let raw = data .poi_metrics .raw_for_property_row(row, filter.metric_idx); if raw == NAN_U16 { path.push(missing_filter_exclusion( data.poi_metrics.feature_names[filter.metric_idx].clone(), "poi", )); continue; } let value = poi_quant.decode(filter.metric_idx, raw); let Some((direction, rel_diff)) = relative_difference(value, min, max) else { continue; }; path.push(FilterExclusion { name: data.poi_metrics.feature_names[filter.metric_idx].clone(), kind: "poi".to_string(), direction, value: Some(value), min: Some(min), max: Some(max), category: None, relative_difference: rel_diff, rejected_count: 0, }); } for (filter_index, entry) in travel_entries.iter().enumerate() { let (Some(min), Some(max)) = (entry.filter_min, entry.filter_max) else { continue; }; let postcode = data.postcode(row); let Some(row_data) = travel_data .get(filter_index) .and_then(|travel| travel.get(postcode)) else { path.push(missing_filter_exclusion( format!("tt_{}_{}", entry.mode, entry.slug), "travel", )); continue; }; let minutes = if entry.use_best { row_data.best_minutes.unwrap_or(row_data.minutes) } else { row_data.minutes } as f32; let Some((direction, rel_diff)) = relative_difference(minutes, min, max) else { continue; }; path.push(FilterExclusion { name: format!("tt_{}_{}", entry.mode, entry.slug), kind: "travel".to_string(), direction, value: Some(minutes), min: Some(min), max: Some(max), category: None, relative_difference: rel_diff, rejected_count: 0, }); } if path.is_empty() { continue; } for exclusion in &path { *rejection_counts .entry(filter_exclusion_key(exclusion)) .or_default() += 1; } let path_score = path .iter() .map(|exclusion| exclusion.relative_difference) .sum::(); let current_score = best_path .as_ref() .map(|current| { current .iter() .map(|exclusion| exclusion.relative_difference) .sum::() }) .unwrap_or(f32::INFINITY); let replace = path_score < current_score || (path_score == current_score && best_path .as_ref() .map_or(true, |current| path.len() < current.len())); if replace { best_path = Some(path); } } let Some(mut exclusions) = best_path else { return Vec::new(); }; for exclusion in &mut exclusions { exclusion.rejected_count = rejection_counts .get(&filter_exclusion_key(exclusion)) .copied() .unwrap_or(0); } exclusions.sort_by(|a, b| { a.relative_difference .partial_cmp(&b.relative_difference) .unwrap_or(std::cmp::Ordering::Equal) .then_with(|| b.rejected_count.cmp(&a.rejected_count)) .then_with(|| a.name.cmp(&b.name)) }); exclusions.truncate(MAX_FILTER_EXCLUSIONS); exclusions } pub async fn get_hexagon_stats( State(shared): State>, Extension(user): Extension, Query(params): Query, ) -> Result, axum::response::Response> { let state = shared.load_state(); let cell = h3o::CellIndex::from_str(¶ms.h3).map_err(|error| { warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index"); ( StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error), ) .into_response() })?; let cell_u64: u64 = cell.into(); let resolution = params.resolution; validate_h3_resolution(resolution).map_err(IntoResponse::into_response)?; // License check using H3 cell bounds let h3_bounds = h3_cell_bounds(cell, 0.0); let share_bounds = resolve_share_code(&state, params.share.as_deref()).await; check_license_bounds(&user.0, h3_bounds, share_bounds)?; let h3_str = params.h3; let quant = state.data.quant_ref(); let poi_quant = state.data.poi_metrics.quant_ref(); let (parsed_filters, parsed_enum_filters, parsed_poi_filters) = parse_filters_with_poi( params.filters.as_deref(), &state.feature_name_to_index, &state.data.enum_values, &quant, &state.data.poi_metrics.name_to_index, &poi_quant, ) .map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?; let num_filters = parsed_filters.len() + parsed_enum_filters.len() + parsed_poi_filters.len(); let filters_str = params.filters; let has_poi_filters = !parsed_poi_filters.is_empty(); let (fields_specified, field_set) = parse_area_stats_field_set(params.fields.as_deref()); let travel_entries = parse_optional_travel(params.travel.as_deref()) .map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?; // Load travel time data for central_postcode selection (if requested) let journey_travel_data = match (¶ms.journey_mode, ¶ms.journey_slug) { (Some(mode), Some(slug)) if state.travel_time_store.has_destination(mode, slug) => { state.travel_time_store.get(mode, slug).ok() } _ => None, }; let response = tokio::task::spawn_blocking(move || { let start_time = std::time::Instant::now(); let precomputed = &state.h3_cells; let h3_res = h3o::Resolution::try_from(resolution) .map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?; let need_parent = needs_parent(resolution); let num_features = state.data.num_features; let feature_data = &state.data.feature_data; let travel_data = load_travel_data(&state.travel_time_store, &travel_entries)?; let has_travel = !travel_entries.is_empty(); let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001); let mut h3_cache: FxHashMap = FxHashMap::default(); let mut area_rows: Vec = Vec::new(); let mut matching_rows: Vec = Vec::new(); state .grid .for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| { let row = row_idx as usize; if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache) != cell_u64 { return; } area_rows.push(row); if row_passes_filters( row, &parsed_filters, &parsed_enum_filters, feature_data, num_features, ) && (!has_poi_filters || row_passes_poi_filters(row, &parsed_poi_filters, &state.data.poi_metrics)) { if has_travel && !row_passes_travel_filters( state.data.postcode(row), &travel_entries, &travel_data, ) { return; } matching_rows.push(row); } }); let total_count = matching_rows.len(); let filter_exclusions = if total_count == 0 { top_filter_exclusions( &area_rows, &parsed_filters, &parsed_enum_filters, &parsed_poi_filters, &travel_entries, &travel_data, &state.data, ) } else { Vec::new() }; // Pick central_postcode: prefer the postcode with the shortest travel time // for the requested journey destination (so it has journey data). Fall back // to geographic proximity to the hexagon center. let central_postcode = if !matching_rows.is_empty() { if let Some(ref travel_data) = journey_travel_data { // Find the row with the shortest travel time in the travel data let best_row = matching_rows .iter() .copied() .filter_map(|row| { let pc = state.data.postcode(row); travel_data.get(pc).map(|td| (row, td.minutes)) }) .min_by_key(|&(_, mins)| mins) .map(|(row, _)| row); // Fall back to geographic center if no row has travel data let row = best_row.unwrap_or_else(|| { let center: h3o::LatLng = cell.into(); let center_lat = center.lat() as f32; let center_lon = center.lng() as f32; matching_rows .iter() .copied() .min_by(|&a, &b| { let da = (state.data.lat[a] - center_lat).powi(2) + (state.data.lon[a] - center_lon).powi(2); let db = (state.data.lat[b] - center_lat).powi(2) + (state.data.lon[b] - center_lon).powi(2); da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal) }) .expect("matching_rows is non-empty") }); Some(state.data.postcode(row).to_string()) } else { // No journey destination requested — use geographic center let center: h3o::LatLng = cell.into(); let center_lat = center.lat() as f32; let center_lon = center.lng() as f32; let closest_row = matching_rows .iter() .copied() .min_by(|&a, &b| { let da = (state.data.lat[a] - center_lat).powi(2) + (state.data.lon[a] - center_lon).powi(2); let db = (state.data.lat[b] - center_lat).powi(2) + (state.data.lon[b] - center_lon).powi(2); da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal) }) .expect("matching_rows is non-empty"); Some(state.data.postcode(closest_row).to_string()) } } else { None }; let price_history = stats::extract_price_history(&matching_rows, &state.data, &state.feature_name_to_index); let (mut numeric_features, enum_features_out) = stats::compute_feature_stats( &matching_rows, &state.data, &state.data.feature_names, &state.data.enum_values, &state.data.feature_stats, fields_specified, &field_set, ); numeric_features.extend(stats::compute_poi_feature_stats( &matching_rows, &state.data.poi_metrics, fields_specified, &field_set, )); let elapsed = start_time.elapsed(); info!( h3 = %h3_str, resolution, total_count, filters = num_filters, filters_raw = filters_str.as_deref().unwrap_or("-"), travel_entries = travel_entries.len(), ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0), "GET /api/hexagon-stats" ); Ok(HexagonStatsResponse { count: total_count, numeric_features, enum_features: enum_features_out, price_history, central_postcode, filter_exclusions, }) }) .await .map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()).into_response())? .map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error).into_response())?; Ok(Json(response)) } #[cfg(test)] mod tests { use super::*; #[test] fn default_area_stat_fields_skip_amenities() { let (fields_specified, field_set) = parse_area_stats_field_set(None); assert!(fields_specified); assert!(field_set.contains("Property type")); assert!(field_set.contains("Street tree density percentile")); assert!(field_set.contains("Noise (dB)")); assert!(!field_set.contains("Max available download speed (Mbps)")); assert!(!field_set.contains("Distance to nearest amenity (Cafe) (km)")); } #[test] fn explicit_area_stat_fields_are_respected() { let (fields_specified, field_set) = parse_area_stats_field_set(Some("Noise (dB);;Property type")); assert!(fields_specified); assert!(field_set.contains("Noise (dB)")); assert!(field_set.contains("Property type")); assert_eq!(field_set.len(), 2); } }