perfect-postcode/server-rs/src/routes/hexagon_stats.rs
2026-03-17 21:08:32 +00:00

252 lines
9.1 KiB
Rust

use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use axum::extract::{Query, State};
use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use axum::Extension;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::licensing::check_license_bounds;
use crate::parsing::{
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_set, parse_filters,
row_passes_filters, validate_h3_resolution,
};
use crate::state::SharedState;
use super::stats;
#[derive(Serialize)]
pub struct HistogramStats {
pub min: f64,
pub max: f64,
/// 1st percentile (left edge of main distribution)
pub p1: f64,
/// 99th percentile (right edge of main distribution)
pub p99: f64,
pub counts: Vec<u64>,
}
#[derive(Serialize)]
pub struct NumericFeatureStats {
pub name: String,
pub count: usize,
pub min: f64,
pub max: f64,
pub mean: f64,
pub histogram: HistogramStats,
}
#[derive(Serialize)]
pub struct EnumFeatureStats {
pub name: String,
pub counts: HashMap<String, u64>,
}
#[derive(Serialize)]
pub struct PricePoint {
pub year: f32,
pub price: f32,
}
#[derive(Serialize)]
pub struct HexagonStatsResponse {
pub count: usize,
pub numeric_features: Vec<NumericFeatureStats>,
pub enum_features: Vec<EnumFeatureStats>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub price_history: Vec<PricePoint>,
#[serde(skip_serializing_if = "Option::is_none")]
pub central_postcode: Option<String>,
}
#[derive(Deserialize)]
pub struct HexagonStatsParams {
pub h3: String,
pub resolution: u8,
pub filters: Option<String>,
/// Comma-separated feature names to include in stats response.
/// Only listed features are computed; if absent or empty, no features are returned.
pub fields: Option<String>,
/// When set (with journey_slug), pick central_postcode as the postcode with the
/// shortest travel time for this mode+slug (so it has journey data).
pub journey_mode: Option<String>,
pub journey_slug: Option<String>,
}
pub async fn get_hexagon_stats(
State(shared): State<Arc<SharedState>>,
Extension(user): Extension<OptionalUser>,
Query(params): Query<HexagonStatsParams>,
) -> Result<Json<HexagonStatsResponse>, axum::response::Response> {
let state = shared.load_state();
let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
(
StatusCode::BAD_REQUEST,
format!("Invalid H3 cell: {}", error),
)
.into_response()
})?;
let cell_u64: u64 = cell.into();
let resolution = params.resolution;
validate_h3_resolution(resolution).map_err(IntoResponse::into_response)?;
// License check using H3 cell bounds
let h3_bounds = h3_cell_bounds(cell, 0.0);
check_license_bounds(&user.0, h3_bounds)?;
let h3_str = params.h3;
let quant = state.data.quant_ref();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
&quant,
)
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let filters_str = params.filters;
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());
// Load travel time data for central_postcode selection (if requested)
let journey_travel_data = match (&params.journey_mode, &params.journey_slug) {
(Some(mode), Some(slug)) if state.travel_time_store.has_destination(mode, slug) => {
state.travel_time_store.get(mode, slug).ok()
}
_ => None,
};
let response = tokio::task::spawn_blocking(move || {
let start_time = std::time::Instant::now();
let precomputed = &state.h3_cells;
let h3_res = h3o::Resolution::try_from(resolution)
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
let need_parent = needs_parent(resolution);
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache)
== cell_u64
&& row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
)
{
matching_rows.push(row);
}
});
let total_count = matching_rows.len();
// Pick central_postcode: prefer the postcode with the shortest travel time
// for the requested journey destination (so it has journey data). Fall back
// to geographic proximity to the hexagon center.
let central_postcode = if !matching_rows.is_empty() {
if let Some(ref travel_data) = journey_travel_data {
// Find the row with the shortest travel time in the travel data
let best_row = matching_rows
.iter()
.copied()
.filter_map(|row| {
let pc = state.data.postcode(row);
travel_data.get(pc).map(|td| (row, td.minutes))
})
.min_by_key(|&(_, mins)| mins)
.map(|(row, _)| row);
// Fall back to geographic center if no row has travel data
let row = best_row.unwrap_or_else(|| {
let center: h3o::LatLng = cell.into();
let center_lat = center.lat() as f32;
let center_lon = center.lng() as f32;
matching_rows
.iter()
.copied()
.min_by(|&a, &b| {
let da = (state.data.lat[a] - center_lat).powi(2)
+ (state.data.lon[a] - center_lon).powi(2);
let db = (state.data.lat[b] - center_lat).powi(2)
+ (state.data.lon[b] - center_lon).powi(2);
da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
})
.expect("matching_rows is non-empty")
});
Some(state.data.postcode(row).to_string())
} else {
// No journey destination requested — use geographic center
let center: h3o::LatLng = cell.into();
let center_lat = center.lat() as f32;
let center_lon = center.lng() as f32;
let closest_row = matching_rows
.iter()
.copied()
.min_by(|&a, &b| {
let da = (state.data.lat[a] - center_lat).powi(2)
+ (state.data.lon[a] - center_lon).powi(2);
let db = (state.data.lat[b] - center_lat).powi(2)
+ (state.data.lon[b] - center_lon).powi(2);
da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
})
.expect("matching_rows is non-empty");
Some(state.data.postcode(closest_row).to_string())
}
} else {
None
};
let price_history =
stats::extract_price_history(&matching_rows, &state.data, &state.feature_name_to_index);
let (numeric_features, enum_features_out) = stats::compute_feature_stats(
&matching_rows,
&state.data,
&state.data.feature_names,
&state.data.enum_values,
&state.data.feature_stats,
fields_specified,
&field_set,
);
let elapsed = start_time.elapsed();
info!(
h3 = %h3_str,
resolution,
total_count,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/hexagon-stats"
);
Ok(HexagonStatsResponse {
count: total_count,
numeric_features,
enum_features: enum_features_out,
price_history,
central_postcode,
})
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()).into_response())?
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error).into_response())?;
Ok(Json(response))
}