//! Request/response types and the axum handler for the hexagon statistics
//! endpoint (`GET /api/hexagon-stats`).
use std::cmp::Ordering;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;

use axum::extract::{Query, State};
use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use axum::Extension;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};

use crate::auth::OptionalUser;
use crate::licensing::check_license_bounds;
use crate::parsing::{
    cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_set, parse_filters,
    row_passes_filters, validate_h3_resolution,
};
use crate::state::SharedState;

use super::stats;

#[derive(Serialize)]
|
|
pub struct HistogramStats {
|
|
pub min: f64,
|
|
pub max: f64,
|
|
/// 1st percentile (left edge of main distribution)
|
|
pub p1: f64,
|
|
/// 99th percentile (right edge of main distribution)
|
|
pub p99: f64,
|
|
pub counts: Vec<u64>,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct NumericFeatureStats {
|
|
pub name: String,
|
|
pub count: usize,
|
|
pub min: f64,
|
|
pub max: f64,
|
|
pub mean: f64,
|
|
pub histogram: HistogramStats,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct EnumFeatureStats {
|
|
pub name: String,
|
|
pub counts: HashMap<String, u64>,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct PricePoint {
|
|
pub year: f32,
|
|
pub price: f32,
|
|
}
|
|
|
|
#[derive(Serialize)]
|
|
pub struct HexagonStatsResponse {
|
|
pub count: usize,
|
|
pub numeric_features: Vec<NumericFeatureStats>,
|
|
pub enum_features: Vec<EnumFeatureStats>,
|
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
|
pub price_history: Vec<PricePoint>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub central_postcode: Option<String>,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct HexagonStatsParams {
|
|
pub h3: String,
|
|
pub resolution: u8,
|
|
pub filters: Option<String>,
|
|
/// Comma-separated feature names to include in stats response.
|
|
/// Only listed features are computed; if absent or empty, no features are returned.
|
|
pub fields: Option<String>,
|
|
/// When set (with journey_slug), pick central_postcode as the postcode with the
|
|
/// shortest travel time for this mode+slug (so it has journey data).
|
|
pub journey_mode: Option<String>,
|
|
pub journey_slug: Option<String>,
|
|
}
|
|
|
|
pub async fn get_hexagon_stats(
|
|
State(shared): State<Arc<SharedState>>,
|
|
Extension(user): Extension<OptionalUser>,
|
|
Query(params): Query<HexagonStatsParams>,
|
|
) -> Result<Json<HexagonStatsResponse>, axum::response::Response> {
|
|
let state = shared.load_state();
|
|
let cell = h3o::CellIndex::from_str(¶ms.h3).map_err(|error| {
|
|
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
|
|
(
|
|
StatusCode::BAD_REQUEST,
|
|
format!("Invalid H3 cell: {}", error),
|
|
)
|
|
.into_response()
|
|
})?;
|
|
let cell_u64: u64 = cell.into();
|
|
|
|
let resolution = params.resolution;
|
|
validate_h3_resolution(resolution).map_err(IntoResponse::into_response)?;
|
|
|
|
// License check using H3 cell bounds
|
|
let h3_bounds = h3_cell_bounds(cell, 0.0);
|
|
check_license_bounds(&user.0, h3_bounds)?;
|
|
|
|
let h3_str = params.h3;
|
|
let quant = state.data.quant_ref();
|
|
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
|
params.filters.as_deref(),
|
|
&state.feature_name_to_index,
|
|
&state.data.enum_values,
|
|
&quant,
|
|
)
|
|
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
|
|
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
|
let filters_str = params.filters;
|
|
|
|
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());
|
|
|
|
// Load travel time data for central_postcode selection (if requested)
|
|
let journey_travel_data = match (¶ms.journey_mode, ¶ms.journey_slug) {
|
|
(Some(mode), Some(slug)) if state.travel_time_store.has_destination(mode, slug) => {
|
|
state.travel_time_store.get(mode, slug).ok()
|
|
}
|
|
_ => None,
|
|
};
|
|
|
|
let response = tokio::task::spawn_blocking(move || {
|
|
let start_time = std::time::Instant::now();
|
|
let precomputed = &state.h3_cells;
|
|
let h3_res = h3o::Resolution::try_from(resolution)
|
|
.map_err(|err| format!("Invalid H3 resolution {}: {}", resolution, err))?;
|
|
let need_parent = needs_parent(resolution);
|
|
let num_features = state.data.num_features;
|
|
let feature_data = &state.data.feature_data;
|
|
|
|
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
|
|
|
|
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
|
|
let mut matching_rows: Vec<usize> = Vec::new();
|
|
state
|
|
.grid
|
|
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
|
|
let row = row_idx as usize;
|
|
if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache)
|
|
== cell_u64
|
|
&& row_passes_filters(
|
|
row,
|
|
&parsed_filters,
|
|
&parsed_enum_filters,
|
|
feature_data,
|
|
num_features,
|
|
)
|
|
{
|
|
matching_rows.push(row);
|
|
}
|
|
});
|
|
|
|
let total_count = matching_rows.len();
|
|
|
|
// Pick central_postcode: prefer the postcode with the shortest travel time
|
|
// for the requested journey destination (so it has journey data). Fall back
|
|
// to geographic proximity to the hexagon center.
|
|
let central_postcode = if !matching_rows.is_empty() {
|
|
if let Some(ref travel_data) = journey_travel_data {
|
|
// Find the row with the shortest travel time in the travel data
|
|
let best_row = matching_rows
|
|
.iter()
|
|
.copied()
|
|
.filter_map(|row| {
|
|
let pc = state.data.postcode(row);
|
|
travel_data.get(pc).map(|td| (row, td.minutes))
|
|
})
|
|
.min_by_key(|&(_, mins)| mins)
|
|
.map(|(row, _)| row);
|
|
|
|
// Fall back to geographic center if no row has travel data
|
|
let row = best_row.unwrap_or_else(|| {
|
|
let center: h3o::LatLng = cell.into();
|
|
let center_lat = center.lat() as f32;
|
|
let center_lon = center.lng() as f32;
|
|
matching_rows
|
|
.iter()
|
|
.copied()
|
|
.min_by(|&a, &b| {
|
|
let da = (state.data.lat[a] - center_lat).powi(2)
|
|
+ (state.data.lon[a] - center_lon).powi(2);
|
|
let db = (state.data.lat[b] - center_lat).powi(2)
|
|
+ (state.data.lon[b] - center_lon).powi(2);
|
|
da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
|
|
})
|
|
.expect("matching_rows is non-empty")
|
|
});
|
|
Some(state.data.postcode(row).to_string())
|
|
} else {
|
|
// No journey destination requested — use geographic center
|
|
let center: h3o::LatLng = cell.into();
|
|
let center_lat = center.lat() as f32;
|
|
let center_lon = center.lng() as f32;
|
|
let closest_row = matching_rows
|
|
.iter()
|
|
.copied()
|
|
.min_by(|&a, &b| {
|
|
let da = (state.data.lat[a] - center_lat).powi(2)
|
|
+ (state.data.lon[a] - center_lon).powi(2);
|
|
let db = (state.data.lat[b] - center_lat).powi(2)
|
|
+ (state.data.lon[b] - center_lon).powi(2);
|
|
da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
|
|
})
|
|
.expect("matching_rows is non-empty");
|
|
Some(state.data.postcode(closest_row).to_string())
|
|
}
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let price_history =
|
|
stats::extract_price_history(&matching_rows, &state.data, &state.feature_name_to_index);
|
|
|
|
let (numeric_features, enum_features_out) = stats::compute_feature_stats(
|
|
&matching_rows,
|
|
&state.data,
|
|
&state.data.feature_names,
|
|
&state.data.enum_values,
|
|
&state.data.feature_stats,
|
|
fields_specified,
|
|
&field_set,
|
|
);
|
|
|
|
let elapsed = start_time.elapsed();
|
|
info!(
|
|
h3 = %h3_str,
|
|
resolution,
|
|
total_count,
|
|
filters = num_filters,
|
|
filters_raw = filters_str.as_deref().unwrap_or("-"),
|
|
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
|
|
"GET /api/hexagon-stats"
|
|
);
|
|
|
|
Ok(HexagonStatsResponse {
|
|
count: total_count,
|
|
numeric_features,
|
|
enum_features: enum_features_out,
|
|
price_history,
|
|
central_postcode,
|
|
})
|
|
})
|
|
.await
|
|
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()).into_response())?
|
|
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error).into_response())?;
|
|
|
|
Ok(Json(response))
|
|
}
|