Format and lint

This commit is contained in:
Andras Schmelczer 2026-02-08 12:37:07 +00:00
parent 42ee2d4c51
commit 04a78e7bfe
75 changed files with 1290 additions and 719 deletions

View file

@ -0,0 +1,268 @@
use std::collections::HashMap;
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::Json;
use serde::Deserialize;
use tracing::{info, warn};
use crate::parsing::{parse_filters, row_passes_filters};
use crate::state::AppState;
use super::hexagon_stats::{
EnumFeatureStats, HexagonStatsResponse, HistogramStats, NumericFeatureStats, PricePoint,
};
/// Query-string parameters accepted by `get_postcode_stats`.
#[derive(Deserialize)]
pub struct PostcodeStatsParams {
/// Postcode to report on. The handler uppercases it and collapses runs of
/// whitespace to a single space before looking it up.
pub postcode: String,
/// Optional raw filter expression; passed as-is to `parse_filters`.
pub filters: Option<String>,
/// Comma-separated feature names to include in stats response.
/// When present, only the listed features are computed; a present-but-empty
/// list therefore yields no features. When the parameter is absent the handler
/// applies no field restriction and computes every feature.
/// NOTE(review): this comment previously said an absent value returns no
/// features, which is not what the handler does — confirm which is intended.
pub fields: Option<String>,
}
pub async fn get_postcode_stats(
state: Arc<AppState>,
Query(params): Query<PostcodeStatsParams>,
) -> Result<Json<HexagonStatsResponse>, (StatusCode, String)> {
// Normalize postcode: uppercase, collapse whitespace
let normalized = params
.postcode
.to_uppercase()
.split_whitespace()
.collect::<Vec<_>>()
.join(" ");
// Look up postcode centroid for spatial search
let pc_idx = match state.postcode_data.postcode_to_idx.get(&normalized) {
Some(&idx) => idx,
None => {
warn!(postcode = %normalized, "Postcode not found");
return Err((
StatusCode::NOT_FOUND,
format!("Postcode not found: {}", normalized),
));
}
};
let (centroid_lat, centroid_lon) = state.postcode_data.centroids[pc_idx];
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let fields_specified = params.fields.is_some();
let field_set: std::collections::HashSet<String> = params
.fields
.as_ref()
.map(|fields_str| {
fields_str
.split(',')
.map(|field| field.trim().to_string())
.filter(|field| !field.is_empty())
.collect()
})
.unwrap_or_default();
let postcode_str = normalized.clone();
let response = tokio::task::spawn_blocking(move || {
let start_time = std::time::Instant::now();
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
// Search ±0.02° around centroid (~2km, generous for a postcode)
let offset: f64 = 0.02;
let min_lat = centroid_lat as f64 - offset;
let max_lat = centroid_lat as f64 + offset;
let min_lon = centroid_lon as f64 - offset;
let max_lon = centroid_lon as f64 + offset;
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
let row_postcode = state.data.postcode(row);
if row_postcode == postcode_str
&& row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
)
{
matching_rows.push(row);
}
});
let total_count = matching_rows.len();
// Collect price history (year, price) pairs
let price_history = {
let year_idx = state
.feature_name_to_index
.get("Date of last transaction")
.copied();
let price_idx = state.feature_name_to_index.get("Last known price").copied();
match (year_idx, price_idx) {
(Some(yi), Some(pi)) => {
let mut points: Vec<PricePoint> = matching_rows
.iter()
.filter_map(|&row| {
let year = feature_data[row * num_features + yi];
let price = feature_data[row * num_features + pi];
if year.is_finite() && price.is_finite() {
Some(PricePoint { year, price })
} else {
None
}
})
.collect();
// Cap at 5000 points by evenly sampling
if points.len() > 5000 {
let step = points.len() as f64 / 5000.0;
points = (0..5000)
.map(|i| {
let idx = (i as f64 * step) as usize;
PricePoint {
year: points[idx].year,
price: points[idx].price,
}
})
.collect();
}
points
}
_ => Vec::new(),
}
};
let mut numeric_features = Vec::new();
let mut enum_features_out = Vec::new();
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
if fields_specified && !field_set.contains(feature_name.as_str()) {
continue;
}
if let Some(enum_values) = state.data.enum_values.get(&feature_index) {
// Enum feature: count occurrences of each value
let mut value_counts = vec![0u64; enum_values.len()];
for &row in &matching_rows {
let value = feature_data[row * num_features + feature_index];
if value.is_finite() {
let idx = value as usize;
if idx < value_counts.len() {
value_counts[idx] += 1;
}
}
}
let counts: HashMap<String, u64> = value_counts
.iter()
.enumerate()
.filter(|(_, &count)| count > 0)
.map(|(idx, &count)| (enum_values[idx].clone(), count))
.collect();
if !counts.is_empty() {
enum_features_out.push(EnumFeatureStats {
name: feature_name.clone(),
counts,
});
}
} else {
// Numeric feature: compute stats and histogram
let global_hist = &state.data.feature_stats[feature_index].histogram;
let p1 = global_hist.p1;
let p99 = global_hist.p99;
let num_bins = global_hist.counts.len();
let mut count = 0usize;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64;
let mut bins = vec![0u64; num_bins];
let middle_bins = num_bins.saturating_sub(2);
let middle_width = if middle_bins > 0 && p99 > p1 {
(p99 - p1) / middle_bins as f32
} else {
0.0
};
for &row in &matching_rows {
let value = feature_data[row * num_features + feature_index];
if value.is_finite() {
count += 1;
if value < min_value {
min_value = value;
}
if value > max_value {
max_value = value;
}
sum += value as f64;
let bin = if value < p1 {
0
} else if value >= p99 {
num_bins - 1
} else if middle_width > 0.0 {
let middle_bin = ((value - p1) / middle_width) as usize;
(1 + middle_bin).min(num_bins - 2)
} else {
num_bins / 2
};
bins[bin] += 1;
}
}
if count > 0 {
numeric_features.push(NumericFeatureStats {
name: feature_name.clone(),
count,
min: min_value as f64,
max: max_value as f64,
mean: sum / count as f64,
histogram: HistogramStats {
min: global_hist.min as f64,
max: global_hist.max as f64,
p1: p1 as f64,
p99: p99 as f64,
counts: bins,
},
});
}
}
}
let elapsed = start_time.elapsed();
info!(
postcode = %postcode_str,
total_count,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/postcode-stats"
);
Ok(HexagonStatsResponse {
count: total_count,
numeric_features,
enum_features: enum_features_out,
price_history,
})
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
Ok(Json(response))
}