Test changes
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 8m20s
CI / Check (push) Failing after 10m40s

This commit is contained in:
Andras Schmelczer 2026-05-09 11:35:38 +01:00
parent 4c95815dc8
commit be02fc16bb
41 changed files with 4224 additions and 759 deletions

View file

@ -1,6 +1,7 @@
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use std::time::Duration;
use axum::extract::{Query, State};
use axum::http::{header, HeaderMap, StatusCode};
@ -13,14 +14,18 @@ use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::consts::NAN_U16;
use crate::data::QuantRef;
use crate::features::INTEGER_BIN_FEATURES;
use crate::data::{PostcodePoiMetrics, QuantRef};
use crate::features;
use crate::licensing::check_license_bounds;
use crate::parsing::{parse_field_indices, parse_filters, require_bounds, row_passes_filters};
use crate::parsing::{
parse_field_indices_with_poi, parse_filters_with_poi, require_bounds, row_passes_filters,
row_passes_poi_filters,
};
use crate::routes::{fetch_screenshot_bytes, FeatureInfo};
use crate::state::SharedState;
const MAX_EXPORT_POSTCODES: usize = 250;
const EXPORT_SCREENSHOT_TIMEOUT_SECS: u64 = 12;
/// Height (in pixels) reserved for the screenshot row
const IMAGE_ROW_HEIGHT: f64 = 225.0;
@ -41,11 +46,11 @@ struct PostcodeExportAgg {
}
impl PostcodeExportAgg {
fn new(num_features: usize) -> Self {
fn new(total_features: usize) -> Self {
Self {
count: 0,
sums: vec![0.0; num_features],
finite_counts: vec![0; num_features],
sums: vec![0.0; total_features],
finite_counts: vec![0; total_features],
enum_freqs: FxHashMap::default(),
}
}
@ -58,6 +63,7 @@ impl PostcodeExportAgg {
num_features: usize,
enum_indices: &FxHashMap<usize, ()>,
quant: &QuantRef,
poi_metrics: &PostcodePoiMetrics,
) {
self.count += 1;
let base = row * num_features;
@ -79,6 +85,18 @@ impl PostcodeExportAgg {
self.finite_counts[feat_idx] += 1;
}
}
let poi_offset = num_features;
for metric_idx in 0..poi_metrics.num_features() {
let raw = poi_metrics.raw_for_property_row(row, metric_idx);
if raw == NAN_U16 {
continue;
}
let value = poi_metrics.decode_raw(metric_idx, raw);
let out_idx = poi_offset + metric_idx;
self.sums[out_idx] += value as f64;
self.finite_counts[out_idx] += 1;
}
}
}
@ -138,13 +156,17 @@ pub async fn get_export(
check_license_bounds(&user.0, (south, west, north, east), None)?;
let quant = state.data.quant_ref();
let (parsed_filters, parsed_enum_filters) = parse_filters(
let poi_quant = state.data.poi_metrics.quant_ref();
let (parsed_filters, parsed_enum_filters, parsed_poi_filters) = parse_filters_with_poi(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
&quant,
&state.data.poi_metrics.name_to_index,
&poi_quant,
)
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let has_poi_filters = !parsed_poi_filters.is_empty();
let filters_str = params.filters;
let fields_str = params.fields;
@ -164,16 +186,28 @@ pub async fn get_export(
// Fetch screenshot (async, before spawn_blocking)
let auth_header = headers.get(header::AUTHORIZATION);
let screenshot_bytes = match fetch_screenshot_bytes(&state, &frontend_params, auth_header).await
let screenshot_fetch = fetch_screenshot_bytes(&state, &frontend_params, auth_header);
let screenshot_bytes = match tokio::time::timeout(
Duration::from_secs(EXPORT_SCREENSHOT_TIMEOUT_SECS),
screenshot_fetch,
)
.await
{
Ok(bytes) => {
Ok(Ok(bytes)) => {
info!(bytes = bytes.len(), "Fetched screenshot for export");
Some(bytes)
}
Err(err) => {
Ok(Err(err)) => {
warn!("Screenshot failed for export: {err}");
None
}
Err(_) => {
warn!(
timeout_secs = EXPORT_SCREENSHOT_TIMEOUT_SECS,
"Screenshot timed out for export"
);
None
}
};
// Build feature name → description map from the precomputed features response
@ -200,6 +234,9 @@ pub async fn get_export(
let feature_names = &state.data.feature_names;
let enum_values = &state.data.enum_values;
let postcode_data = &state.postcode_data;
let poi_metrics = &state.data.poi_metrics;
let poi_offset = num_features;
let total_export_features = num_features + poi_metrics.num_features();
// Build set of enum feature indices for quick lookup
let enum_indices: FxHashMap<usize, ()> = enum_values.keys().map(|&idx| (idx, ())).collect();
@ -219,6 +256,10 @@ pub async fn get_export(
) {
return;
}
if has_poi_filters && !row_passes_poi_filters(row, &parsed_poi_filters, poi_metrics)
{
return;
}
let postcode = state.data.postcode(row);
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
postcode_rows.entry(pc_idx).or_default().push(row);
@ -229,9 +270,16 @@ pub async fn get_export(
let mut postcode_aggs: Vec<(usize, PostcodeExportAgg)> =
Vec::with_capacity(postcode_rows.len());
for (pc_idx, rows) in postcode_rows {
let mut agg = PostcodeExportAgg::new(num_features);
let mut agg = PostcodeExportAgg::new(total_export_features);
for &row in &rows {
agg.add_row(feature_data, row, num_features, &enum_indices, &quant);
agg.add_row(
feature_data,
row,
num_features,
&enum_indices,
&quant,
poi_metrics,
);
}
if agg.count > 0 {
postcode_aggs.push((pc_idx, agg));
@ -265,14 +313,19 @@ pub async fn get_export(
// Determine column order: filter features first, then remaining
let filter_feature_names = extract_filter_feature_names(filters_str.as_deref());
let field_indices =
parse_field_indices(fields_str.as_deref(), &state.feature_name_to_index)
.map_err(|err| err.1)?;
let field_indices = parse_field_indices_with_poi(
fields_str.as_deref(),
&state.feature_name_to_index,
&state.data.poi_metrics.name_to_index,
)
.map_err(|err| err.1)?;
let all_feature_indices: Vec<usize> = if let Some(ref indices) = field_indices {
indices.clone()
let all_feature_indices: Vec<usize> = if let Some(ref indices) = field_indices.normal {
let mut selected = indices.clone();
selected.extend(field_indices.poi.iter().map(|idx| poi_offset + *idx));
selected
} else {
let mut ordered = Vec::with_capacity(num_features);
let mut ordered = Vec::with_capacity(total_export_features);
let mut used = FxHashSet::default();
for name in &filter_feature_names {
@ -280,6 +333,11 @@ pub async fn get_export(
if used.insert(idx) {
ordered.push(idx);
}
} else if let Some(&idx) = state.data.poi_metrics.name_to_index.get(name.as_str()) {
let virtual_idx = poi_offset + idx;
if used.insert(virtual_idx) {
ordered.push(virtual_idx);
}
}
}
for idx in 0..num_features {
@ -287,15 +345,42 @@ pub async fn get_export(
ordered.push(idx);
}
}
for idx in 0..poi_metrics.num_features() {
let virtual_idx = poi_offset + idx;
if used.insert(virtual_idx) {
ordered.push(virtual_idx);
}
}
ordered
};
// Filter-only feature indices for the Selected sheet
let filter_feature_indices: Vec<usize> = filter_feature_names
.iter()
.filter_map(|name| state.feature_name_to_index.get(name.as_str()).copied())
.filter_map(|name| {
state
.feature_name_to_index
.get(name.as_str())
.copied()
.or_else(|| {
state
.data
.poi_metrics
.name_to_index
.get(name.as_str())
.map(|idx| poi_offset + *idx)
})
})
.collect();
let feature_name_for_idx = |idx: usize| -> &str {
if idx < num_features {
&feature_names[idx]
} else {
&poi_metrics.feature_names[idx - poi_offset]
}
};
// Build feature unit map (feat_idx → (prefix, suffix)) for number formatting
let feature_units: FxHashMap<usize, (&str, &str)> = state
.features_response
@ -309,16 +394,25 @@ pub async fn get_export(
suffix,
..
} => {
let idx = state.feature_name_to_index.get(name.as_str())?;
Some((*idx, (*prefix, *suffix)))
if let Some(&idx) = state.feature_name_to_index.get(name.as_str()) {
Some((idx, (*prefix, *suffix)))
} else {
state
.data
.poi_metrics
.name_to_index
.get(name.as_str())
.map(|idx| (poi_offset + *idx, (*prefix, *suffix)))
}
}
_ => None,
})
.collect();
let integer_feature_indices: FxHashSet<usize> = INTEGER_BIN_FEATURES
let integer_feature_indices: FxHashSet<usize> = all_feature_indices
.iter()
.filter_map(|name| state.feature_name_to_index.get(*name).copied())
.copied()
.filter(|&idx| features::has_integer_bins(feature_name_for_idx(idx)))
.collect();
// Build Excel number formats per feature index for unit display
@ -435,7 +529,7 @@ pub async fn get_export(
.write_string_with_format(
header_row,
col,
&feature_names[feat_idx],
feature_name_for_idx(feat_idx),
&header_fmt,
)
.map_err(|e| format!("Failed to write header: {e}"))?;
@ -453,7 +547,7 @@ pub async fn get_export(
for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
let col = (col_offset + 2) as u16;
let desc = feature_descriptions
.get(&feature_names[feat_idx])
.get(feature_name_for_idx(feat_idx))
.map(String::as_str)
.unwrap_or("");
sheet
@ -477,7 +571,7 @@ pub async fn get_export(
for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
let col = (col_offset + 2) as u16;
if enum_indices.contains_key(&feat_idx) {
if feat_idx < num_features && enum_indices.contains_key(&feat_idx) {
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
if let Some((&mode_bits, _)) =
freqs.iter().max_by_key(|(_, &count)| count)
@ -543,7 +637,7 @@ pub async fn get_export(
.map_err(|e| format!("Failed to set column width: {e}"))?;
for col_offset in 0..feat_indices.len() {
let col = (col_offset + 2) as u16;
let feat_name = &feature_names[feat_indices[col_offset]];
let feat_name = feature_name_for_idx(feat_indices[col_offset]);
let width = (feat_name.len() as f64 * 1.1).clamp(10.0, 30.0);
sheet
.set_column_width(col, width)