perfect-postcode/server-rs/src/routes/export.rs
Andras Schmelczer be02fc16bb
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 8m20s
CI / Check (push) Failing after 10m40s
Test changes
2026-05-09 11:35:38 +01:00

682 lines
25 KiB
Rust

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use std::time::Duration;
use axum::extract::{Query, State};
use axum::http::{header, HeaderMap, StatusCode};
use axum::response::IntoResponse;
use axum::Extension;
use rust_xlsxwriter::{Format, FormatAlign, FormatBorder, Image, Url, Workbook};
use rustc_hash::{FxHashMap, FxHashSet};
use serde::Deserialize;
use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::consts::NAN_U16;
use crate::data::{PostcodePoiMetrics, QuantRef};
use crate::features;
use crate::licensing::check_license_bounds;
use crate::parsing::{
parse_field_indices_with_poi, parse_filters_with_poi, require_bounds, row_passes_filters,
row_passes_poi_filters,
};
use crate::routes::{fetch_screenshot_bytes, FeatureInfo};
use crate::state::SharedState;
/// Maximum number of postcodes written to each export sheet. When more postcodes
/// match, the export handler samples the result set down to this many rows.
const MAX_EXPORT_POSTCODES: usize = 250;
/// How long (in seconds) the export waits for the map screenshot before giving up
/// and producing the workbook without an image.
const EXPORT_SCREENSHOT_TIMEOUT_SECS: u64 = 12;
/// Height (in pixels) reserved for the screenshot row
// NOTE(review): this value is passed to `Worksheet::set_row_height`, which in
// rust_xlsxwriter appears to take Excel row-height units (points) rather than
// pixels; 225.0 points would correspond to the 300 px image height used when
// scaling the screenshot. Confirm and adjust the comment or the call.
const IMAGE_ROW_HEIGHT: f64 = 225.0;
/// Query-string parameters accepted by the export endpoint.
///
/// All fields are optional at the deserialization level; `bounds` is validated
/// by `require_bounds` inside the handler.
#[derive(Deserialize)]
pub struct ExportParams {
/// Bounding box of the export area; parsed by `require_bounds` into
/// `(south, west, north, east)`.
bounds: Option<String>,
/// Filter expression: `;;`-separated entries, each of the form `name:...`.
/// Parsed by `parse_filters_with_poi`.
filters: Option<String>,
/// Optional explicit selection of features for the "All Data" sheet; parsed by
/// `parse_field_indices_with_poi`.
fields: Option<String>,
}
/// Per-postcode accumulator for export aggregation (mean for numeric, mode for enum).
///
/// `sums` and `finite_counts` share one index space: regular feature indices
/// come first, followed by POI metric indices offset by the number of regular
/// features.
struct PostcodeExportAgg {
/// Number of property rows added to this postcode.
count: u32,
/// Running sum of decoded values per feature slot (numeric features and POI metrics).
sums: Vec<f64>,
/// Number of non-NaN values that contributed to each entry of `sums`.
finite_counts: Vec<u32>,
/// feat_idx -> (value_bits -> count) for enum mode calculation
enum_freqs: FxHashMap<usize, FxHashMap<u32, u32>>,
}
impl PostcodeExportAgg {
    /// Creates an empty accumulator with `total_features` sum/count slots
    /// (regular features followed by POI metrics).
    fn new(total_features: usize) -> Self {
        let sums = vec![0.0; total_features];
        let finite_counts = vec![0; total_features];
        Self {
            count: 0,
            sums,
            finite_counts,
            enum_freqs: FxHashMap::default(),
        }
    }

    /// Folds one property row into the accumulator.
    ///
    /// * `feature_data` - flat row-major matrix of quantized feature values.
    /// * `row` - index of the property row to add.
    /// * `num_features` - number of regular features per row (row stride).
    /// * `enum_indices` - feature indices that are enums; these are counted per
    ///   value instead of summed.
    /// * `quant` - decoder for regular feature values.
    /// * `poi_metrics` - source and decoder for the row's POI metric values.
    ///
    /// Values equal to `NAN_U16` are skipped.
    #[inline]
    fn add_row(
        &mut self,
        feature_data: &[u16],
        row: usize,
        num_features: usize,
        enum_indices: &FxHashMap<usize, ()>,
        quant: &QuantRef,
        poi_metrics: &PostcodePoiMetrics,
    ) {
        self.count += 1;

        // Regular features: slice out this row and visit only the present values.
        let start = row * num_features;
        let end = start + num_features;
        let present = feature_data[start..end]
            .iter()
            .copied()
            .enumerate()
            .filter(|&(_, encoded)| encoded != NAN_U16);
        for (idx, encoded) in present {
            let decoded = quant.decode(idx, encoded);
            if !enum_indices.contains_key(&idx) {
                self.sums[idx] += decoded as f64;
                self.finite_counts[idx] += 1;
                continue;
            }
            // Enum feature: tally occurrences keyed by the value's bit pattern.
            let histogram = self.enum_freqs.entry(idx).or_default();
            *histogram.entry(decoded.to_bits()).or_insert(0) += 1;
        }

        // POI metrics are stored after the regular features in `sums`/`finite_counts`.
        for metric in 0..poi_metrics.num_features() {
            let encoded = poi_metrics.raw_for_property_row(row, metric);
            if encoded != NAN_U16 {
                let decoded = poi_metrics.decode_raw(metric, encoded);
                let slot = num_features + metric;
                self.sums[slot] += decoded as f64;
                self.finite_counts[slot] += 1;
            }
        }
    }
}
/// Collects the feature names mentioned in the `filters` query parameter.
///
/// The parameter is a `;;`-separated list of entries; an entry contributes the
/// trimmed text before its first `:`. Entries without a `:` are ignored. Names
/// are returned in first-seen order with duplicates removed.
fn extract_filter_feature_names(filters_str: Option<&str>) -> Vec<String> {
    // A missing parameter behaves like an empty one: no entry contains a colon.
    let raw = filters_str.unwrap_or("");
    let mut ordered: Vec<String> = Vec::new();
    for (key, _rest) in raw.split(";;").filter_map(|clause| clause.split_once(':')) {
        let key = key.trim();
        let already_seen = ordered.iter().any(|seen| seen.as_str() == key);
        if !already_seen {
            ordered.push(key.to_owned());
        }
    }
    ordered
}
/// Builds the query string the frontend understands, used for both the
/// screenshot request and the dashboard link.
///
/// Produces `lat=..&lon=..&zoom=..` (4, 4 and 1 decimal places respectively),
/// followed by one URL-encoded `filter=` parameter per non-empty `;;`-separated
/// entry of `filters_str`.
fn build_frontend_params(
    center_lat: f64,
    center_lon: f64,
    zoom: f64,
    filters_str: Option<&str>,
) -> String {
    let mut query = format!(
        "lat={:.4}&lon={:.4}&zoom={:.1}",
        center_lat, center_lon, zoom
    );
    // An absent or empty filter string yields no non-empty entries.
    let clauses = filters_str
        .unwrap_or("")
        .split(";;")
        .filter(|clause| !clause.is_empty());
    for clause in clauses {
        query.push_str("&filter=");
        query.push_str(&urlencoding::encode(clause.trim()));
    }
    query
}
/// Handles the export endpoint: builds an XLSX workbook summarising the
/// properties inside `bounds` that pass `filters`, aggregated per postcode.
///
/// The workbook has two sheets:
/// * `Selected` - a link back to the dashboard, an optional map screenshot, and
///   only the features referenced by the filters.
/// * `All Data` - every feature (or the ones requested via `fields`), with the
///   filter features ordered first when no explicit `fields` are given.
///
/// Numeric features are exported as per-postcode means, enum features as the
/// most frequent value. At most `MAX_EXPORT_POSTCODES` postcodes are written;
/// larger result sets are sampled deterministically from the bounds.
///
/// # Errors
///
/// * The error produced by `require_bounds` / `check_license_bounds` when the
///   bounds are missing, invalid, or not licensed.
/// * `400 Bad Request` when `filters` or `fields` cannot be parsed.
/// * `500 Internal Server Error` when the blocking task fails to join or the
///   workbook cannot be built.
///
/// A failed or timed-out screenshot is not an error; the workbook is produced
/// without the image.
pub async fn get_export(
    State(shared): State<Arc<SharedState>>,
    headers: HeaderMap,
    Extension(user): Extension<OptionalUser>,
    Query(params): Query<ExportParams>,
) -> Result<impl IntoResponse, axum::response::Response> {
    let state = shared.load_state();
    let (south, west, north, east) =
        require_bounds(params.bounds).map_err(IntoResponse::into_response)?;
    check_license_bounds(&user.0, (south, west, north, east), None)?;

    let quant = state.data.quant_ref();
    let poi_quant = state.data.poi_metrics.quant_ref();
    let (parsed_filters, parsed_enum_filters, parsed_poi_filters) = parse_filters_with_poi(
        params.filters.as_deref(),
        &state.feature_name_to_index,
        &state.data.enum_values,
        &quant,
        &state.data.poi_metrics.name_to_index,
        &poi_quant,
    )
    .map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
    let has_poi_filters = !parsed_poi_filters.is_empty();

    // Validate `fields` before any expensive work. A malformed value is a client
    // error, so report it as 400 here instead of letting it surface as a 500 from
    // the blocking task after the screenshot fetch and aggregation have already run.
    let field_indices = parse_field_indices_with_poi(
        params.fields.as_deref(),
        &state.feature_name_to_index,
        &state.data.poi_metrics.name_to_index,
    )
    .map_err(|err| (StatusCode::BAD_REQUEST, err.1).into_response())?;

    let filters_str = params.filters;
    let public_url = state.public_url.clone();

    // Compute view center for screenshot and dashboard URL
    let center_lat = (south + north) / 2.0;
    let center_lon = (west + east) / 2.0;
    let lat_span = north - south;
    // Derive a zoom level from the latitude span; fall back to 12 for a
    // degenerate (zero or negative height) box.
    let zoom = if lat_span > 0.0 {
        (360.0 / lat_span).log2().clamp(1.0, 18.0)
    } else {
        12.0
    };
    let frontend_params =
        build_frontend_params(center_lat, center_lon, zoom, filters_str.as_deref());

    // Fetch screenshot (async, before spawn_blocking). Best effort: any failure
    // or timeout is logged and the export continues without an image.
    let auth_header = headers.get(header::AUTHORIZATION);
    let screenshot_fetch = fetch_screenshot_bytes(&state, &frontend_params, auth_header);
    let screenshot_bytes = match tokio::time::timeout(
        Duration::from_secs(EXPORT_SCREENSHOT_TIMEOUT_SECS),
        screenshot_fetch,
    )
    .await
    {
        Ok(Ok(bytes)) => {
            info!(bytes = bytes.len(), "Fetched screenshot for export");
            Some(bytes)
        }
        Ok(Err(err)) => {
            warn!("Screenshot failed for export: {err}");
            None
        }
        Err(_) => {
            warn!(
                timeout_secs = EXPORT_SCREENSHOT_TIMEOUT_SECS,
                "Screenshot timed out for export"
            );
            None
        }
    };

    // Build feature name → description map from the precomputed features response
    let feature_descriptions: FxHashMap<String, String> = state
        .features_response
        .groups
        .iter()
        .flat_map(|group| &group.features)
        .map(|feat| match feat {
            FeatureInfo::Numeric {
                name, description, ..
            } => (name.clone(), description.to_string()),
            FeatureInfo::Enum {
                name, description, ..
            } => (name.clone(), description.to_string()),
        })
        .collect();

    // Aggregation and workbook generation are CPU-bound, so run them off the
    // async executor.
    let bytes = tokio::task::spawn_blocking(move || -> Result<Vec<u8>, String> {
        let t0 = std::time::Instant::now();
        let num_features = state.data.num_features;
        let feature_data = &state.data.feature_data;
        let quant = state.data.quant_ref();
        let feature_names = &state.data.feature_names;
        let enum_values = &state.data.enum_values;
        let postcode_data = &state.postcode_data;
        let poi_metrics = &state.data.poi_metrics;
        // POI metrics live after the regular features in the combined index space.
        let poi_offset = num_features;
        let total_export_features = num_features + poi_metrics.num_features();

        // Build set of enum feature indices for quick lookup
        let enum_indices: FxHashMap<usize, ()> =
            enum_values.keys().map(|&idx| (idx, ())).collect();

        // Group rows by postcode
        let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
        state
            .grid
            .for_each_in_bounds(south, west, north, east, |row_idx| {
                let row = row_idx as usize;
                if !row_passes_filters(
                    row,
                    &parsed_filters,
                    &parsed_enum_filters,
                    feature_data,
                    num_features,
                ) {
                    return;
                }
                if has_poi_filters
                    && !row_passes_poi_filters(row, &parsed_poi_filters, poi_metrics)
                {
                    return;
                }
                let postcode = state.data.postcode(row);
                // Rows whose postcode is unknown to `postcode_data` are dropped.
                if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
                    postcode_rows.entry(pc_idx).or_default().push(row);
                }
            });

        // Aggregate per postcode
        let mut postcode_aggs: Vec<(usize, PostcodeExportAgg)> =
            Vec::with_capacity(postcode_rows.len());
        for (pc_idx, rows) in postcode_rows {
            let mut agg = PostcodeExportAgg::new(total_export_features);
            for &row in &rows {
                agg.add_row(
                    feature_data,
                    row,
                    num_features,
                    &enum_indices,
                    &quant,
                    poi_metrics,
                );
            }
            if agg.count > 0 {
                postcode_aggs.push((pc_idx, agg));
            }
        }

        // Sort by property count descending
        postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count));

        // Sample if too many postcodes. The seed is derived from the bounds only,
        // so the same request area always yields the same pseudo-random pick.
        let was_sampled = postcode_aggs.len() > MAX_EXPORT_POSTCODES;
        if was_sampled {
            let mut hasher = DefaultHasher::new();
            south.to_bits().hash(&mut hasher);
            west.to_bits().hash(&mut hasher);
            north.to_bits().hash(&mut hasher);
            east.to_bits().hash(&mut hasher);
            let seed = hasher.finish();
            let len = postcode_aggs.len();
            // Partial shuffle: move a pseudo-randomly chosen remaining element
            // into each of the first MAX_EXPORT_POSTCODES positions.
            for pick in 0..MAX_EXPORT_POSTCODES {
                let swap_idx = pick
                    + ((seed.wrapping_mul(pick as u64 + 1).wrapping_add(pick as u64)) as usize
                        % (len - pick));
                postcode_aggs.swap(pick, swap_idx);
            }
            postcode_aggs.truncate(MAX_EXPORT_POSTCODES);
            // Restore the count-descending order for the sampled subset.
            postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count));
        }

        // Determine column order: filter features first, then remaining
        let filter_feature_names = extract_filter_feature_names(filters_str.as_deref());
        let all_feature_indices: Vec<usize> = if let Some(ref indices) = field_indices.normal {
            // Explicit `fields`: use exactly the requested features, regular first.
            let mut selected = indices.clone();
            selected.extend(field_indices.poi.iter().map(|idx| poi_offset + *idx));
            selected
        } else {
            let mut ordered = Vec::with_capacity(total_export_features);
            let mut used = FxHashSet::default();
            for name in &filter_feature_names {
                if let Some(&idx) = state.feature_name_to_index.get(name.as_str()) {
                    if used.insert(idx) {
                        ordered.push(idx);
                    }
                } else if let Some(&idx) =
                    state.data.poi_metrics.name_to_index.get(name.as_str())
                {
                    let virtual_idx = poi_offset + idx;
                    if used.insert(virtual_idx) {
                        ordered.push(virtual_idx);
                    }
                }
            }
            for idx in 0..num_features {
                if used.insert(idx) {
                    ordered.push(idx);
                }
            }
            for idx in 0..poi_metrics.num_features() {
                let virtual_idx = poi_offset + idx;
                if used.insert(virtual_idx) {
                    ordered.push(virtual_idx);
                }
            }
            ordered
        };

        // Filter-only feature indices for the Selected sheet
        let filter_feature_indices: Vec<usize> = filter_feature_names
            .iter()
            .filter_map(|name| {
                state
                    .feature_name_to_index
                    .get(name.as_str())
                    .copied()
                    .or_else(|| {
                        state
                            .data
                            .poi_metrics
                            .name_to_index
                            .get(name.as_str())
                            .map(|idx| poi_offset + *idx)
                    })
            })
            .collect();

        // Resolves a combined index (regular or POI) to its display name.
        let feature_name_for_idx = |idx: usize| -> &str {
            if idx < num_features {
                &feature_names[idx]
            } else {
                &poi_metrics.feature_names[idx - poi_offset]
            }
        };

        // Build feature unit map (feat_idx → (prefix, suffix)) for number formatting
        let feature_units: FxHashMap<usize, (&str, &str)> = state
            .features_response
            .groups
            .iter()
            .flat_map(|group| &group.features)
            .filter_map(|feat| match feat {
                FeatureInfo::Numeric {
                    name,
                    prefix,
                    suffix,
                    ..
                } => {
                    if let Some(&idx) = state.feature_name_to_index.get(name.as_str()) {
                        Some((idx, (*prefix, *suffix)))
                    } else {
                        state
                            .data
                            .poi_metrics
                            .name_to_index
                            .get(name.as_str())
                            .map(|idx| (poi_offset + *idx, (*prefix, *suffix)))
                    }
                }
                _ => None,
            })
            .collect();

        let integer_feature_indices: FxHashSet<usize> = all_feature_indices
            .iter()
            .copied()
            .filter(|&idx| features::has_integer_bins(feature_name_for_idx(idx)))
            .collect();

        // Build Excel number formats per feature index for unit display
        let mut feat_num_fmts: FxHashMap<usize, Format> = FxHashMap::default();
        for &feat_idx in &all_feature_indices {
            if let Some(&(prefix, suffix)) = feature_units.get(&feat_idx) {
                if prefix.is_empty() && suffix.is_empty() {
                    continue;
                }
                // A prefix takes precedence over a suffix and is rendered
                // without decimals; suffix-only features keep one decimal
                // unless they are integer-binned.
                let num_fmt_str = if !prefix.is_empty() {
                    format!("\"{}\"#,##0", prefix)
                } else if integer_feature_indices.contains(&feat_idx) {
                    format!("#,##0\"{}\"", suffix)
                } else {
                    format!("#,##0.0\"{}\"", suffix)
                };
                feat_num_fmts.insert(feat_idx, Format::new().set_num_format(&num_fmt_str));
            }
        }

        // Build Excel workbook with two sheets
        let mut workbook = Workbook::new();

        // Formats
        let header_fmt = Format::new()
            .set_bold()
            .set_border_bottom(FormatBorder::Thin)
            .set_align(FormatAlign::Center);
        let desc_fmt = Format::new()
            .set_italic()
            .set_font_color("#666666")
            .set_font_size(9)
            .set_align(FormatAlign::Center)
            .set_text_wrap();
        let link_fmt = Format::new()
            .set_font_color("#0563C1")
            .set_underline(rust_xlsxwriter::FormatUnderline::Single)
            .set_font_size(11);
        let note_fmt = Format::new()
            .set_italic()
            .set_font_color("#666666")
            .set_align(FormatAlign::Left);

        // Dashboard URL
        let dashboard_url = format!("{}/?{}", public_url, frontend_params);

        // Sheet 1: "Selected" (filter features only) with link + screenshot
        // Sheet 2: "All Data" (all features)
        let sheet_configs: [(&str, &[usize], bool); 2] = [
            ("Selected", &filter_feature_indices, true),
            ("All Data", &all_feature_indices, false),
        ];

        for (sheet_name, feat_indices, include_header) in &sheet_configs {
            let sheet = workbook.add_worksheet();
            sheet
                .set_name(*sheet_name)
                .map_err(|e| format!("Failed to set sheet name: {e}"))?;

            let mut current_row = 0u32;
            if *include_header {
                // URL row
                sheet
                    .write_url(
                        0,
                        0,
                        Url::new(&dashboard_url).set_text("View on Perfect Postcode"),
                    )
                    .map_err(|e| format!("Failed to write URL: {e}"))?;
                sheet
                    .set_row_format(0, &link_fmt)
                    .map_err(|e| format!("Failed to set row format: {e}"))?;
                current_row = 1;

                // Screenshot
                if let Some(ref img_bytes) = screenshot_bytes {
                    match Image::new_from_buffer(img_bytes) {
                        Ok(mut image) => {
                            image = image.set_scale_to_size(400, 300, true);
                            sheet
                                .insert_image(current_row, 0, &image)
                                .map_err(|e| format!("Failed to insert screenshot: {e}"))?;
                            sheet
                                .set_row_height(current_row, IMAGE_ROW_HEIGHT)
                                .map_err(|e| format!("Failed to set image row height: {e}"))?;
                            current_row += 1;
                        }
                        Err(err) => {
                            // An undecodable image is not fatal; skip it.
                            warn!("Failed to parse screenshot for export: {err}");
                        }
                    }
                }

                // Blank row between image and header
                current_row += 1;
            }

            // Header row
            let header_row = current_row;
            sheet
                .write_string_with_format(header_row, 0, "Postcode", &header_fmt)
                .map_err(|e| format!("Failed to write header: {e}"))?;
            sheet
                .write_string_with_format(header_row, 1, "Properties", &header_fmt)
                .map_err(|e| format!("Failed to write header: {e}"))?;
            for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
                // Feature columns start after the two fixed columns.
                let col = (col_offset + 2) as u16;
                sheet
                    .write_string_with_format(
                        header_row,
                        col,
                        feature_name_for_idx(feat_idx),
                        &header_fmt,
                    )
                    .map_err(|e| format!("Failed to write header: {e}"))?;
            }

            // Description row
            let desc_row = header_row + 1;
            sheet
                .write_string_with_format(desc_row, 0, "", &desc_fmt)
                .map_err(|e| format!("Failed to write desc: {e}"))?;
            sheet
                .write_string_with_format(desc_row, 1, "Count of properties", &desc_fmt)
                .map_err(|e| format!("Failed to write desc: {e}"))?;
            for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
                let col = (col_offset + 2) as u16;
                let desc = feature_descriptions
                    .get(feature_name_for_idx(feat_idx))
                    .map(String::as_str)
                    .unwrap_or("");
                sheet
                    .write_string_with_format(desc_row, col, desc, &desc_fmt)
                    .map_err(|e| format!("Failed to write desc: {e}"))?;
            }

            // Data rows
            let data_start_row = desc_row + 1;
            for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
                let row = data_start_row + row_offset as u32;
                sheet
                    .write_string(row, 0, &postcode_data.postcodes[*pc_idx])
                    .map_err(|e| format!("Failed to write postcode: {e}"))?;
                sheet
                    .write_number(row, 1, agg.count as f64)
                    .map_err(|e| format!("Failed to write count: {e}"))?;

                for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
                    let col = (col_offset + 2) as u16;
                    if feat_idx < num_features && enum_indices.contains_key(&feat_idx) {
                        // Enum feature: write the label of the most frequent value.
                        // The cell stays empty when no value was observed.
                        if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
                            if let Some((&mode_bits, _)) =
                                freqs.iter().max_by_key(|(_, &count)| count)
                            {
                                let mode_f32 = f32::from_bits(mode_bits);
                                let mode_idx = mode_f32 as usize;
                                if let Some(values) = enum_values.get(&feat_idx) {
                                    if mode_idx < values.len() {
                                        sheet
                                            .write_string(row, col, &values[mode_idx])
                                            .map_err(|e| {
                                                format!("Failed to write enum value: {e}")
                                            })?;
                                    }
                                }
                            }
                        }
                    } else {
                        // Numeric feature or POI metric: write the mean, rounded to
                        // an integer for integer-binned features and to two
                        // decimals otherwise. No finite values -> empty cell.
                        let fc = agg.finite_counts[feat_idx];
                        if fc > 0 {
                            let mean = if integer_feature_indices.contains(&feat_idx) {
                                (agg.sums[feat_idx] / fc as f64).round()
                            } else {
                                (agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0
                            };
                            if let Some(fmt) = feat_num_fmts.get(&feat_idx) {
                                sheet
                                    .write_number_with_format(row, col, mean, fmt)
                                    .map_err(|e| format!("Failed to write numeric value: {e}"))?;
                            } else {
                                sheet
                                    .write_number(row, col, mean)
                                    .map_err(|e| format!("Failed to write numeric value: {e}"))?;
                            }
                        }
                    }
                }
            }

            // Sample note
            if was_sampled {
                // Leave one blank row between the data and the note.
                let note_row = data_start_row + postcode_aggs.len() as u32 + 1;
                let total_cols = (feat_indices.len() + 2) as u16;
                sheet
                    .merge_range(
                        note_row,
                        0,
                        note_row,
                        total_cols.saturating_sub(1),
                        &format!(
                            "Only the first {} postcodes shown (randomly sampled from results)",
                            MAX_EXPORT_POSTCODES
                        ),
                        &note_fmt,
                    )
                    .map_err(|e| format!("Failed to write note: {e}"))?;
            }

            // Column widths
            sheet
                .set_column_width(0, 12)
                .map_err(|e| format!("Failed to set column width: {e}"))?;
            sheet
                .set_column_width(1, 12)
                .map_err(|e| format!("Failed to set column width: {e}"))?;
            for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
                let col = (col_offset + 2) as u16;
                let feat_name = feature_name_for_idx(feat_idx);
                // Width scales with the header length, bounded to a readable range.
                let width = (feat_name.len() as f64 * 1.1).clamp(10.0, 30.0);
                sheet
                    .set_column_width(col, width)
                    .map_err(|e| format!("Failed to set column width: {e}"))?;
            }
        }

        let buf = workbook
            .save_to_buffer()
            .map_err(|err| format!("Failed to save workbook: {err}"))?;

        let t_total = t0.elapsed();
        info!(
            postcodes = postcode_aggs.len(),
            sampled = was_sampled,
            features = all_feature_indices.len(),
            has_screenshot = screenshot_bytes.is_some(),
            bytes = buf.len(),
            total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
            "GET /api/export"
        );
        Ok(buf)
    })
    .await
    // Outer error: the blocking task failed to join.
    .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response())?
    // Inner error: workbook generation failed.
    .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err).into_response())?;

    Ok((
        [
            (
                header::CONTENT_TYPE,
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            ),
            (
                header::CONTENT_DISPOSITION,
                "attachment; filename=\"perfect-postcode-export.xlsx\"",
            ),
        ],
        bytes,
    ))
}