use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use std::time::Duration;

use axum::extract::{Query, State};
use axum::http::{header, HeaderMap, StatusCode};
use axum::response::IntoResponse;
use axum::Extension;
use rust_xlsxwriter::{Format, FormatAlign, FormatBorder, Image, Url, Workbook};
use rustc_hash::{FxHashMap, FxHashSet};
use serde::Deserialize;
use tracing::{info, warn};

use crate::auth::OptionalUser;
use crate::consts::NAN_U16;
use crate::data::{PostcodePoiMetrics, QuantRef};
use crate::features;
use crate::licensing::check_license_bounds;
use crate::parsing::{
    parse_field_indices_with_poi, parse_filters_with_poi, require_bounds, row_passes_filters,
    row_passes_poi_filters,
};
use crate::routes::{fetch_screenshot_bytes, FeatureInfo};
use crate::state::SharedState;

/// Maximum number of postcodes written to an export; larger result sets are sampled down.
const MAX_EXPORT_POSTCODES: usize = 250;
/// How long (in seconds) the export waits for the screenshot before continuing without it.
const EXPORT_SCREENSHOT_TIMEOUT_SECS: u64 = 12;
/// Height (in pixels) reserved for the screenshot row
const IMAGE_ROW_HEIGHT: f64 = 225.0;

/// Query-string parameters accepted by the export endpoint.
#[derive(Deserialize)]
pub struct ExportParams {
    /// Raw bounds string, handed to `require_bounds` as-is.
    // NOTE(review): `String` payload reconstructed (type argument was lost) — confirm against
    // the parameter type of `require_bounds`.
    bounds: Option<String>,
    /// Raw filter string: `;;`-separated entries, each of the form `name:...`.
    filters: Option<String>,
    /// Raw field-selection string, resolved by `parse_field_indices_with_poi`.
    fields: Option<String>,
}

/// Per-postcode accumulator for export aggregation (mean for numeric, mode for enum).
struct PostcodeExportAgg { count: u32, sums: Vec, finite_counts: Vec, /// feat_idx -> (value_bits -> count) for enum mode calculation enum_freqs: FxHashMap>, } impl PostcodeExportAgg { fn new(total_features: usize) -> Self { Self { count: 0, sums: vec![0.0; total_features], finite_counts: vec![0; total_features], enum_freqs: FxHashMap::default(), } } #[inline] fn add_row( &mut self, feature_data: &[u16], row: usize, num_features: usize, enum_indices: &FxHashMap, quant: &QuantRef, poi_metrics: &PostcodePoiMetrics, ) { self.count += 1; let base = row * num_features; let row_slice = &feature_data[base..base + num_features]; for (feat_idx, &raw) in row_slice.iter().enumerate() { if raw == NAN_U16 { continue; } let value = quant.decode(feat_idx, raw); if enum_indices.contains_key(&feat_idx) { *self .enum_freqs .entry(feat_idx) .or_default() .entry(value.to_bits()) .or_insert(0) += 1; } else { self.sums[feat_idx] += value as f64; self.finite_counts[feat_idx] += 1; } } let poi_offset = num_features; for metric_idx in 0..poi_metrics.num_features() { let raw = poi_metrics.raw_for_property_row(row, metric_idx); if raw == NAN_U16 { continue; } let value = poi_metrics.decode_raw(metric_idx, raw); let out_idx = poi_offset + metric_idx; self.sums[out_idx] += value as f64; self.finite_counts[out_idx] += 1; } } } /// Extract feature names referenced in the filters param (preserving order). fn extract_filter_feature_names(filters_str: Option<&str>) -> Vec { let input = match filters_str.filter(|text| !text.is_empty()) { Some(text) => text, None => return Vec::new(), }; let mut names = Vec::new(); for entry in input.split(";;") { let parts: Vec<&str> = entry.splitn(2, ':').collect(); if parts.len() == 2 { let name = parts[0].trim().to_string(); if !names.contains(&name) { names.push(name); } } } names } /// Build frontend-style query params for screenshot/dashboard URLs. 
fn build_frontend_params( center_lat: f64, center_lon: f64, zoom: f64, filters_str: Option<&str>, ) -> String { let mut parts = vec![ format!("lat={:.4}", center_lat), format!("lon={:.4}", center_lon), format!("zoom={:.1}", zoom), ]; if let Some(fs) = filters_str { if !fs.is_empty() { for entry in fs.split(";;") { if !entry.is_empty() { parts.push(format!("filter={}", urlencoding::encode(entry.trim()))); } } } } parts.join("&") } pub async fn get_export( State(shared): State>, headers: HeaderMap, Extension(user): Extension, Query(params): Query, ) -> Result { let state = shared.load_state(); let (south, west, north, east) = require_bounds(params.bounds).map_err(IntoResponse::into_response)?; check_license_bounds(&user.0, (south, west, north, east), None)?; let quant = state.data.quant_ref(); let poi_quant = state.data.poi_metrics.quant_ref(); let (parsed_filters, parsed_enum_filters, parsed_poi_filters) = parse_filters_with_poi( params.filters.as_deref(), &state.feature_name_to_index, &state.data.enum_values, &quant, &state.data.poi_metrics.name_to_index, &poi_quant, ) .map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?; let has_poi_filters = !parsed_poi_filters.is_empty(); let filters_str = params.filters; let fields_str = params.fields; let public_url = state.public_url.clone(); // Compute view center for screenshot and dashboard URL let center_lat = (south + north) / 2.0; let center_lon = (west + east) / 2.0; let lat_span = north - south; let zoom = if lat_span > 0.0 { (360.0 / lat_span).log2().clamp(1.0, 18.0) } else { 12.0 }; let frontend_params = build_frontend_params(center_lat, center_lon, zoom, filters_str.as_deref()); // Fetch screenshot (async, before spawn_blocking) let auth_header = headers.get(header::AUTHORIZATION); let screenshot_fetch = fetch_screenshot_bytes(&state, &frontend_params, auth_header); let screenshot_bytes = match tokio::time::timeout( Duration::from_secs(EXPORT_SCREENSHOT_TIMEOUT_SECS), screenshot_fetch, ) .await { 
Ok(Ok(bytes)) => { info!(bytes = bytes.len(), "Fetched screenshot for export"); Some(bytes) } Ok(Err(err)) => { warn!("Screenshot failed for export: {err}"); None } Err(_) => { warn!( timeout_secs = EXPORT_SCREENSHOT_TIMEOUT_SECS, "Screenshot timed out for export" ); None } }; // Build feature name → description map from the precomputed features response let feature_descriptions: FxHashMap = state .features_response .groups .iter() .flat_map(|group| &group.features) .map(|feat| match feat { FeatureInfo::Numeric { name, description, .. } => (name.clone(), description.to_string()), FeatureInfo::Enum { name, description, .. } => (name.clone(), description.to_string()), }) .collect(); let bytes = tokio::task::spawn_blocking(move || -> Result, String> { let t0 = std::time::Instant::now(); let num_features = state.data.num_features; let feature_data = &state.data.feature_data; let quant = state.data.quant_ref(); let feature_names = &state.data.feature_names; let enum_values = &state.data.enum_values; let postcode_data = &state.postcode_data; let poi_metrics = &state.data.poi_metrics; let poi_offset = num_features; let total_export_features = num_features + poi_metrics.num_features(); // Build set of enum feature indices for quick lookup let enum_indices: FxHashMap = enum_values.keys().map(|&idx| (idx, ())).collect(); // Group rows by postcode let mut postcode_rows: FxHashMap> = FxHashMap::default(); state .grid .for_each_in_bounds(south, west, north, east, |row_idx| { let row = row_idx as usize; if !row_passes_filters( row, &parsed_filters, &parsed_enum_filters, feature_data, num_features, ) { return; } if has_poi_filters && !row_passes_poi_filters(row, &parsed_poi_filters, poi_metrics) { return; } let postcode = state.data.postcode(row); if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) { postcode_rows.entry(pc_idx).or_default().push(row); } }); // Aggregate per postcode let mut postcode_aggs: Vec<(usize, PostcodeExportAgg)> = 
Vec::with_capacity(postcode_rows.len()); for (pc_idx, rows) in postcode_rows { let mut agg = PostcodeExportAgg::new(total_export_features); for &row in &rows { agg.add_row( feature_data, row, num_features, &enum_indices, &quant, poi_metrics, ); } if agg.count > 0 { postcode_aggs.push((pc_idx, agg)); } } // Sort by property count descending postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count)); // Sample if too many postcodes let was_sampled = postcode_aggs.len() > MAX_EXPORT_POSTCODES; if was_sampled { let mut hasher = DefaultHasher::new(); south.to_bits().hash(&mut hasher); west.to_bits().hash(&mut hasher); north.to_bits().hash(&mut hasher); east.to_bits().hash(&mut hasher); let seed = hasher.finish(); let len = postcode_aggs.len(); for pick in 0..MAX_EXPORT_POSTCODES { let swap_idx = pick + ((seed.wrapping_mul(pick as u64 + 1).wrapping_add(pick as u64)) as usize % (len - pick)); postcode_aggs.swap(pick, swap_idx); } postcode_aggs.truncate(MAX_EXPORT_POSTCODES); postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count)); } // Determine column order: filter features first, then remaining let filter_feature_names = extract_filter_feature_names(filters_str.as_deref()); let field_indices = parse_field_indices_with_poi( fields_str.as_deref(), &state.feature_name_to_index, &state.data.poi_metrics.name_to_index, ) .map_err(|err| err.1)?; let all_feature_indices: Vec = if let Some(ref indices) = field_indices.normal { let mut selected = indices.clone(); selected.extend(field_indices.poi.iter().map(|idx| poi_offset + *idx)); selected } else { let mut ordered = Vec::with_capacity(total_export_features); let mut used = FxHashSet::default(); for name in &filter_feature_names { if let Some(&idx) = state.feature_name_to_index.get(name.as_str()) { if used.insert(idx) { ordered.push(idx); } } else if let Some(&idx) = state.data.poi_metrics.name_to_index.get(name.as_str()) { let virtual_idx = poi_offset + idx; if used.insert(virtual_idx) { 
ordered.push(virtual_idx); } } } for idx in 0..num_features { if used.insert(idx) { ordered.push(idx); } } for idx in 0..poi_metrics.num_features() { let virtual_idx = poi_offset + idx; if used.insert(virtual_idx) { ordered.push(virtual_idx); } } ordered }; // Filter-only feature indices for the Selected sheet let filter_feature_indices: Vec = filter_feature_names .iter() .filter_map(|name| { state .feature_name_to_index .get(name.as_str()) .copied() .or_else(|| { state .data .poi_metrics .name_to_index .get(name.as_str()) .map(|idx| poi_offset + *idx) }) }) .collect(); let feature_name_for_idx = |idx: usize| -> &str { if idx < num_features { &feature_names[idx] } else { &poi_metrics.feature_names[idx - poi_offset] } }; // Build feature unit map (feat_idx → (prefix, suffix)) for number formatting let feature_units: FxHashMap = state .features_response .groups .iter() .flat_map(|group| &group.features) .filter_map(|feat| match feat { FeatureInfo::Numeric { name, prefix, suffix, .. } => { if let Some(&idx) = state.feature_name_to_index.get(name.as_str()) { Some((idx, (*prefix, *suffix))) } else { state .data .poi_metrics .name_to_index .get(name.as_str()) .map(|idx| (poi_offset + *idx, (*prefix, *suffix))) } } _ => None, }) .collect(); let integer_feature_indices: FxHashSet = all_feature_indices .iter() .copied() .filter(|&idx| features::has_integer_bins(feature_name_for_idx(idx))) .collect(); // Build Excel number formats per feature index for unit display let mut feat_num_fmts: FxHashMap = FxHashMap::default(); for &feat_idx in &all_feature_indices { if let Some(&(prefix, suffix)) = feature_units.get(&feat_idx) { if prefix.is_empty() && suffix.is_empty() { continue; } let num_fmt_str = if !prefix.is_empty() { format!("\"{}\"#,##0", prefix) } else if integer_feature_indices.contains(&feat_idx) { format!("#,##0\"{}\"", suffix) } else { format!("#,##0.0\"{}\"", suffix) }; feat_num_fmts.insert(feat_idx, Format::new().set_num_format(&num_fmt_str)); } } // Build Excel 
workbook with two sheets let mut workbook = Workbook::new(); // Formats let header_fmt = Format::new() .set_bold() .set_border_bottom(FormatBorder::Thin) .set_align(FormatAlign::Center); let desc_fmt = Format::new() .set_italic() .set_font_color("#666666") .set_font_size(9) .set_align(FormatAlign::Center) .set_text_wrap(); let link_fmt = Format::new() .set_font_color("#0563C1") .set_underline(rust_xlsxwriter::FormatUnderline::Single) .set_font_size(11); let note_fmt = Format::new() .set_italic() .set_font_color("#666666") .set_align(FormatAlign::Left); // Dashboard URL let dashboard_url = format!("{}/?{}", public_url, frontend_params); // Sheet 1: "Selected" (filter features only) with link + screenshot // Sheet 2: "All Data" (all features) let sheet_configs: [(&str, &[usize], bool); 2] = [ ("Selected", &filter_feature_indices, true), ("All Data", &all_feature_indices, false), ]; for (sheet_name, feat_indices, include_header) in &sheet_configs { let sheet = workbook.add_worksheet(); sheet .set_name(*sheet_name) .map_err(|e| format!("Failed to set sheet name: {e}"))?; let mut current_row = 0u32; if *include_header { // URL row sheet .write_url( 0, 0, Url::new(&dashboard_url).set_text("View on Perfect Postcode"), ) .map_err(|e| format!("Failed to write URL: {e}"))?; sheet .set_row_format(0, &link_fmt) .map_err(|e| format!("Failed to set row format: {e}"))?; current_row = 1; // Screenshot if let Some(ref img_bytes) = screenshot_bytes { match Image::new_from_buffer(img_bytes) { Ok(mut image) => { image = image.set_scale_to_size(400, 300, true); sheet .insert_image(current_row, 0, &image) .map_err(|e| format!("Failed to insert screenshot: {e}"))?; sheet .set_row_height(current_row, IMAGE_ROW_HEIGHT) .map_err(|e| format!("Failed to set image row height: {e}"))?; current_row += 1; } Err(err) => { warn!("Failed to parse screenshot for export: {err}"); } } } // Blank row between image and header current_row += 1; } // Header row let header_row = current_row; sheet 
.write_string_with_format(header_row, 0, "Postcode", &header_fmt) .map_err(|e| format!("Failed to write header: {e}"))?; sheet .write_string_with_format(header_row, 1, "Properties", &header_fmt) .map_err(|e| format!("Failed to write header: {e}"))?; for (col_offset, &feat_idx) in feat_indices.iter().enumerate() { let col = (col_offset + 2) as u16; sheet .write_string_with_format( header_row, col, feature_name_for_idx(feat_idx), &header_fmt, ) .map_err(|e| format!("Failed to write header: {e}"))?; } // Description row let desc_row = header_row + 1; sheet .write_string_with_format(desc_row, 0, "", &desc_fmt) .map_err(|e| format!("Failed to write desc: {e}"))?; sheet .write_string_with_format(desc_row, 1, "Count of properties", &desc_fmt) .map_err(|e| format!("Failed to write desc: {e}"))?; for (col_offset, &feat_idx) in feat_indices.iter().enumerate() { let col = (col_offset + 2) as u16; let desc = feature_descriptions .get(feature_name_for_idx(feat_idx)) .map(String::as_str) .unwrap_or(""); sheet .write_string_with_format(desc_row, col, desc, &desc_fmt) .map_err(|e| format!("Failed to write desc: {e}"))?; } // Data rows let data_start_row = desc_row + 1; for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() { let row = data_start_row + row_offset as u32; sheet .write_string(row, 0, &postcode_data.postcodes[*pc_idx]) .map_err(|e| format!("Failed to write postcode: {e}"))?; sheet .write_number(row, 1, agg.count as f64) .map_err(|e| format!("Failed to write count: {e}"))?; for (col_offset, &feat_idx) in feat_indices.iter().enumerate() { let col = (col_offset + 2) as u16; if feat_idx < num_features && enum_indices.contains_key(&feat_idx) { if let Some(freqs) = agg.enum_freqs.get(&feat_idx) { if let Some((&mode_bits, _)) = freqs.iter().max_by_key(|(_, &count)| count) { let mode_f32 = f32::from_bits(mode_bits); let mode_idx = mode_f32 as usize; if let Some(values) = enum_values.get(&feat_idx) { if mode_idx < values.len() { sheet.write_string(row, col, 
&values[mode_idx]).map_err( |e| format!("Failed to write enum value: {e}"), )?; } } } } } else { let fc = agg.finite_counts[feat_idx]; if fc > 0 { let mean = if integer_feature_indices.contains(&feat_idx) { (agg.sums[feat_idx] / fc as f64).round() } else { (agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0 }; if let Some(fmt) = feat_num_fmts.get(&feat_idx) { sheet .write_number_with_format(row, col, mean, fmt) .map_err(|e| format!("Failed to write numeric value: {e}"))?; } else { sheet .write_number(row, col, mean) .map_err(|e| format!("Failed to write numeric value: {e}"))?; } } } } } // Sample note if was_sampled { let note_row = data_start_row + postcode_aggs.len() as u32 + 1; let total_cols = (feat_indices.len() + 2) as u16; sheet .merge_range( note_row, 0, note_row, total_cols.saturating_sub(1), &format!( "Only the first {} postcodes shown (randomly sampled from results)", MAX_EXPORT_POSTCODES ), ¬e_fmt, ) .map_err(|e| format!("Failed to write note: {e}"))?; } // Column widths sheet .set_column_width(0, 12) .map_err(|e| format!("Failed to set column width: {e}"))?; sheet .set_column_width(1, 12) .map_err(|e| format!("Failed to set column width: {e}"))?; for col_offset in 0..feat_indices.len() { let col = (col_offset + 2) as u16; let feat_name = feature_name_for_idx(feat_indices[col_offset]); let width = (feat_name.len() as f64 * 1.1).clamp(10.0, 30.0); sheet .set_column_width(col, width) .map_err(|e| format!("Failed to set column width: {e}"))?; } } let buf = workbook .save_to_buffer() .map_err(|err| format!("Failed to save workbook: {err}"))?; let t_total = t0.elapsed(); info!( postcodes = postcode_aggs.len(), sampled = was_sampled, features = all_feature_indices.len(), has_screenshot = screenshot_bytes.is_some(), bytes = buf.len(), total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0), "GET /api/export" ); Ok(buf) }) .await .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response())? 
.map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err).into_response())?; Ok(( [ ( header::CONTENT_TYPE, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ), ( header::CONTENT_DISPOSITION, "attachment; filename=\"perfect-postcode-export.xlsx\"", ), ], bytes, )) }