From 3fd64785a262d6a21e3ef59c8df23021513c5371 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Fri, 5 Jun 2026 17:42:05 +0100 Subject: [PATCH] group by outcode --- server-rs/src/routes/export.rs | 239 ++++++++++++++++++++++++++------- 1 file changed, 192 insertions(+), 47 deletions(-) diff --git a/server-rs/src/routes/export.rs b/server-rs/src/routes/export.rs index ae5f588..cabc983 100644 --- a/server-rs/src/routes/export.rs +++ b/server-rs/src/routes/export.rs @@ -114,6 +114,92 @@ impl PostcodeExportAgg { self.finite_counts[out_idx] += 1; } } + + /// Fold another postcode's aggregate into this one (used to roll up an + /// outcode summary from its member postcodes). + fn merge_from(&mut self, other: &PostcodeExportAgg) { + self.count += other.count; + for i in 0..self.sums.len() { + self.sums[i] += other.sums[i]; + self.finite_counts[i] += other.finite_counts[i]; + } + for (&feat_idx, freqs) in &other.enum_freqs { + let entry = self.enum_freqs.entry(feat_idx).or_default(); + for (&bits, &count) in freqs { + *entry.entry(bits).or_insert(0) += count; + } + } + } +} + +/// The outcode (first part) of a UK postcode, e.g. "E14" from "E14 2DG". +fn outcode_of(postcode: &str) -> &str { + match postcode.find(' ') { + Some(space_idx) => &postcode[..space_idx], + None => postcode, + } +} + +/// A set of postcodes sharing the same outcode, with a rolled-up aggregate for +/// the group's summary row. `members` indexes into the flat `postcode_aggs`. +struct OutcodeGroup { + outcode: String, + members: Vec, + summary: PostcodeExportAgg, +} + +/// Write the per-feature cells (numeric mean or enum mode) for a single row, +/// shared between outcode summary rows and individual postcode rows. +#[allow(clippy::too_many_arguments)] +fn write_feature_cells( + sheet: &mut rust_xlsxwriter::Worksheet, + row: u32, + feat_indices: &[usize], + agg: &PostcodeExportAgg, + num_features: usize, + enum_indices: &FxHashMap, + enum_values: &FxHashMap>, + integer_feature_indices: &FxHashSet, + feat_num_fmts: &FxHashMap, +) -> Result<(), String> { + for (col_offset, &feat_idx) in feat_indices.iter().enumerate() { + let col = (col_offset + 2) as u16; + + if feat_idx < num_features && enum_indices.contains_key(&feat_idx) { + if let Some(freqs) = agg.enum_freqs.get(&feat_idx) { + if let Some((&mode_bits, _)) = freqs.iter().max_by_key(|(_, &count)| count) { + let mode_f32 = f32::from_bits(mode_bits); + let mode_idx = mode_f32 as usize; + if let Some(values) = enum_values.get(&feat_idx) { + if mode_idx < values.len() { + sheet + .write_string(row, col, &values[mode_idx]) + .map_err(|e| format!("Failed to write enum value: {e}"))?; + } + } + } + } + } else { + let fc = agg.finite_counts[feat_idx]; + if fc > 0 { + let mean = if integer_feature_indices.contains(&feat_idx) { + (agg.sums[feat_idx] / fc as f64).round() + } else { + (agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0 + }; + if let Some(fmt) = feat_num_fmts.get(&feat_idx) { + sheet + .write_number_with_format(row, col, mean, fmt) + .map_err(|e| format!("Failed to write numeric value: {e}"))?; + } else { + sheet + .write_number(row, col, mean) + .map_err(|e| format!("Failed to write numeric value: {e}"))?; + } + } + } + } + Ok(()) } /// Extract feature names referenced in the filters param (preserving order). @@ -689,6 +775,40 @@ pub async fn get_export( } } + // Group the postcodes by outcode, preserving the existing relevance order + // (property-count-desc in bounds mode, input order in list mode) for the + // groups themselves; postcodes within a group are sorted alphabetically. + // Each group carries a rolled-up summary aggregate for its header row. + let outcode_groups: Vec = { + let mut order: Vec = Vec::new(); + let mut by_outcode: FxHashMap = FxHashMap::default(); + for (i, (pc_idx, agg)) in postcode_aggs.iter().enumerate() { + let outcode = outcode_of(&postcode_data.postcodes[*pc_idx]).to_string(); + let group = by_outcode.entry(outcode.clone()).or_insert_with(|| { + order.push(outcode.clone()); + OutcodeGroup { + outcode: outcode.clone(), + members: Vec::new(), + summary: PostcodeExportAgg::new(total_export_features), + } + }); + group.members.push(i); + group.summary.merge_from(agg); + } + for group in by_outcode.values_mut() { + group + .members + .sort_by(|&a, &b| { + postcode_data.postcodes[postcode_aggs[a].0] + .cmp(&postcode_data.postcodes[postcode_aggs[b].0]) + }); + } + order + .into_iter() + .map(|outcode| by_outcode.remove(&outcode).unwrap()) + .collect() + }; + // Build Excel workbook with two sheets let mut workbook = Workbook::new(); @@ -715,6 +835,10 @@ pub async fn get_export( .set_font_color("#666666") .set_align(FormatAlign::Left); + // Outcode summary-row formats (the collapsible group header). + let group_label_fmt = Format::new().set_bold().set_font_color("#1F4E79"); + let group_count_fmt = Format::new().set_bold(); + // Dashboard URL let dashboard_url = format!( "{}/dashboard?{}", @@ -821,63 +945,75 @@ pub async fn get_export( .map_err(|e| format!("Failed to write desc: {e}"))?; } - // Data rows + // Put the collapse/expand controls above each group so the bold + // outcode summary row acts as the header for its postcodes. + sheet.group_symbols_above(true); + + // Data rows — one bold outcode summary row followed by its postcodes, + // the latter wrapped in a collapsible outline group. let data_start_row = desc_row + 1; - for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() { - let row = data_start_row + row_offset as u32; - + let mut row = data_start_row; + for group in &outcode_groups { + // Outcode summary row (rolled up from the member postcodes). + let summary_row = row; sheet - .write_string(row, 0, &postcode_data.postcodes[*pc_idx]) - .map_err(|e| format!("Failed to write postcode: {e}"))?; - + .write_string_with_format(summary_row, 0, &group.outcode, &group_label_fmt) + .map_err(|e| format!("Failed to write outcode: {e}"))?; sheet - .write_number(row, 1, agg.count as f64) - .map_err(|e| format!("Failed to write count: {e}"))?; + .write_number_with_format( + summary_row, + 1, + group.summary.count as f64, + &group_count_fmt, + ) + .map_err(|e| format!("Failed to write outcode count: {e}"))?; + write_feature_cells( + sheet, + summary_row, + feat_indices, + &group.summary, + num_features, + &enum_indices, + enum_values, + &integer_feature_indices, + &feat_num_fmts, + )?; + row += 1; - for (col_offset, &feat_idx) in feat_indices.iter().enumerate() { - let col = (col_offset + 2) as u16; + // Individual postcode rows for this outcode. + let first_detail_row = row; + for &member in &group.members { + let (pc_idx, agg) = &postcode_aggs[member]; + sheet + .write_string(row, 0, &postcode_data.postcodes[*pc_idx]) + .map_err(|e| format!("Failed to write postcode: {e}"))?; + sheet + .write_number(row, 1, agg.count as f64) + .map_err(|e| format!("Failed to write count: {e}"))?; + write_feature_cells( + sheet, + row, + feat_indices, + agg, + num_features, + &enum_indices, + enum_values, + &integer_feature_indices, + &feat_num_fmts, + )?; + row += 1; + } - if feat_idx < num_features && enum_indices.contains_key(&feat_idx) { - if let Some(freqs) = agg.enum_freqs.get(&feat_idx) { - if let Some((&mode_bits, _)) = - freqs.iter().max_by_key(|(_, &count)| count) - { - let mode_f32 = f32::from_bits(mode_bits); - let mode_idx = mode_f32 as usize; - if let Some(values) = enum_values.get(&feat_idx) { - if mode_idx < values.len() { - sheet.write_string(row, col, &values[mode_idx]).map_err( - |e| format!("Failed to write enum value: {e}"), - )?; - } - } - } - } - } else { - let fc = agg.finite_counts[feat_idx]; - if fc > 0 { - let mean = if integer_feature_indices.contains(&feat_idx) { - (agg.sums[feat_idx] / fc as f64).round() - } else { - (agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0 - }; - if let Some(fmt) = feat_num_fmts.get(&feat_idx) { - sheet - .write_number_with_format(row, col, mean, fmt) - .map_err(|e| format!("Failed to write numeric value: {e}"))?; - } else { - sheet - .write_number(row, col, mean) - .map_err(|e| format!("Failed to write numeric value: {e}"))?; - } - } - } + if row > first_detail_row { + sheet + .group_rows(first_detail_row, row - 1) + .map_err(|e| format!("Failed to group rows: {e}"))?; } } // Sample note if was_sampled { - let note_row = data_start_row + postcode_aggs.len() as u32 + 1; + let note_row = row + 1; let total_cols = (feat_indices.len() + 2) as u16; sheet .merge_range( @@ -985,6 +1121,15 @@ mod tests { ); } + #[test] + fn outcode_of_splits_on_the_incode_space() { + assert_eq!(outcode_of("E14 2DG"), "E14"); + assert_eq!(outcode_of("SW1A 1AA"), "SW1A"); + assert_eq!(outcode_of("M1 1AE"), "M1"); + // Defensive: a value with no space is treated as its own outcode. + assert_eq!(outcode_of("E14"), "E14"); + } + #[test] fn export_query_deserializes_when_tt_is_a_single_string() { let uri: Uri = "/api/export?bounds=1,2,3,4&tt=transit%3Abank%3ABank%2520station%3A0%3A52"