group by outcode

This commit is contained in:
Andras Schmelczer 2026-06-05 17:42:05 +01:00
parent 70ac7b95e6
commit 3fd64785a2

View file

@ -114,6 +114,92 @@ impl PostcodeExportAgg {
self.finite_counts[out_idx] += 1;
}
}
/// Accumulate `other` into `self` (used to roll up an outcode summary from
/// the aggregates of its member postcodes).
///
/// Adds `other`'s row count, its per-slot sums and finite-value counts, and
/// its per-feature enum frequency tables onto the matching entries of `self`.
///
/// # Panics
///
/// `other` is indexed by `self`'s slot positions, so the indexing panics if
/// `other` has fewer slots than `self`.
fn merge_from(&mut self, other: &PostcodeExportAgg) {
    self.count += other.count;

    // Numeric accumulators: walk every slot `self` has and add the matching
    // slot of `other`.
    let slot_count = self.sums.len();
    for slot in 0..slot_count {
        self.sums[slot] += other.sums[slot];
        self.finite_counts[slot] += other.finite_counts[slot];
    }

    // Enum frequency tables: create the per-feature table on first sight,
    // then add the incoming occurrences value by value.
    for (&feat_idx, incoming) in &other.enum_freqs {
        let merged = self.enum_freqs.entry(feat_idx).or_default();
        incoming.iter().for_each(|(&bits, &occurrences)| {
            *merged.entry(bits).or_insert(0) += occurrences;
        });
    }
}
}
/// Returns the outcode of a UK postcode: everything before its first space,
/// e.g. `"E14"` for `"E14 2DG"`.
///
/// A value without any space is returned unchanged, i.e. it is treated as
/// being its own outcode.
fn outcode_of(postcode: &str) -> &str {
    postcode
        .split_once(' ')
        .map_or(postcode, |(outcode, _incode)| outcode)
}
/// A set of postcodes sharing the same outcode, with a rolled-up aggregate for
/// the group's summary row. `members` indexes into the flat `postcode_aggs`.
struct OutcodeGroup {
/// The shared outcode, as returned by `outcode_of` for the member postcodes;
/// written as the label of the group's summary row.
outcode: String,
/// Positions of the member postcodes within `postcode_aggs` (not postcode
/// ids). The export code sorts these by postcode string after grouping.
members: Vec<usize>,
/// Aggregate obtained by merging every member's aggregate via `merge_from`;
/// drives the count and feature cells of the summary row.
summary: PostcodeExportAgg,
}
/// Fills in the feature columns of one worksheet row from an aggregate.
///
/// Used for both outcode summary rows and individual postcode rows. The
/// feature at position `k` of `feat_indices` is written to column `k + 2`.
///
/// * Enum features (index below `num_features` and present in `enum_indices`)
///   get the label of their most frequent value. The cell is left untouched
///   when there are no recorded frequencies, no label table for the feature,
///   or the decoded value index is outside the label table.
/// * All other features get the mean of their finite values: rounded to a
///   whole number for features in `integer_feature_indices`, otherwise to two
///   decimals. The cell is left untouched when there are no finite values. A
///   format from `feat_num_fmts` is applied when one exists for the feature.
///
/// # Errors
///
/// Returns a descriptive message if the worksheet rejects a write.
// One parameter per lookup table the export already holds; hence the allow.
#[allow(clippy::too_many_arguments)]
fn write_feature_cells(
    sheet: &mut rust_xlsxwriter::Worksheet,
    row: u32,
    feat_indices: &[usize],
    agg: &PostcodeExportAgg,
    num_features: usize,
    enum_indices: &FxHashMap<usize, ()>,
    enum_values: &FxHashMap<usize, Vec<String>>,
    integer_feature_indices: &FxHashSet<usize>,
    feat_num_fmts: &FxHashMap<usize, Format>,
) -> Result<(), String> {
    for (position, &feat_idx) in feat_indices.iter().enumerate() {
        // Columns 0 and 1 are taken by the label and the count.
        let col = (position + 2) as u16;
        let is_enum = feat_idx < num_features && enum_indices.contains_key(&feat_idx);

        if is_enum {
            // The mode is stored as the bit pattern of an f32 whose value is
            // the index into the feature's label table.
            let mode_bits = agg
                .enum_freqs
                .get(&feat_idx)
                .and_then(|freqs| freqs.iter().max_by_key(|(_, &count)| count))
                .map(|(&bits, _)| bits);
            let label = mode_bits.and_then(|bits| {
                let mode_idx = f32::from_bits(bits) as usize;
                enum_values.get(&feat_idx)?.get(mode_idx)
            });
            if let Some(label) = label {
                sheet
                    .write_string(row, col, label)
                    .map_err(|e| format!("Failed to write enum value: {e}"))?;
            }
            // Enum features never fall through to the numeric path.
            continue;
        }

        let finite = agg.finite_counts[feat_idx];
        if finite > 0 {
            let raw_mean = agg.sums[feat_idx] / finite as f64;
            let mean = if integer_feature_indices.contains(&feat_idx) {
                raw_mean.round()
            } else {
                (raw_mean * 100.0).round() / 100.0
            };
            let outcome = match feat_num_fmts.get(&feat_idx) {
                Some(fmt) => sheet.write_number_with_format(row, col, mean, fmt),
                None => sheet.write_number(row, col, mean),
            };
            outcome.map_err(|e| format!("Failed to write numeric value: {e}"))?;
        }
    }
    Ok(())
}
/// Extract feature names referenced in the filters param (preserving order).
@ -689,6 +775,40 @@ pub async fn get_export(
}
}
// Bucket the postcodes by outcode. Groups appear in the order their outcode
// is first met in `postcode_aggs`, so the existing relevance order
// (property-count-desc in bounds mode, input order in list mode) carries over
// to the groups; inside a group the postcodes are ordered alphabetically.
// Every group also accumulates a summary aggregate for its header row.
let outcode_groups: Vec<OutcodeGroup> = {
    // Groups in first-seen order, plus a lookup from outcode to its slot.
    let mut groups: Vec<OutcodeGroup> = Vec::new();
    let mut slot_by_outcode: FxHashMap<String, usize> = FxHashMap::default();

    for (agg_pos, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
        let outcode = outcode_of(&postcode_data.postcodes[*pc_idx]);
        let known_slot = slot_by_outcode.get(outcode).copied();
        let slot = match known_slot {
            Some(slot) => slot,
            None => {
                let slot = groups.len();
                groups.push(OutcodeGroup {
                    outcode: outcode.to_string(),
                    members: Vec::new(),
                    summary: PostcodeExportAgg::new(total_export_features),
                });
                slot_by_outcode.insert(outcode.to_string(), slot);
                slot
            }
        };
        let group = &mut groups[slot];
        group.members.push(agg_pos);
        group.summary.merge_from(agg);
    }

    // Stable sort of each group's members by their postcode string.
    for group in &mut groups {
        group.members.sort_by(|&lhs, &rhs| {
            let left = &postcode_data.postcodes[postcode_aggs[lhs].0];
            let right = &postcode_data.postcodes[postcode_aggs[rhs].0];
            left.cmp(right)
        });
    }

    groups
};
// Build Excel workbook with two sheets
let mut workbook = Workbook::new();
@ -715,6 +835,10 @@ pub async fn get_export(
.set_font_color("#666666")
.set_align(FormatAlign::Left);
// Outcode summary-row formats (the collapsible group header).
let group_label_fmt = Format::new().set_bold().set_font_color("#1F4E79");
let group_count_fmt = Format::new().set_bold();
// Dashboard URL
let dashboard_url = format!(
"{}/dashboard?{}",
@ -821,63 +945,75 @@ pub async fn get_export(
.map_err(|e| format!("Failed to write desc: {e}"))?;
}
// Data rows
let data_start_row = desc_row + 1;
for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
let row = data_start_row + row_offset as u32;
// Put the collapse/expand controls above each group so the bold
// outcode summary row acts as the header for its postcodes.
sheet.group_symbols_above(true);
// Data rows — one bold outcode summary row followed by its postcodes,
// the latter wrapped in a collapsible outline group.
let data_start_row = desc_row + 1;
let mut row = data_start_row;
for group in &outcode_groups {
// Outcode summary row (rolled up from the member postcodes).
let summary_row = row;
sheet
.write_string_with_format(summary_row, 0, &group.outcode, &group_label_fmt)
.map_err(|e| format!("Failed to write outcode: {e}"))?;
sheet
.write_number_with_format(
summary_row,
1,
group.summary.count as f64,
&group_count_fmt,
)
.map_err(|e| format!("Failed to write outcode count: {e}"))?;
write_feature_cells(
sheet,
summary_row,
feat_indices,
&group.summary,
num_features,
&enum_indices,
enum_values,
&integer_feature_indices,
&feat_num_fmts,
)?;
row += 1;
// Individual postcode rows for this outcode.
let first_detail_row = row;
for &member in &group.members {
let (pc_idx, agg) = &postcode_aggs[member];
sheet
.write_string(row, 0, &postcode_data.postcodes[*pc_idx])
.map_err(|e| format!("Failed to write postcode: {e}"))?;
sheet
.write_number(row, 1, agg.count as f64)
.map_err(|e| format!("Failed to write count: {e}"))?;
for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
let col = (col_offset + 2) as u16;
if feat_idx < num_features && enum_indices.contains_key(&feat_idx) {
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
if let Some((&mode_bits, _)) =
freqs.iter().max_by_key(|(_, &count)| count)
{
let mode_f32 = f32::from_bits(mode_bits);
let mode_idx = mode_f32 as usize;
if let Some(values) = enum_values.get(&feat_idx) {
if mode_idx < values.len() {
sheet.write_string(row, col, &values[mode_idx]).map_err(
|e| format!("Failed to write enum value: {e}"),
write_feature_cells(
sheet,
row,
feat_indices,
agg,
num_features,
&enum_indices,
enum_values,
&integer_feature_indices,
&feat_num_fmts,
)?;
row += 1;
}
}
}
}
} else {
let fc = agg.finite_counts[feat_idx];
if fc > 0 {
let mean = if integer_feature_indices.contains(&feat_idx) {
(agg.sums[feat_idx] / fc as f64).round()
} else {
(agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0
};
if let Some(fmt) = feat_num_fmts.get(&feat_idx) {
if row > first_detail_row {
sheet
.write_number_with_format(row, col, mean, fmt)
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
} else {
sheet
.write_number(row, col, mean)
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
}
}
}
.group_rows(first_detail_row, row - 1)
.map_err(|e| format!("Failed to group rows: {e}"))?;
}
}
// Sample note
if was_sampled {
let note_row = data_start_row + postcode_aggs.len() as u32 + 1;
let note_row = row + 1;
let total_cols = (feat_indices.len() + 2) as u16;
sheet
.merge_range(
@ -985,6 +1121,15 @@ mod tests {
);
}
#[test]
fn outcode_of_splits_on_the_incode_space() {
    // (input postcode, expected outcode)
    let cases = [
        ("E14 2DG", "E14"),
        ("SW1A 1AA", "SW1A"),
        ("M1 1AE", "M1"),
        // Defensive: a value with no space is treated as its own outcode.
        ("E14", "E14"),
    ];
    for &(postcode, expected) in cases.iter() {
        assert_eq!(outcode_of(postcode), expected);
    }
}
#[test]
fn export_query_deserializes_when_tt_is_a_single_string() {
let uri: Uri = "/api/export?bounds=1,2,3,4&tt=transit%3Abank%3ABank%2520station%3A0%3A52"