group by outcode
This commit is contained in:
parent
70ac7b95e6
commit
3fd64785a2
1 changed file with 192 additions and 47 deletions
|
|
@ -114,6 +114,92 @@ impl PostcodeExportAgg {
|
|||
self.finite_counts[out_idx] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Fold another postcode's aggregate into this one (used to roll up an
|
||||
/// outcode summary from its member postcodes).
|
||||
fn merge_from(&mut self, other: &PostcodeExportAgg) {
|
||||
self.count += other.count;
|
||||
for i in 0..self.sums.len() {
|
||||
self.sums[i] += other.sums[i];
|
||||
self.finite_counts[i] += other.finite_counts[i];
|
||||
}
|
||||
for (&feat_idx, freqs) in &other.enum_freqs {
|
||||
let entry = self.enum_freqs.entry(feat_idx).or_default();
|
||||
for (&bits, &count) in freqs {
|
||||
*entry.entry(bits).or_insert(0) += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The outcode (first part) of a UK postcode, e.g. "E14" from "E14 2DG".
///
/// The outcode is the first whitespace-delimited token of `postcode`, so
/// leading, trailing or repeated whitespace (including tabs) never produces an
/// empty or padded outcode. A value with no separator is returned whole, i.e.
/// it is treated as its own outcode. An empty or all-whitespace value yields
/// `""`.
///
/// The returned slice borrows from `postcode`; nothing is allocated.
fn outcode_of(postcode: &str) -> &str {
    postcode.split_whitespace().next().unwrap_or("")
}
|
||||
|
||||
/// A set of postcodes sharing the same outcode, with a rolled-up aggregate for
|
||||
/// the group's summary row. `members` indexes into the flat `postcode_aggs`.
|
||||
struct OutcodeGroup {
|
||||
outcode: String,
|
||||
members: Vec<usize>,
|
||||
summary: PostcodeExportAgg,
|
||||
}
|
||||
|
||||
/// Write the per-feature cells (numeric mean or enum mode) for a single row,
|
||||
/// shared between outcode summary rows and individual postcode rows.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn write_feature_cells(
|
||||
sheet: &mut rust_xlsxwriter::Worksheet,
|
||||
row: u32,
|
||||
feat_indices: &[usize],
|
||||
agg: &PostcodeExportAgg,
|
||||
num_features: usize,
|
||||
enum_indices: &FxHashMap<usize, ()>,
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
integer_feature_indices: &FxHashSet<usize>,
|
||||
feat_num_fmts: &FxHashMap<usize, Format>,
|
||||
) -> Result<(), String> {
|
||||
for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
|
||||
let col = (col_offset + 2) as u16;
|
||||
|
||||
if feat_idx < num_features && enum_indices.contains_key(&feat_idx) {
|
||||
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
|
||||
if let Some((&mode_bits, _)) = freqs.iter().max_by_key(|(_, &count)| count) {
|
||||
let mode_f32 = f32::from_bits(mode_bits);
|
||||
let mode_idx = mode_f32 as usize;
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
if mode_idx < values.len() {
|
||||
sheet
|
||||
.write_string(row, col, &values[mode_idx])
|
||||
.map_err(|e| format!("Failed to write enum value: {e}"))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let fc = agg.finite_counts[feat_idx];
|
||||
if fc > 0 {
|
||||
let mean = if integer_feature_indices.contains(&feat_idx) {
|
||||
(agg.sums[feat_idx] / fc as f64).round()
|
||||
} else {
|
||||
(agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0
|
||||
};
|
||||
if let Some(fmt) = feat_num_fmts.get(&feat_idx) {
|
||||
sheet
|
||||
.write_number_with_format(row, col, mean, fmt)
|
||||
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
||||
} else {
|
||||
sheet
|
||||
.write_number(row, col, mean)
|
||||
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Extract feature names referenced in the filters param (preserving order).
|
||||
|
|
@ -689,6 +775,40 @@ pub async fn get_export(
|
|||
}
|
||||
}
|
||||
|
||||
// Group the postcodes by outcode, preserving the existing relevance order
|
||||
// (property-count-desc in bounds mode, input order in list mode) for the
|
||||
// groups themselves; postcodes within a group are sorted alphabetically.
|
||||
// Each group carries a rolled-up summary aggregate for its header row.
|
||||
let outcode_groups: Vec<OutcodeGroup> = {
|
||||
let mut order: Vec<String> = Vec::new();
|
||||
let mut by_outcode: FxHashMap<String, OutcodeGroup> = FxHashMap::default();
|
||||
for (i, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
|
||||
let outcode = outcode_of(&postcode_data.postcodes[*pc_idx]).to_string();
|
||||
let group = by_outcode.entry(outcode.clone()).or_insert_with(|| {
|
||||
order.push(outcode.clone());
|
||||
OutcodeGroup {
|
||||
outcode: outcode.clone(),
|
||||
members: Vec::new(),
|
||||
summary: PostcodeExportAgg::new(total_export_features),
|
||||
}
|
||||
});
|
||||
group.members.push(i);
|
||||
group.summary.merge_from(agg);
|
||||
}
|
||||
for group in by_outcode.values_mut() {
|
||||
group
|
||||
.members
|
||||
.sort_by(|&a, &b| {
|
||||
postcode_data.postcodes[postcode_aggs[a].0]
|
||||
.cmp(&postcode_data.postcodes[postcode_aggs[b].0])
|
||||
});
|
||||
}
|
||||
order
|
||||
.into_iter()
|
||||
.map(|outcode| by_outcode.remove(&outcode).unwrap())
|
||||
.collect()
|
||||
};
|
||||
|
||||
// Build Excel workbook with two sheets
|
||||
let mut workbook = Workbook::new();
|
||||
|
||||
|
|
@ -715,6 +835,10 @@ pub async fn get_export(
|
|||
.set_font_color("#666666")
|
||||
.set_align(FormatAlign::Left);
|
||||
|
||||
// Outcode summary-row formats (the collapsible group header).
|
||||
let group_label_fmt = Format::new().set_bold().set_font_color("#1F4E79");
|
||||
let group_count_fmt = Format::new().set_bold();
|
||||
|
||||
// Dashboard URL
|
||||
let dashboard_url = format!(
|
||||
"{}/dashboard?{}",
|
||||
|
|
@ -821,63 +945,75 @@ pub async fn get_export(
|
|||
.map_err(|e| format!("Failed to write desc: {e}"))?;
|
||||
}
|
||||
|
||||
// Data rows
|
||||
let data_start_row = desc_row + 1;
|
||||
for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
|
||||
let row = data_start_row + row_offset as u32;
|
||||
// Put the collapse/expand controls above each group so the bold
|
||||
// outcode summary row acts as the header for its postcodes.
|
||||
sheet.group_symbols_above(true);
|
||||
|
||||
// Data rows — one bold outcode summary row followed by its postcodes,
|
||||
// the latter wrapped in a collapsible outline group.
|
||||
let data_start_row = desc_row + 1;
|
||||
let mut row = data_start_row;
|
||||
for group in &outcode_groups {
|
||||
// Outcode summary row (rolled up from the member postcodes).
|
||||
let summary_row = row;
|
||||
sheet
|
||||
.write_string_with_format(summary_row, 0, &group.outcode, &group_label_fmt)
|
||||
.map_err(|e| format!("Failed to write outcode: {e}"))?;
|
||||
sheet
|
||||
.write_number_with_format(
|
||||
summary_row,
|
||||
1,
|
||||
group.summary.count as f64,
|
||||
&group_count_fmt,
|
||||
)
|
||||
.map_err(|e| format!("Failed to write outcode count: {e}"))?;
|
||||
write_feature_cells(
|
||||
sheet,
|
||||
summary_row,
|
||||
feat_indices,
|
||||
&group.summary,
|
||||
num_features,
|
||||
&enum_indices,
|
||||
enum_values,
|
||||
&integer_feature_indices,
|
||||
&feat_num_fmts,
|
||||
)?;
|
||||
row += 1;
|
||||
|
||||
// Individual postcode rows for this outcode.
|
||||
let first_detail_row = row;
|
||||
for &member in &group.members {
|
||||
let (pc_idx, agg) = &postcode_aggs[member];
|
||||
sheet
|
||||
.write_string(row, 0, &postcode_data.postcodes[*pc_idx])
|
||||
.map_err(|e| format!("Failed to write postcode: {e}"))?;
|
||||
|
||||
sheet
|
||||
.write_number(row, 1, agg.count as f64)
|
||||
.map_err(|e| format!("Failed to write count: {e}"))?;
|
||||
|
||||
for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
|
||||
let col = (col_offset + 2) as u16;
|
||||
|
||||
if feat_idx < num_features && enum_indices.contains_key(&feat_idx) {
|
||||
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
|
||||
if let Some((&mode_bits, _)) =
|
||||
freqs.iter().max_by_key(|(_, &count)| count)
|
||||
{
|
||||
let mode_f32 = f32::from_bits(mode_bits);
|
||||
let mode_idx = mode_f32 as usize;
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
if mode_idx < values.len() {
|
||||
sheet.write_string(row, col, &values[mode_idx]).map_err(
|
||||
|e| format!("Failed to write enum value: {e}"),
|
||||
write_feature_cells(
|
||||
sheet,
|
||||
row,
|
||||
feat_indices,
|
||||
agg,
|
||||
num_features,
|
||||
&enum_indices,
|
||||
enum_values,
|
||||
&integer_feature_indices,
|
||||
&feat_num_fmts,
|
||||
)?;
|
||||
row += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let fc = agg.finite_counts[feat_idx];
|
||||
if fc > 0 {
|
||||
let mean = if integer_feature_indices.contains(&feat_idx) {
|
||||
(agg.sums[feat_idx] / fc as f64).round()
|
||||
} else {
|
||||
(agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0
|
||||
};
|
||||
if let Some(fmt) = feat_num_fmts.get(&feat_idx) {
|
||||
|
||||
if row > first_detail_row {
|
||||
sheet
|
||||
.write_number_with_format(row, col, mean, fmt)
|
||||
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
||||
} else {
|
||||
sheet
|
||||
.write_number(row, col, mean)
|
||||
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
.group_rows(first_detail_row, row - 1)
|
||||
.map_err(|e| format!("Failed to group rows: {e}"))?;
|
||||
}
|
||||
}
|
||||
|
||||
// Sample note
|
||||
if was_sampled {
|
||||
let note_row = data_start_row + postcode_aggs.len() as u32 + 1;
|
||||
let note_row = row + 1;
|
||||
let total_cols = (feat_indices.len() + 2) as u16;
|
||||
sheet
|
||||
.merge_range(
|
||||
|
|
@ -985,6 +1121,15 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn outcode_of_splits_on_the_incode_space() {
    // (postcode, expected outcode). The final case is defensive: a value with
    // no space is treated as its own outcode.
    let cases = [
        ("E14 2DG", "E14"),
        ("SW1A 1AA", "SW1A"),
        ("M1 1AE", "M1"),
        ("E14", "E14"),
    ];

    for (postcode, expected) in cases {
        assert_eq!(outcode_of(postcode), expected, "outcode of {postcode:?}");
    }
}
|
||||
|
||||
#[test]
|
||||
fn export_query_deserializes_when_tt_is_a_single_string() {
|
||||
let uri: Uri = "/api/export?bounds=1,2,3,4&tt=transit%3Abank%3ABank%2520station%3A0%3A52"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue