group by outcode
This commit is contained in:
parent
70ac7b95e6
commit
3fd64785a2
1 changed file with 192 additions and 47 deletions
|
|
@ -114,6 +114,92 @@ impl PostcodeExportAgg {
|
||||||
self.finite_counts[out_idx] += 1;
|
self.finite_counts[out_idx] += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fold another postcode's aggregate into this one (used to roll up an
|
||||||
|
/// outcode summary from its member postcodes).
|
||||||
|
fn merge_from(&mut self, other: &PostcodeExportAgg) {
|
||||||
|
self.count += other.count;
|
||||||
|
for i in 0..self.sums.len() {
|
||||||
|
self.sums[i] += other.sums[i];
|
||||||
|
self.finite_counts[i] += other.finite_counts[i];
|
||||||
|
}
|
||||||
|
for (&feat_idx, freqs) in &other.enum_freqs {
|
||||||
|
let entry = self.enum_freqs.entry(feat_idx).or_default();
|
||||||
|
for (&bits, &count) in freqs {
|
||||||
|
*entry.entry(bits).or_insert(0) += count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The outcode (first part) of a UK postcode, e.g. "E14" from "E14 2DG".
///
/// Surrounding whitespace is ignored and any run of whitespace separates the
/// outcode from the incode, so `" E14  2DG"` still yields `"E14"` rather than
/// an empty string. A value with no separator is treated as its own outcode,
/// and a blank value yields `""`.
fn outcode_of(postcode: &str) -> &str {
    // The first whitespace-delimited token is the outcode; `None` only occurs
    // for empty / all-whitespace input.
    postcode.split_whitespace().next().unwrap_or_default()
}
|
||||||
|
|
||||||
|
/// A set of postcodes sharing the same outcode, together with a rolled-up
|
||||||
|
/// aggregate used for the group's summary row. `members` indexes into the
/// flat `postcode_aggs` list rather than owning the per-postcode aggregates.
|
||||||
|
struct OutcodeGroup {
|
||||||
|
/// The outcode shared by every member, as returned by `outcode_of`.
outcode: String,
|
||||||
|
/// Positions of the member postcodes within `postcode_aggs`.
members: Vec<usize>,
|
||||||
|
/// Aggregate built by merging every member's aggregate into one.
summary: PostcodeExportAgg,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the per-feature cells (numeric mean or enum mode) for a single row,
|
||||||
|
/// shared between outcode summary rows and individual postcode rows.
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
fn write_feature_cells(
|
||||||
|
sheet: &mut rust_xlsxwriter::Worksheet,
|
||||||
|
row: u32,
|
||||||
|
feat_indices: &[usize],
|
||||||
|
agg: &PostcodeExportAgg,
|
||||||
|
num_features: usize,
|
||||||
|
enum_indices: &FxHashMap<usize, ()>,
|
||||||
|
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||||
|
integer_feature_indices: &FxHashSet<usize>,
|
||||||
|
feat_num_fmts: &FxHashMap<usize, Format>,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
|
||||||
|
let col = (col_offset + 2) as u16;
|
||||||
|
|
||||||
|
if feat_idx < num_features && enum_indices.contains_key(&feat_idx) {
|
||||||
|
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
|
||||||
|
if let Some((&mode_bits, _)) = freqs.iter().max_by_key(|(_, &count)| count) {
|
||||||
|
let mode_f32 = f32::from_bits(mode_bits);
|
||||||
|
let mode_idx = mode_f32 as usize;
|
||||||
|
if let Some(values) = enum_values.get(&feat_idx) {
|
||||||
|
if mode_idx < values.len() {
|
||||||
|
sheet
|
||||||
|
.write_string(row, col, &values[mode_idx])
|
||||||
|
.map_err(|e| format!("Failed to write enum value: {e}"))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let fc = agg.finite_counts[feat_idx];
|
||||||
|
if fc > 0 {
|
||||||
|
let mean = if integer_feature_indices.contains(&feat_idx) {
|
||||||
|
(agg.sums[feat_idx] / fc as f64).round()
|
||||||
|
} else {
|
||||||
|
(agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0
|
||||||
|
};
|
||||||
|
if let Some(fmt) = feat_num_fmts.get(&feat_idx) {
|
||||||
|
sheet
|
||||||
|
.write_number_with_format(row, col, mean, fmt)
|
||||||
|
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
||||||
|
} else {
|
||||||
|
sheet
|
||||||
|
.write_number(row, col, mean)
|
||||||
|
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract feature names referenced in the filters param (preserving order).
|
/// Extract feature names referenced in the filters param (preserving order).
|
||||||
|
|
@ -689,6 +775,40 @@ pub async fn get_export(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Group the postcodes by outcode, preserving the existing relevance order
|
||||||
|
// (property-count-desc in bounds mode, input order in list mode) for the
|
||||||
|
// groups themselves; postcodes within a group are sorted alphabetically.
|
||||||
|
// Each group carries a rolled-up summary aggregate for its header row.
|
||||||
|
let outcode_groups: Vec<OutcodeGroup> = {
|
||||||
|
let mut order: Vec<String> = Vec::new();
|
||||||
|
let mut by_outcode: FxHashMap<String, OutcodeGroup> = FxHashMap::default();
|
||||||
|
for (i, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
|
||||||
|
let outcode = outcode_of(&postcode_data.postcodes[*pc_idx]).to_string();
|
||||||
|
let group = by_outcode.entry(outcode.clone()).or_insert_with(|| {
|
||||||
|
order.push(outcode.clone());
|
||||||
|
OutcodeGroup {
|
||||||
|
outcode: outcode.clone(),
|
||||||
|
members: Vec::new(),
|
||||||
|
summary: PostcodeExportAgg::new(total_export_features),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
group.members.push(i);
|
||||||
|
group.summary.merge_from(agg);
|
||||||
|
}
|
||||||
|
for group in by_outcode.values_mut() {
|
||||||
|
group
|
||||||
|
.members
|
||||||
|
.sort_by(|&a, &b| {
|
||||||
|
postcode_data.postcodes[postcode_aggs[a].0]
|
||||||
|
.cmp(&postcode_data.postcodes[postcode_aggs[b].0])
|
||||||
|
});
|
||||||
|
}
|
||||||
|
order
|
||||||
|
.into_iter()
|
||||||
|
.map(|outcode| by_outcode.remove(&outcode).unwrap())
|
||||||
|
.collect()
|
||||||
|
};
|
||||||
|
|
||||||
// Build Excel workbook with two sheets
|
// Build Excel workbook with two sheets
|
||||||
let mut workbook = Workbook::new();
|
let mut workbook = Workbook::new();
|
||||||
|
|
||||||
|
|
@ -715,6 +835,10 @@ pub async fn get_export(
|
||||||
.set_font_color("#666666")
|
.set_font_color("#666666")
|
||||||
.set_align(FormatAlign::Left);
|
.set_align(FormatAlign::Left);
|
||||||
|
|
||||||
|
// Outcode summary-row formats (the collapsible group header).
|
||||||
|
let group_label_fmt = Format::new().set_bold().set_font_color("#1F4E79");
|
||||||
|
let group_count_fmt = Format::new().set_bold();
|
||||||
|
|
||||||
// Dashboard URL
|
// Dashboard URL
|
||||||
let dashboard_url = format!(
|
let dashboard_url = format!(
|
||||||
"{}/dashboard?{}",
|
"{}/dashboard?{}",
|
||||||
|
|
@ -821,63 +945,75 @@ pub async fn get_export(
|
||||||
.map_err(|e| format!("Failed to write desc: {e}"))?;
|
.map_err(|e| format!("Failed to write desc: {e}"))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Data rows
|
// Put the collapse/expand controls above each group so the bold
|
||||||
let data_start_row = desc_row + 1;
|
// outcode summary row acts as the header for its postcodes.
|
||||||
for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
|
sheet.group_symbols_above(true);
|
||||||
let row = data_start_row + row_offset as u32;
|
|
||||||
|
|
||||||
|
// Data rows — one bold outcode summary row followed by its postcodes,
|
||||||
|
// the latter wrapped in a collapsible outline group.
|
||||||
|
let data_start_row = desc_row + 1;
|
||||||
|
let mut row = data_start_row;
|
||||||
|
for group in &outcode_groups {
|
||||||
|
// Outcode summary row (rolled up from the member postcodes).
|
||||||
|
let summary_row = row;
|
||||||
|
sheet
|
||||||
|
.write_string_with_format(summary_row, 0, &group.outcode, &group_label_fmt)
|
||||||
|
.map_err(|e| format!("Failed to write outcode: {e}"))?;
|
||||||
|
sheet
|
||||||
|
.write_number_with_format(
|
||||||
|
summary_row,
|
||||||
|
1,
|
||||||
|
group.summary.count as f64,
|
||||||
|
&group_count_fmt,
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("Failed to write outcode count: {e}"))?;
|
||||||
|
write_feature_cells(
|
||||||
|
sheet,
|
||||||
|
summary_row,
|
||||||
|
feat_indices,
|
||||||
|
&group.summary,
|
||||||
|
num_features,
|
||||||
|
&enum_indices,
|
||||||
|
enum_values,
|
||||||
|
&integer_feature_indices,
|
||||||
|
&feat_num_fmts,
|
||||||
|
)?;
|
||||||
|
row += 1;
|
||||||
|
|
||||||
|
// Individual postcode rows for this outcode.
|
||||||
|
let first_detail_row = row;
|
||||||
|
for &member in &group.members {
|
||||||
|
let (pc_idx, agg) = &postcode_aggs[member];
|
||||||
sheet
|
sheet
|
||||||
.write_string(row, 0, &postcode_data.postcodes[*pc_idx])
|
.write_string(row, 0, &postcode_data.postcodes[*pc_idx])
|
||||||
.map_err(|e| format!("Failed to write postcode: {e}"))?;
|
.map_err(|e| format!("Failed to write postcode: {e}"))?;
|
||||||
|
|
||||||
sheet
|
sheet
|
||||||
.write_number(row, 1, agg.count as f64)
|
.write_number(row, 1, agg.count as f64)
|
||||||
.map_err(|e| format!("Failed to write count: {e}"))?;
|
.map_err(|e| format!("Failed to write count: {e}"))?;
|
||||||
|
write_feature_cells(
|
||||||
for (col_offset, &feat_idx) in feat_indices.iter().enumerate() {
|
sheet,
|
||||||
let col = (col_offset + 2) as u16;
|
row,
|
||||||
|
feat_indices,
|
||||||
if feat_idx < num_features && enum_indices.contains_key(&feat_idx) {
|
agg,
|
||||||
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
|
num_features,
|
||||||
if let Some((&mode_bits, _)) =
|
&enum_indices,
|
||||||
freqs.iter().max_by_key(|(_, &count)| count)
|
enum_values,
|
||||||
{
|
&integer_feature_indices,
|
||||||
let mode_f32 = f32::from_bits(mode_bits);
|
&feat_num_fmts,
|
||||||
let mode_idx = mode_f32 as usize;
|
|
||||||
if let Some(values) = enum_values.get(&feat_idx) {
|
|
||||||
if mode_idx < values.len() {
|
|
||||||
sheet.write_string(row, col, &values[mode_idx]).map_err(
|
|
||||||
|e| format!("Failed to write enum value: {e}"),
|
|
||||||
)?;
|
)?;
|
||||||
|
row += 1;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
if row > first_detail_row {
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let fc = agg.finite_counts[feat_idx];
|
|
||||||
if fc > 0 {
|
|
||||||
let mean = if integer_feature_indices.contains(&feat_idx) {
|
|
||||||
(agg.sums[feat_idx] / fc as f64).round()
|
|
||||||
} else {
|
|
||||||
(agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0
|
|
||||||
};
|
|
||||||
if let Some(fmt) = feat_num_fmts.get(&feat_idx) {
|
|
||||||
sheet
|
sheet
|
||||||
.write_number_with_format(row, col, mean, fmt)
|
.group_rows(first_detail_row, row - 1)
|
||||||
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
.map_err(|e| format!("Failed to group rows: {e}"))?;
|
||||||
} else {
|
|
||||||
sheet
|
|
||||||
.write_number(row, col, mean)
|
|
||||||
.map_err(|e| format!("Failed to write numeric value: {e}"))?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sample note
|
// Sample note
|
||||||
if was_sampled {
|
if was_sampled {
|
||||||
let note_row = data_start_row + postcode_aggs.len() as u32 + 1;
|
let note_row = row + 1;
|
||||||
let total_cols = (feat_indices.len() + 2) as u16;
|
let total_cols = (feat_indices.len() + 2) as u16;
|
||||||
sheet
|
sheet
|
||||||
.merge_range(
|
.merge_range(
|
||||||
|
|
@ -985,6 +1121,15 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn outcode_of_splits_on_the_incode_space() {
    // (postcode, expected outcode). The last case is defensive: a value with
    // no space is treated as its own outcode.
    let cases = [
        ("E14 2DG", "E14"),
        ("SW1A 1AA", "SW1A"),
        ("M1 1AE", "M1"),
        ("E14", "E14"),
    ];

    for (postcode, expected) in cases {
        assert_eq!(outcode_of(postcode), expected);
    }
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn export_query_deserializes_when_tt_is_a_single_string() {
|
fn export_query_deserializes_when_tt_is_a_single_string() {
|
||||||
let uri: Uri = "/api/export?bounds=1,2,3,4&tt=transit%3Abank%3ABank%2520station%3A0%3A52"
|
let uri: Uri = "/api/export?bounds=1,2,3,4&tt=transit%3Abank%3ABank%2520station%3A0%3A52"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue