Lint & small changes

This commit is contained in:
Andras Schmelczer 2026-04-04 22:59:07 +01:00
parent 0c6d207967
commit 55238f59aa
21 changed files with 2522 additions and 423 deletions

View file

@ -1,6 +1,13 @@
use crate::consts::NAN_U16;
use crate::data::QuantRef;
/// Optional per-enum-value distribution tracking for a single feature.
///
/// Counts how many rows have each enum value (by raw u16 index).
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so a distribution can be
/// logged, snapshotted and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EnumDist {
    /// Position of the tracked feature within a row of raw feature values.
    pub feat_idx: usize,
    /// `counts[v]` is the number of rows whose raw value for the tracked
    /// feature equals `v`; the length is the number of enum values tracked.
    pub counts: Box<[u32]>,
}
/// Per-cell accumulator for aggregating features (min/max/sum/count).
/// Uses Box<[T]> instead of Vec<T> to avoid storing capacity (saves 8 bytes per field per cell).
/// Shared by hexagon and postcode aggregation routes.
@ -10,16 +17,26 @@ pub struct Aggregator {
pub maxs: Box<[f32]>,
pub sums: Box<[f64]>,
pub feat_counts: Box<[u32]>,
/// Optional: per-value counts for a single enum feature (for pie chart visualization).
pub enum_dist: Option<EnumDist>,
}
/// Configuration for enum distribution tracking, passed to Aggregator::new.
///
/// `None` disables tracking. `Some((feature_index, number_of_enum_values))`
/// enables it for the feature at `feature_index` and allocates
/// `number_of_enum_values` zero-initialised counters.
pub type EnumDistConfig = Option<(usize, usize)>;
impl Aggregator {
pub fn new(num_features: usize) -> Self {
/// Creates an empty aggregator for `num_features` features.
///
/// Minimums start at `f32::INFINITY` and maximums at `f32::NEG_INFINITY`;
/// sums and per-feature counts start at zero.
/// When `enum_dist_config` is `Some((feat_idx, num_values))`, a zeroed
/// per-value counter array of length `num_values` is allocated for the
/// feature at `feat_idx`; with `None`, no distribution is tracked.
pub fn new(num_features: usize, enum_dist_config: EnumDistConfig) -> Self {
    // Fixed-size per-feature buffers, boxed so no capacity word is stored.
    let mins = vec![f32::INFINITY; num_features].into_boxed_slice();
    let maxs = vec![f32::NEG_INFINITY; num_features].into_boxed_slice();
    let sums = vec![0.0f64; num_features].into_boxed_slice();
    let feat_counts = vec![0u32; num_features].into_boxed_slice();

    // Optional enum histogram: one zeroed slot per possible enum value.
    let enum_dist = enum_dist_config.map(|(feat_idx, num_values)| {
        let counts = vec![0u32; num_values].into_boxed_slice();
        EnumDist { feat_idx, counts }
    });

    Aggregator {
        count: 0,
        mins,
        maxs,
        sums,
        feat_counts,
        enum_dist,
    }
}
@ -50,6 +67,17 @@ impl Aggregator {
self.feat_counts[feat_index] += 1;
}
}
// Enum distribution: single branch per row (not per feature).
// Uses raw u16 directly — enum features are stored as u16 indices.
if let Some(ref mut ed) = self.enum_dist {
let raw = row_slice[ed.feat_idx];
if raw != NAN_U16 {
let idx = raw as usize;
if idx < ed.counts.len() {
ed.counts[idx] += 1;
}
}
}
}
/// Merge another aggregator's results into this one.
@ -67,6 +95,12 @@ impl Aggregator {
self.feat_counts[i] += other.feat_counts[i];
}
}
// Merge enum distribution counts
if let (Some(ref mut mine), Some(ref theirs)) = (&mut self.enum_dist, &other.enum_dist) {
for (m, t) in mine.counts.iter_mut().zip(theirs.counts.iter()) {
*m += t;
}
}
}
/// Add a row, only aggregating the features at the given indices.
@ -95,5 +129,15 @@ impl Aggregator {
self.feat_counts[feat_index] += 1;
}
}
// Enum distribution (same raw u16 approach)
if let Some(ref mut ed) = self.enum_dist {
let raw = feature_data[base + ed.feat_idx];
if raw != NAN_U16 {
let idx = raw as usize;
if idx < ed.counts.len() {
ed.counts[idx] += 1;
}
}
}
}
}