Refactor and other improvements

This commit is contained in:
Andras Schmelczer 2026-02-08 18:25:58 +00:00
parent 04a78e7bfe
commit 6c90cf3c0f
47 changed files with 2705 additions and 1568 deletions

View file

@ -0,0 +1,70 @@
/// Per-cell accumulator for aggregating features (min/max/sum/count).
/// Uses Box<[T]> instead of Vec<T> to avoid storing capacity (saves 8 bytes per field per cell).
/// Shared by hexagon and postcode aggregation routes.
pub struct Aggregator {
pub count: u32,
pub mins: Box<[f32]>,
pub maxs: Box<[f32]>,
pub sums: Box<[f64]>,
pub feat_counts: Box<[u32]>,
}
impl Aggregator {
pub fn new(num_features: usize) -> Self {
Aggregator {
count: 0,
mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
sums: vec![0.0f64; num_features].into_boxed_slice(),
feat_counts: vec![0u32; num_features].into_boxed_slice(),
}
}
/// Add a row using row-major feature_data layout.
/// feature_data[row * num_features + feat_idx] — all features for one row
/// are contiguous, so this reads a single cache line per ~8 features.
#[inline]
pub fn add_row(&mut self, feature_data: &[f32], row: usize, num_features: usize) {
self.count += 1;
let base = row * num_features;
let row_slice = &feature_data[base..base + num_features];
for (feat_index, &value) in row_slice.iter().enumerate() {
if value.is_finite() {
if value < self.mins[feat_index] {
self.mins[feat_index] = value;
}
if value > self.maxs[feat_index] {
self.maxs[feat_index] = value;
}
self.sums[feat_index] += value as f64;
self.feat_counts[feat_index] += 1;
}
}
}
/// Add a row, only aggregating the features at the given indices.
#[inline]
pub fn add_row_selective(
&mut self,
feature_data: &[f32],
row: usize,
num_features: usize,
indices: &[usize],
) {
self.count += 1;
let base = row * num_features;
for &feat_index in indices {
let value = feature_data[base + feat_index];
if value.is_finite() {
if value < self.mins[feat_index] {
self.mins[feat_index] = value;
}
if value > self.maxs[feat_index] {
self.maxs[feat_index] = value;
}
self.sums[feat_index] += value as f64;
self.feat_counts[feat_index] += 1;
}
}
}
}