Rust things

This commit is contained in:
Andras Schmelczer 2026-05-10 14:55:43 +01:00
parent fc10381692
commit 3debacab4f
30 changed files with 3257 additions and 647 deletions

View file

@@ -1,5 +1,5 @@
use crate::consts::NAN_U16;
use crate::data::QuantRef;
use crate::data::{PostcodePoiMetrics, QuantRef};
/// Optional per-enum-value distribution tracking for a single feature.
/// Counts how many rows have each enum value (by raw u16 index).
@@ -21,6 +21,69 @@ pub struct Aggregator {
pub enum_dist: Option<EnumDist>,
}
/// Running per-metric statistics for postcode-level POI metrics, which are stored
/// outside `feature_data`.
///
/// Only constructed when a request selects POI metric fields. The four slices are
/// indexed by the same metric index: position `i` in each describes one metric.
pub struct PoiAggregator {
    /// Smallest value recorded so far for each metric.
    pub mins: Box<[f32]>,
    /// Largest value recorded so far for each metric.
    pub maxs: Box<[f32]>,
    /// Sum of the recorded values for each metric, accumulated as `f64`.
    pub sums: Box<[f64]>,
    /// Number of values recorded for each metric.
    pub counts: Box<[u32]>,
}
impl PoiAggregator {
    /// Creates an empty accumulator with `num_features` slots.
    ///
    /// Minimums start at `f32::INFINITY` and maximums at `f32::NEG_INFINITY`, so any
    /// finite value recorded first replaces both. Sums and counts start at zero.
    pub fn new(num_features: usize) -> Self {
        let mins: Box<[f32]> = vec![f32::INFINITY; num_features].into();
        let maxs: Box<[f32]> = vec![f32::NEG_INFINITY; num_features].into();
        let sums: Box<[f64]> = vec![0.0f64; num_features].into();
        let counts: Box<[u32]> = vec![0u32; num_features].into();
        Self {
            mins,
            maxs,
            sums,
            counts,
        }
    }

    /// Folds the selected metrics of one property row into the running statistics.
    ///
    /// Does nothing when `poi_metrics` reports no metric row for `row`. For every
    /// index in `indices`, a raw value equal to `NAN_U16` is skipped; any other raw
    /// value is decoded and folded into that metric's min, max, sum and count.
    ///
    /// # Panics
    ///
    /// Panics if an index in `indices` that yields a non-sentinel raw value is out
    /// of bounds for this accumulator's slices.
    #[inline]
    pub fn add_row_selective(
        &mut self,
        poi_metrics: &PostcodePoiMetrics,
        row: usize,
        indices: &[usize],
    ) {
        if let Some(metric_row) = poi_metrics.metric_row_for_property(row) {
            for &idx in indices {
                let raw = poi_metrics.raw_for_metric_row(metric_row, idx);
                if raw != NAN_U16 {
                    let decoded = poi_metrics.decode_raw(idx, raw);

                    // Strict comparisons are kept on purpose: a NaN on either side
                    // leaves the stored extreme unchanged.
                    let lowest = &mut self.mins[idx];
                    if decoded < *lowest {
                        *lowest = decoded;
                    }
                    let highest = &mut self.maxs[idx];
                    if decoded > *highest {
                        *highest = decoded;
                    }

                    self.sums[idx] += f64::from(decoded);
                    self.counts[idx] += 1;
                }
            }
        }
    }

    /// Merges the statistics gathered by `other` into `self`.
    ///
    /// Slots for which `other` recorded nothing (count of zero) are left untouched,
    /// whatever `other`'s min/max/sum hold there. The loop covers this accumulator's
    /// slot count.
    ///
    /// # Panics
    ///
    /// Panics on indexing if `other` has fewer slots than `self`.
    pub fn merge(&mut self, other: &PoiAggregator) {
        let slots = self.counts.len();
        for slot in (0..slots).filter(|&s| other.counts[s] != 0) {
            let their_min = other.mins[slot];
            if their_min < self.mins[slot] {
                self.mins[slot] = their_min;
            }

            let their_max = other.maxs[slot];
            if their_max > self.maxs[slot] {
                self.maxs[slot] = their_max;
            }

            self.sums[slot] += other.sums[slot];
            self.counts[slot] += other.counts[slot];
        }
    }
}
/// Configuration for enum distribution tracking, passed to `Aggregator::new`.
///
/// When present, the tuple is `(feature_index, number_of_enum_values)`.
/// NOTE(review): `None` presumably means no enum distribution is tracked — confirm
/// against `Aggregator::new`, which is not visible here.
pub type EnumDistConfig = Option<(usize, usize)>;