Rerun prepare script

This commit is contained in:
Andras Schmelczer 2026-04-06 11:13:52 +01:00
parent 349a6c1d53
commit 8614acdfae
24 changed files with 1132 additions and 226 deletions

View file

@@ -705,12 +705,19 @@ impl PropertyData {
})
.collect::<anyhow::Result<Vec<_>>>()?;
// Compute quantization parameters from feature stats (numeric features)
// Compute quantization parameters from feature stats (numeric features).
// For features with Fixed bounds, use those bounds so the full configured range
// is representable — the histogram refinement can narrow min/max to exclude
// "outliers" that are actually valid data (e.g. ethnicity percentages).
// For Percentile-bounded features, use the (possibly refined) histogram range
// so extreme outliers don't destroy precision for the main distribution.
let mut quant_min = Vec::with_capacity(num_features);
let mut quant_range = Vec::with_capacity(num_features);
for stats in &numeric_feature_stats {
let min = stats.histogram.min;
let max = stats.histogram.max;
for (feat_idx, stats) in numeric_feature_stats.iter().enumerate() {
let (min, max) = match features::bounds_for(numeric_names[feat_idx]) {
Some(Bounds::Fixed { min, max }) => (*min, *max),
_ => (stats.histogram.min, stats.histogram.max),
};
quant_min.push(min);
quant_range.push(if max > min { max - min } else { 0.0 });
}