Rerun prepare script

This commit is contained in:
Andras Schmelczer 2026-04-06 11:13:52 +01:00
parent 349a6c1d53
commit 8614acdfae
24 changed files with 1132 additions and 226 deletions

View file

@ -705,12 +705,19 @@ impl PropertyData {
})
.collect::<anyhow::Result<Vec<_>>>()?;
// Compute quantization parameters from feature stats (numeric features)
// Compute quantization parameters from feature stats (numeric features).
// For features with Fixed bounds, use those bounds so the full configured range
// is representable — the histogram refinement can narrow min/max to exclude
// "outliers" that are actually valid data (e.g. ethnicity percentages).
// For Percentile-bounded features, use the (possibly refined) histogram range
// so extreme outliers don't destroy precision for the main distribution.
let mut quant_min = Vec::with_capacity(num_features);
let mut quant_range = Vec::with_capacity(num_features);
for stats in &numeric_feature_stats {
let min = stats.histogram.min;
let max = stats.histogram.max;
for (feat_idx, stats) in numeric_feature_stats.iter().enumerate() {
let (min, max) = match features::bounds_for(numeric_names[feat_idx]) {
Some(Bounds::Fixed { min, max }) => (*min, *max),
_ => (stats.histogram.min, stats.histogram.max),
};
quant_min.push(min);
quant_range.push(if max > min { max - min } else { 0.0 });
}

View file

@ -143,8 +143,8 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
name: "Estimated monthly rent",
bounds: Bounds::Percentile { low: 2.0, high: 98.0 },
step: 25.0,
description: "Median monthly private rent for the local area",
detail: "Median monthly rental price from ONS Private Rental Market Summary Statistics (Oct 2022 - Sep 2023), matched by local authority and bedroom count. Based on Valuation Office Agency lettings data.",
description: "Mean monthly private rent for the local area",
detail: "Mean monthly rental price from ONS Price Index of Private Rents (PIPR), matched by local authority and bedroom count.",
source: "ons-rental",
prefix: "£",
suffix: "/mo",
@ -947,22 +947,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Majority (%)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.5,
description:
"Winning margin as a percentage of valid votes in the 2024 General Election",
detail: "The difference in votes between the winning candidate and the runner-up, expressed as a percentage of total valid votes cast. A small majority indicates a marginal seat (competitive); a large majority indicates a safe seat. From the July 2024 UK General Election results published by the UK Parliament.",
source: "election-results",
prefix: "",
suffix: "%",
raw: false,
absolute: false,
}),
],
},
FeatureGroup {