Fable findings in data

This commit is contained in:
Andras Schmelczer 2026-06-11 07:49:23 +01:00
parent b98bc6d611
commit 6a33b03fdf
20 changed files with 1502 additions and 274 deletions

View file

@ -258,10 +258,17 @@ pub fn compute_feature_stats(
/// Compute property-weighted per-year crime means across the selection.
///
/// Each matching property contributes its postcode's per-year counts (incidents
/// within 50m of that postcode); this is the same property-weighted-average
/// shape used elsewhere in the right pane. Postcodes with no series for a given
/// crime type contribute 0 for that type (matching how the `(avg/yr)` columns
/// treat missing crime types).
/// near that postcode); this is the same property-weighted-average shape used
/// elsewhere in the right pane.
///
/// Denominators are COVERAGE-AWARE: police.uk has multi-year publication gaps
/// for whole forces (e.g. Greater Manchester from 2019-07), and the pipeline
/// emits a `covered_years` calendar per postcode. A postcode only counts toward
/// a year's denominator if its force published that year — and only then does
/// its missing bar mean a genuine zero. Years no selected postcode covers are
/// omitted entirely (charted as gaps, not zeros). Postcodes without coverage
/// info (legacy parquet without the column) count toward every year, restoring
/// the previous behaviour.
pub fn compute_crime_by_year(
matching_rows: &[usize],
data: &PropertyData,
@ -273,27 +280,34 @@ pub fn compute_crime_by_year(
return Vec::new();
}
// For each crime type, accumulate per-year sums and the count of rows whose
// postcode exists in the crime side table.
let num_types = crime_by_year.crime_types.len();
let mut per_type_year_sums: Vec<FxHashMap<i32, f64>> =
(0..num_types).map(|_| FxHashMap::default()).collect();
let mut per_type_row_counts: Vec<u32> = vec![0; num_types];
// Per-year denominator parts: rows whose coverage calendar includes the
// year, plus rows with no calendar at all (legacy: covered everywhere).
let mut covered_counts: FxHashMap<i32, u32> = FxHashMap::default();
let mut fully_covered_rows: u32 = 0;
for &row in matching_rows {
let postcode = data.postcode(row);
// A postcode absent from the by-year table has no recorded crime within
// 50m, so it contributes 0 to every type's per-year sum. It must still be
// counted in the denominator: the matching `(avg/yr)` stat counts those
// same zero-crime postcodes as 0.0 (crime_by_postcode.parquet has a dense
// row for every boundary postcode), so excluding them here would compute
// the chart over a smaller population and report a higher magnitude than
// the headline. Property postcodes are guaranteed to be boundary
// postcodes by the postcode-boundary-match validation, so "absent" means
// genuinely zero-crime, not missing data.
match crime_by_year.covered_years_by_postcode.get(postcode) {
Some(years) => {
// An empty list (force gap for the whole window / unusable
// boundary geometry) adds nothing: the postcode's crime
// picture is unknown and must not dilute any year's mean.
for &year in years {
*covered_counts.entry(year).or_insert(0) += 1;
}
}
None => fully_covered_rows += 1,
}
// A postcode with a row but no series for a given type had no recorded
// incidents of that type: it contributes 0 to the sums, and its covered
// years still count in the denominator — a genuine zero. Uncovered
// years are excluded via the denominators instead.
if let Some(series_list) = crime_by_year.series_by_postcode.get(postcode) {
// For every type the postcode reports, add its per-year counts.
for series in series_list {
let acc = &mut per_type_year_sums[series.type_idx as usize];
for point in &series.points {
@ -301,9 +315,6 @@ pub fn compute_crime_by_year(
}
}
}
for c in per_type_row_counts.iter_mut() {
*c += 1;
}
}
let mut out = Vec::new();
@ -317,10 +328,6 @@ pub fn compute_crime_by_year(
continue;
}
}
let row_count = per_type_row_counts[type_idx];
if row_count == 0 {
continue;
}
let years = crime_by_year
.years_by_type
.get(type_idx)
@ -329,15 +336,26 @@ pub fn compute_crime_by_year(
if years.is_empty() {
continue;
}
let denom = row_count as f64;
let sums = &per_type_year_sums[type_idx];
let points: Vec<CrimeYearPoint> = years
.iter()
.map(|&year| CrimeYearPoint {
year,
count: (sums.get(&year).copied().unwrap_or(0.0) / denom) as f32,
.filter_map(|&year| {
let denom = fully_covered_rows
+ covered_counts.get(&year).copied().unwrap_or(0);
if denom == 0 {
// No selected postcode has published data for this year.
return None;
}
Some(CrimeYearPoint {
year,
count: (sums.get(&year).copied().unwrap_or(0.0) / denom as f64)
as f32,
})
})
.collect();
if points.is_empty() {
continue;
}
out.push(CrimeYearStats {
name: name.clone(),
points,