Fable findings in data
This commit is contained in:
parent
b98bc6d611
commit
6a33b03fdf
20 changed files with 1502 additions and 274 deletions
|
|
@ -258,10 +258,17 @@ pub fn compute_feature_stats(
|
|||
/// Compute property-weighted per-year crime means across the selection.
|
||||
///
|
||||
/// Each matching property contributes its postcode's per-year counts (incidents
|
||||
/// within 50m of that postcode); this is the same property-weighted-average
|
||||
/// shape used elsewhere in the right pane. Postcodes with no series for a given
|
||||
/// crime type contribute 0 for that type (matching how the `(avg/yr)` columns
|
||||
/// treat missing crime types).
|
||||
/// near that postcode); this is the same property-weighted-average shape used
|
||||
/// elsewhere in the right pane.
|
||||
///
|
||||
/// Denominators are COVERAGE-AWARE: police.uk has multi-year publication gaps
|
||||
/// for whole forces (e.g. Greater Manchester from 2019-07), and the pipeline
|
||||
/// emits a `covered_years` calendar per postcode. A postcode only counts toward
|
||||
/// a year's denominator if its force published that year — and only then does
|
||||
/// its missing bar mean a genuine zero. Years no selected postcode covers are
|
||||
/// omitted entirely (charted as gaps, not zeros). Postcodes without coverage
|
||||
/// info (legacy parquet without the column) count toward every year, restoring
|
||||
/// the previous behaviour.
|
||||
pub fn compute_crime_by_year(
|
||||
matching_rows: &[usize],
|
||||
data: &PropertyData,
|
||||
|
|
@ -273,27 +280,34 @@ pub fn compute_crime_by_year(
|
|||
return Vec::new();
|
||||
}
|
||||
|
||||
// For each crime type, accumulate per-year sums and the count of rows whose
|
||||
// postcode exists in the crime side table.
|
||||
let num_types = crime_by_year.crime_types.len();
|
||||
let mut per_type_year_sums: Vec<FxHashMap<i32, f64>> =
|
||||
(0..num_types).map(|_| FxHashMap::default()).collect();
|
||||
let mut per_type_row_counts: Vec<u32> = vec![0; num_types];
|
||||
// Per-year denominator parts: rows whose coverage calendar includes the
|
||||
// year, plus rows with no calendar at all (legacy: covered everywhere).
|
||||
let mut covered_counts: FxHashMap<i32, u32> = FxHashMap::default();
|
||||
let mut fully_covered_rows: u32 = 0;
|
||||
|
||||
for &row in matching_rows {
|
||||
let postcode = data.postcode(row);
|
||||
|
||||
// A postcode absent from the by-year table has no recorded crime within
|
||||
// 50m, so it contributes 0 to every type's per-year sum. It must still be
|
||||
// counted in the denominator: the matching `(avg/yr)` stat counts those
|
||||
// same zero-crime postcodes as 0.0 (crime_by_postcode.parquet has a dense
|
||||
// row for every boundary postcode), so excluding them here would compute
|
||||
// the chart over a smaller population and report a higher magnitude than
|
||||
// the headline. Property postcodes are guaranteed to be boundary
|
||||
// postcodes by the postcode-boundary-match validation, so "absent" means
|
||||
// genuinely zero-crime, not missing data.
|
||||
match crime_by_year.covered_years_by_postcode.get(postcode) {
|
||||
Some(years) => {
|
||||
// An empty list (force gap for the whole window / unusable
|
||||
// boundary geometry) adds nothing: the postcode's crime
|
||||
// picture is unknown and must not dilute any year's mean.
|
||||
for &year in years {
|
||||
*covered_counts.entry(year).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
None => fully_covered_rows += 1,
|
||||
}
|
||||
|
||||
// A postcode with a row but no series for a given type had no recorded
|
||||
// incidents of that type: it contributes 0 to the sums, and its covered
|
||||
// years still count in the denominator — a genuine zero. Uncovered
|
||||
// years are excluded via the denominators instead.
|
||||
if let Some(series_list) = crime_by_year.series_by_postcode.get(postcode) {
|
||||
// For every type the postcode reports, add its per-year counts.
|
||||
for series in series_list {
|
||||
let acc = &mut per_type_year_sums[series.type_idx as usize];
|
||||
for point in &series.points {
|
||||
|
|
@ -301,9 +315,6 @@ pub fn compute_crime_by_year(
|
|||
}
|
||||
}
|
||||
}
|
||||
for c in per_type_row_counts.iter_mut() {
|
||||
*c += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut out = Vec::new();
|
||||
|
|
@ -317,10 +328,6 @@ pub fn compute_crime_by_year(
|
|||
continue;
|
||||
}
|
||||
}
|
||||
let row_count = per_type_row_counts[type_idx];
|
||||
if row_count == 0 {
|
||||
continue;
|
||||
}
|
||||
let years = crime_by_year
|
||||
.years_by_type
|
||||
.get(type_idx)
|
||||
|
|
@ -329,15 +336,26 @@ pub fn compute_crime_by_year(
|
|||
if years.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let denom = row_count as f64;
|
||||
let sums = &per_type_year_sums[type_idx];
|
||||
let points: Vec<CrimeYearPoint> = years
|
||||
.iter()
|
||||
.map(|&year| CrimeYearPoint {
|
||||
year,
|
||||
count: (sums.get(&year).copied().unwrap_or(0.0) / denom) as f32,
|
||||
.filter_map(|&year| {
|
||||
let denom = fully_covered_rows
|
||||
+ covered_counts.get(&year).copied().unwrap_or(0);
|
||||
if denom == 0 {
|
||||
// No selected postcode has published data for this year.
|
||||
return None;
|
||||
}
|
||||
Some(CrimeYearPoint {
|
||||
year,
|
||||
count: (sums.get(&year).copied().unwrap_or(0.0) / denom as f64)
|
||||
as f32,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
if points.is_empty() {
|
||||
continue;
|
||||
}
|
||||
out.push(CrimeYearStats {
|
||||
name: name.clone(),
|
||||
points,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue