idk

2026-06-02 13:46:18 +01:00 · 2026-06-02 13:46:18 +01:00 · d43da9708c
commit d43da9708c
parent a04ac2d857
47 changed files with 4120 additions and 573 deletions
--- a/pipeline/transform/crime_spatial.py
+++ b/pipeline/transform/crime_spatial.py
@@ -259,11 +259,14 @@ def _write_avg_yr(
    """
    months = np.array([months_in_year[year] for year in years], dtype=np.float64)
    per_year = counts.astype(np.float64) * 12.0 / months[None, None, :]
-    # Average over the years each type is actually observed anywhere -- the same
-    # per-type x-span the by-year chart plots (server-rs/.../crime_by_year.rs).
-    type_year_present = counts.sum(axis=0) > 0  # (n_types, n_years)
-    years_per_type = np.clip(type_year_present.sum(axis=1), 1, None).astype(np.float64)
-    avg = per_year.sum(axis=2) / years_per_type[None, :]  # (n_postcodes, n_types)
+    # Average over the years *this postcode* actually has incidents of *this
+    # type* -- the same per-(postcode, type) x-span the by-year chart plots
+    # (server-rs/.../crime_by_year.rs), so the headline equals the mean of the
+    # by-year bars. Dividing by a global years-present count (years a type
+    # appeared anywhere in England) would deflate postcodes whose incidents
+    # cluster in only a few years of the ~13-year window.
+    years_present = np.clip((counts > 0).sum(axis=2), 1, None).astype(np.float64)
+    avg = per_year.sum(axis=2) / years_present  # (n_postcodes, n_types)
    avg = np.round(avg * norm[:, None], 1).astype(np.float32)

    data: dict[str, np.ndarray] = {"postcode": postcodes}