This commit is contained in:
Andras Schmelczer 2026-06-02 20:14:32 +01:00
parent fbfebc651c
commit aab85fe32e
33 changed files with 2016 additions and 283 deletions

View file

@ -273,6 +273,28 @@ def _write_avg_yr(
for type_idx, name in enumerate(ALL_CRIME_TYPES):
data[f"{name} (avg/yr)"] = avg[:, type_idx]
# Serious/Minor rollup headlines, computed the SAME way as the by-year rollup
# bars (_write_by_year/_rollup_long): sum the rollup's types per year, then
# average over the years in which ANY of those types occurred. This keeps the
# headline equal to the mean of the "Serious/Minor crime (by year)" bars.
# Summing the per-type avg/yr values instead (as the merge previously did)
# divides each type by its OWN years-present and overstates the rollup when a
# postcode's serious/minor types occur in disjoint years.
for rollup_name, rollup_types in (
("Serious crime", SERIOUS_CRIME_TYPES),
("Minor crime", MINOR_CRIME_TYPES),
):
rollup_idx = [ALL_CRIME_TYPES.index(name) for name in rollup_types]
rollup_counts = counts[:, rollup_idx, :].sum(axis=1) # (n_postcodes, n_years)
rollup_per_year = per_year[:, rollup_idx, :].sum(axis=1)
rollup_years_present = np.clip(
(rollup_counts > 0).sum(axis=1), 1, None
).astype(np.float64)
rollup_avg = rollup_per_year.sum(axis=1) / rollup_years_present
data[f"{rollup_name} (avg/yr)"] = np.round(rollup_avg * norm, 1).astype(
np.float32
)
output_path.parent.mkdir(parents=True, exist_ok=True)
pl.DataFrame(data).write_parquet(output_path, compression="zstd")
print(f"Wrote postcode crime averages: {output_path}")