This commit is contained in:
Andras Schmelczer 2026-06-02 20:14:32 +01:00
parent fbfebc651c
commit aab85fe32e
33 changed files with 2016 additions and 283 deletions

View file

@ -252,6 +252,47 @@ def test_avg_yr_is_simple_mean_of_year_bars(tmp_path):
assert bars == {2023: pytest.approx(24.0, abs=0.05), 2024: pytest.approx(12.0, abs=0.05)}
def test_serious_rollup_avg_yr_equals_mean_of_rollup_bars(tmp_path):
    # Scenario: one postcode whose two SERIOUS crime types never share a year --
    # Burglary appears only in 2014 and Robbery only in 2024, each as a single
    # full month (-> 12/yr).
    #
    # Expectation: the headline "Serious crime (avg/yr)" is the mean of the
    # "Serious crime (by year)" bars, whose years are the UNION of the years in
    # which any serious type occurred. It must NOT be the sum of the per-type
    # means: that divides each type by its OWN years-present (1 each) and yields
    # 12 + 12 = 24, whereas the consistent rollup divides the per-year serious
    # total by the 2 years any serious type occurred and yields 12.
    tolerance = 0.05
    headline = "Serious crime (avg/yr)"

    # Boundary fixture: a single 10x10 square for postcode "AB1 1AA".
    units = tmp_path / "units"
    postcode_square = _square_feature("AB1 1AA", 1000, 1000, 1010, 1010)
    _write_boundaries(units, {"AB1": [postcode_square]})

    # Crime fixture: one incident per month file, both at the square's centre.
    crime = tmp_path / "crime"
    for month, crime_type in (("2014-01", "Burglary"), ("2024-01", "Robbery")):
        _write_month(crime, month, [_crime_row(month, 1005, 1005, crime_type)])

    avg_path = tmp_path / "crime_by_postcode.parquet"
    yearly_path = tmp_path / "crime_by_postcode_by_year.parquet"
    transform_crime_spatial(crime, units, avg_path, yearly_path, buffer_m=50.0)

    # The precomputed rollup headline must exist, and every avg/yr figure --
    # each per-type value as well as the rollup -- must be 12. A rollup of 24
    # would mean the per-type averages had been summed.
    headline_row = pl.read_parquet(avg_path).row(0, named=True)
    assert headline in headline_row
    for column in ("Burglary (avg/yr)", "Robbery (avg/yr)", headline):
        assert headline_row[column] == pytest.approx(12.0, abs=tolerance)

    # The by-year rollup carries one bar for each year a serious type occurred.
    yearly_row = pl.read_parquet(yearly_path).row(0, named=True)
    serious_bars = {
        bar["year"]: bar["count"] for bar in yearly_row["Serious crime (by year)"]
    }
    assert serious_bars == {
        year: pytest.approx(12.0, abs=tolerance) for year in (2014, 2024)
    }

    # Tie the headline to the bars directly, not just to the literal 12.
    bar_counts = list(serious_bars.values())
    expected_headline = sum(bar_counts) / len(bar_counts)
    assert headline_row[headline] == pytest.approx(expected_headline, abs=tolerance)
def test_avg_yr_denominator_is_per_postcode_not_global(tmp_path):
# P (AB1 1AA) has burglaries only in its single most-recent year (2024); Q
# (AB1 1AB), far away, has a burglary in 2014. The type therefore spans TWO