Improve data
This commit is contained in:
parent
b4d66a28c1
commit
85da1941aa
31 changed files with 901 additions and 319 deletions
|
|
@ -252,14 +252,15 @@ def test_avg_yr_is_simple_mean_of_year_bars(tmp_path):
|
|||
assert bars == {2023: pytest.approx(24.0, abs=0.05), 2024: pytest.approx(12.0, abs=0.05)}
|
||||
|
||||
|
||||
def test_serious_rollup_avg_yr_equals_mean_of_rollup_bars(tmp_path):
|
||||
def test_serious_rollup_avg_yr_equals_sum_of_components(tmp_path):
|
||||
# Two SERIOUS types occur in DISJOINT years for one postcode: Burglary only in
|
||||
# 2014, Robbery only in 2024 (each a single full month -> 12/yr). The headline
|
||||
# "Serious crime (avg/yr)" must equal the mean of the "Serious crime (by year)"
|
||||
# bars (which span the UNION of years any serious type occurred), NOT the sum
|
||||
# of the per-type means. Summing per-type means divides each type by its OWN
|
||||
# years-present (1 each) -> 12 + 12 = 24; the consistent rollup divides the
|
||||
# per-year serious total by the years any serious type occurred (2) -> 12.
|
||||
# "Serious crime (avg/yr)" must equal the SUM of its component (avg/yr) columns
|
||||
# (Burglary 12 + Robbery 12 = 24), so the rollup is always the sum of the parts
|
||||
# shown beside it and can never fall below a single component. (The previous
|
||||
# union-years-present mean would have divided the per-year serious total by the
|
||||
# 2 years any serious type occurred, giving a misleading 12 that is only half
|
||||
# the sum of the burglary and robbery components and no higher than either.)
|
||||
units = tmp_path / "units"
|
||||
_write_boundaries(
|
||||
units, {"AB1": [_square_feature("AB1 1AA", 1000, 1000, 1010, 1010)]}
|
||||
|
|
@ -274,13 +275,16 @@ def test_serious_rollup_avg_yr_equals_mean_of_rollup_bars(tmp_path):
|
|||
transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)
|
||||
|
||||
avg = pl.read_parquet(output).row(0, named=True)
|
||||
# The precomputed rollup headline exists and equals the mean of the bars (12),
|
||||
# not the sum of the per-type avg/yr values (Burglary 12 + Robbery 12 = 24).
|
||||
assert "Serious crime (avg/yr)" in avg
|
||||
assert avg["Burglary (avg/yr)"] == pytest.approx(12.0, abs=0.05)
|
||||
assert avg["Robbery (avg/yr)"] == pytest.approx(12.0, abs=0.05)
|
||||
assert avg["Serious crime (avg/yr)"] == pytest.approx(12.0, abs=0.05)
|
||||
# Rollup == sum of its component (avg/yr) columns.
|
||||
assert avg["Serious crime (avg/yr)"] == pytest.approx(24.0, abs=0.05)
|
||||
assert avg["Serious crime (avg/yr)"] == pytest.approx(
|
||||
avg["Burglary (avg/yr)"] + avg["Robbery (avg/yr)"], abs=0.05
|
||||
)
|
||||
|
||||
# The by-year rollup series remains the per-year sum of the component bars.
|
||||
serious_bars = {
|
||||
p["year"]: p["count"]
|
||||
for p in pl.read_parquet(by_year).row(0, named=True)["Serious crime (by year)"]
|
||||
|
|
@ -289,8 +293,6 @@ def test_serious_rollup_avg_yr_equals_mean_of_rollup_bars(tmp_path):
|
|||
2014: pytest.approx(12.0, abs=0.05),
|
||||
2024: pytest.approx(12.0, abs=0.05),
|
||||
}
|
||||
mean_of_bars = sum(serious_bars.values()) / len(serious_bars)
|
||||
assert avg["Serious crime (avg/yr)"] == pytest.approx(mean_of_bars, abs=0.05)
|
||||
|
||||
|
||||
def test_avg_yr_denominator_is_per_postcode_not_global(tmp_path):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue