idk
This commit is contained in:
parent
a04ac2d857
commit
d43da9708c
47 changed files with 4120 additions and 573 deletions
|
|
@ -158,6 +158,53 @@ def test_transform_crime_writes_by_year_output(tmp_path):
|
|||
assert serious[2024] == 12.0
|
||||
|
||||
|
||||
def test_transform_crime_headline_is_mean_of_per_year_bars(tmp_path):
    """Check that the avg/yr headline is the plain mean of the by-year bars.

    Each bar is one year's annualised count; the headline must average those
    bars rather than pool every incident over every month. The fixture gives
    2023 a single month of data and 2024 two months, so the two calculations
    produce different numbers and the test can tell them apart.
    """
    crime_root = tmp_path / "crime"
    csv_header = "Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context"

    def write_month(month, crime_ids):
        # One directory and one street-level CSV per month, holding one
        # Burglary row per crime id.
        month_dir = crime_root / month
        month_dir.mkdir(parents=True)
        lines = [csv_header]
        for crime_id in crime_ids:
            lines.append(
                f"{crime_id},{month},F,F,-0.1,51.5,X,E01000001,L,Burglary,U,"
            )
        csv_path = month_dir / f"{month}-test-force-street.csv"
        csv_path.write_text("\n".join(lines) + "\n")

    # 2023: 6 burglaries in 1 month -> 6 * 12 / 1 = 72/yr.
    write_month("2023-01", range(1, 7))
    # 2024: 2 burglaries across 2 months -> 2 * 12 / 2 = 12/yr.
    write_month("2024-01", [7])
    write_month("2024-02", [8])

    headline_path = tmp_path / "crime.parquet"
    bars_path = tmp_path / "crime_by_year.parquet"
    transform_crime(crime_root, headline_path, bars_path)

    # Mean of the per-year bars is (72 + 12) / 2 = 42.0; the pooled rate
    # (8 incidents / 3 months * 12 = 32.0) is the value this test rejects.
    headline_row = pl.read_parquet(headline_path).to_dicts()[0]
    assert headline_row["Burglary (avg/yr)"] == 42.0

    bars_row = pl.read_parquet(bars_path).row(0, named=True)
    bars = {}
    for point in bars_row["Burglary (by year)"]:
        bars[point["year"]] = point["count"]
    assert bars == {2023: 72.0, 2024: 12.0}

    # The headline must equal the mean of the bars it summarises.
    assert headline_row["Burglary (avg/yr)"] == sum(bars.values()) / len(bars)
|
||||
|
||||
|
||||
def test_transform_crime_fails_without_valid_months(tmp_path):
|
||||
crime_dir = tmp_path / "crime"
|
||||
month_dir = crime_dir / "2024-01"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue