Improve data pipeline
This commit is contained in:
parent
e8345cbdc1
commit
f99bd4e5c9
36 changed files with 966 additions and 129 deletions
|
|
@ -279,3 +279,37 @@ def test_unknown_crime_type_is_dropped_with_warning(tmp_path, capsys):
|
|||
err = capsys.readouterr().err
|
||||
assert "Cyber fraud" in err
|
||||
assert "WARNING" in err
|
||||
|
||||
|
||||
def test_legacy_crime_types_are_mapped(tmp_path):
    """Legacy (pre-2014) crime-type labels must be folded into their current
    categories by the spatial transform rather than discarded as unknown."""
    month = "2013-01"
    legacy_labels = ("Violent crime", "Public disorder and weapons")
    current_labels = ("Violence and sexual offences", "Public order")

    # Fixture: a single postcode feature under one unit.
    units_dir = tmp_path / "units"
    postcode_square = _square_feature("AB1 1AA", 1000, 1000, 1010, 1010)
    _write_boundaries(units_dir, {"AB1": [postcode_square]})

    # Fixture: one month of crimes, one row per legacy label.
    crime_dir = tmp_path / "crime"
    legacy_rows = [_crime_row(month, 1005, 1005, label) for label in legacy_labels]
    _write_month(crime_dir, month, legacy_rows)

    totals_path = tmp_path / "crime_by_postcode.parquet"
    yearly_path = tmp_path / "crime_by_postcode_by_year.parquet"
    transform_crime_spatial(
        crime_dir, units_dir, totals_path, yearly_path, buffer_m=50.0
    )

    # Single postcode -> area-norm factor 1.0; single month/year -> x12.
    expected_rate = 12.0

    totals_frame = pl.read_parquet(totals_path)
    first_total = totals_frame.to_dicts()[0]
    for label in current_labels:
        assert first_total[f"{label} (avg/yr)"] == expected_rate

    yearly_frame = pl.read_parquet(yearly_path)
    first_yearly = yearly_frame.row(0, named=True)
    for label in current_labels:
        assert first_yearly[f"{label} (by year)"] == [
            {"year": 2013, "count": expected_rate}
        ]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue