Rerun data pipelines
This commit is contained in:
parent
4c95815dc8
commit
fc10381692
27 changed files with 2143 additions and 215 deletions
47
pipeline/transform/test_crime.py
Normal file
47
pipeline/transform/test_crime.py
Normal file
|
|
@@ -0,0 +1,47 @@
|
|||
import polars as pl
|
||||
|
||||
from pipeline.transform.crime import find_street_crime_csvs, transform_crime
|
||||
|
||||
|
||||
def test_find_street_crime_csvs_ignores_archive_sidecars(tmp_path):
    """Check that only the street CSV is returned and the other CSVs are counted.

    Builds a ``crime/2024-01`` folder holding one ``*-street.csv`` file plus
    two other CSVs, and a stray ``notes.csv`` directly under ``crime``. The
    finder is expected to return just the street file and an ignored count
    of three.
    """
    root = tmp_path / "crime"
    january = root / "2024-01"
    january.mkdir(parents=True)

    expected = january / "2024-01-test-force-street.csv"
    expected.touch()

    # Files the finder must skip: two in the month folder, one at the top level.
    for sidecar in (
        january / "2024-01-test-force-outcomes.csv",
        january / "2024-01-test-force-stop-and-search.csv",
        root / "notes.csv",
    ):
        sidecar.touch()

    found, skipped = find_street_crime_csvs(root)

    assert found == [expected]
    assert skipped == 3
|
||||
|
||||
|
||||
def test_transform_crime_reads_only_street_crime_csvs(tmp_path):
    """Check the parquet output when a non-street CSV sits beside the street CSV.

    The street file holds two Burglary rows for LSOA ``E01000001`` and one
    Robbery row whose LSOA code is blank. An outcomes CSV with a different
    column layout is written alongside it. After ``transform_crime`` runs,
    the parquet file is expected to contain exactly one record for
    ``E01000001``.
    """
    root = tmp_path / "crime"
    january = root / "2024-01"
    january.mkdir(parents=True)

    header = "Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context"
    rows = [
        "1,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
        "2,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,E01000001,Test LSOA,Burglary,Under investigation,",
        "3,2024-01,Test Force,Test Force,-0.1,51.5,On or near Test Street,,No LSOA,Robbery,Under investigation,",
    ]
    # Every line, including the last, is terminated with a newline.
    street_text = "".join(f"{line}\n" for line in [header, *rows])
    (january / "2024-01-test-force-street.csv").write_text(street_text)

    # Non-street file in the same month folder, with a different set of columns.
    outcomes_text = (
        "Crime ID,Month,Reported by,Outcome type\n1,2024-01,Test Force,Charged\n"
    )
    (january / "2024-01-test-force-outcomes.csv").write_text(outcomes_text)

    parquet_path = tmp_path / "crime.parquet"
    transform_crime(root, parquet_path)

    records = pl.read_parquet(parquet_path).to_dicts()

    assert records == [{"LSOA code": "E01000001", "Burglary (avg/yr)": 2.0}]
|
||||
Loading…
Add table
Add a link
Reference in a new issue