# perfect-postcode/pipeline/transform/test_crime_hotspot_tiles.py
# 2026-05-31 20:20:41 +01:00
#
# 52 lines
# 2 KiB
# Python

import json
from pipeline.transform.crime_hotspot_tiles import _write_geojsonseq
_HEADER = (
"Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,"
"LSOA code,LSOA name,Crime type,Last outcome category,Context"
)
def _row(lon, lat, month, crime_type):
return f",{month},F,F,{lon},{lat},On or near X,E01000001,L,{crime_type},U,"
def _write_csv(path, rows):
    """Write ``_HEADER`` followed by ``rows`` to ``path``, one line each.

    Args:
        path: Object exposing ``write_text`` (the test passes a path derived
            from pytest's ``tmp_path``).
        rows: Iterable of already-formatted CSV lines without newlines.

    The file always ends with a single trailing newline.
    """
    # Pin the encoding so the fixture bytes do not depend on the host locale.
    path.write_text("\n".join([_HEADER, *rows]) + "\n", encoding="utf-8")
def test_write_geojsonseq_collapses_shared_anchors_into_weighted_features(tmp_path):
    """Incidents sharing anchor, month and type collapse into one feature.

    Writes a small CSV fixture into ``tmp_path``, runs ``_write_geojsonseq``
    over it, and checks both the returned ``(feature_count, incident_count)``
    pair and the features written one-JSON-object-per-line to the output file.
    """
    csv_path = tmp_path / "2024-01-test-street.csv"
    _write_csv(
        csv_path,
        [
            # Two incidents snapped to the exact same anchor/month/type -> one
            # feature with count=2.
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            # Same coord, different crime type -> kept separate (per-type filter).
            _row(-0.1, 51.5, "2024-01", "Robbery"),
            # Out of bounds -> dropped entirely.
            _row(-0.1, 80.0, "2024-01", "Burglary"),
            # Missing coordinate -> dropped entirely.
            _row("", "", "2024-01", "Burglary"),
        ],
    )
    out = tmp_path / "hotspots.geojsonseq"

    feature_count, incident_count = _write_geojsonseq([csv_path], out)
    features = [
        json.loads(line) for line in out.read_text(encoding="utf-8").splitlines()
    ]

    assert feature_count == 2
    assert incident_count == 3  # 2 burglaries + 1 robbery, in-bounds only
    # Keying by crime type below would silently merge duplicate features of the
    # same type, so pin the number of written lines to the reported count first.
    assert len(features) == feature_count

    by_type = {f["properties"]["crime_type"]: f["properties"] for f in features}
    # The busy anchor is a single feature carrying its full incident weight,
    # so tippecanoe's density thinning can no longer silently erase it.
    assert by_type["Burglary"]["count"] == 2
    assert by_type["Burglary"]["weight"] == 2
    assert by_type["Robbery"]["count"] == 1

    # Geometry preserved as [lon, lat].
    assert all(f["geometry"]["coordinates"] == [-0.1, 51.5] for f in features)