52 lines
2 KiB
Python
52 lines
2 KiB
Python
import json
|
|
|
|
from pipeline.transform.crime_hotspot_tiles import _write_geojsonseq
|
|
|
|
# Header line placed at the top of every CSV fixture; `_row` emits its values
# in this same column order.
_HEADER = ",".join(
    (
        "Crime ID",
        "Month",
        "Reported by",
        "Falls within",
        "Longitude",
        "Latitude",
        "Location",
        "LSOA code",
        "LSOA name",
        "Crime type",
        "Last outcome category",
        "Context",
    )
)
|
|
|
|
|
|
def _row(lon, lat, month, crime_type):
    """Return one CSV data line whose values follow the ``_HEADER`` column order.

    Only the month, longitude, latitude and crime type vary; the first
    (crime ID) and last (context) columns are left empty and every other
    column holds a fixed placeholder.
    """
    fields = (
        "",  # Crime ID
        month,
        "F",  # Reported by
        "F",  # Falls within
        lon,
        lat,
        "On or near X",  # Location
        "E01000001",  # LSOA code
        "L",  # LSOA name
        crime_type,
        "U",  # Last outcome category
        "",  # Context
    )
    # format(v) is what an f-string placeholder applies, so the rendered
    # text of numbers and strings is unchanged.
    return ",".join(map(format, fields))
|
|
|
|
|
|
def _write_csv(path, rows):
    """Write a CSV fixture to *path*: the ``_HEADER`` line followed by *rows*.

    Args:
        path: ``pathlib.Path``-like object exposing ``write_text``.
        rows: Iterable of already-formatted CSV lines (no trailing newline).

    The file always ends with a newline, even when *rows* is empty.
    """
    lines = [_HEADER, *rows]
    # Pin the encoding so the fixture's bytes do not depend on the locale of
    # the machine running the tests.
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
|
|
|
|
def test_write_geojsonseq_collapses_shared_anchors_into_weighted_features(tmp_path):
    """Rows sharing a coordinate, month and crime type become one weighted feature.

    Also checks that rows with an out-of-range or missing coordinate are
    excluded from both the written features and the returned incident count.
    """
    # Named csv_path rather than csv to avoid shadowing the stdlib module name.
    csv_path = tmp_path / "2024-01-test-street.csv"
    _write_csv(
        csv_path,
        [
            # Two incidents snapped to the exact same anchor/month/type -> one
            # feature with count=2.
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            # Same coord, different crime type -> kept separate (per-type filter).
            _row(-0.1, 51.5, "2024-01", "Robbery"),
            # Out of bounds -> dropped entirely.
            _row(-0.1, 80.0, "2024-01", "Burglary"),
            # Missing coordinate -> dropped entirely.
            _row("", "", "2024-01", "Burglary"),
        ],
    )

    out = tmp_path / "hotspots.geojsonseq"
    feature_count, incident_count = _write_geojsonseq([csv_path], out)

    # The output is parsed as one JSON document per line.
    lines = out.read_text(encoding="utf-8").splitlines()
    features = [json.loads(line) for line in lines]
    assert feature_count == 2
    # The by-type index below keeps only one feature per crime type, so make
    # sure the file really holds exactly the reported number of features.
    assert len(features) == feature_count
    assert incident_count == 3  # 2 burglaries + 1 robbery, in-bounds only

    by_type = {f["properties"]["crime_type"]: f["properties"] for f in features}
    # The busy anchor is a single feature carrying its full incident weight,
    # so tippecanoe's density thinning can no longer silently erase it.
    assert by_type["Burglary"]["count"] == 2
    assert by_type["Burglary"]["weight"] == 2
    assert by_type["Robbery"]["count"] == 1
    # Geometry preserved as [lon, lat].
    assert all(f["geometry"]["coordinates"] == [-0.1, 51.5] for f in features)
|