# perfect-postcode/pipeline/transform/test_crime_hotspot_tiles.py

import json

from pipeline.transform.crime_hotspot_tiles import _write_geojsonseq

# Header line for the CSV fixtures produced by _write_csv, assembled from the
# individual column names so each one is easy to spot.
_HEADER = ",".join(
    (
        "Crime ID",
        "Month",
        "Reported by",
        "Falls within",
        "Longitude",
        "Latitude",
        "Location",
        "LSOA code",
        "LSOA name",
        "Crime type",
        "Last outcome category",
        "Context",
    )
)


def _row(lon, lat, month, crime_type):
    """Return one comma-separated CSV data line for a single incident.

    Only the longitude, latitude, month and crime type vary; the Crime ID and
    Context cells are left empty and the remaining cells hold fixed
    placeholder values.
    """
    cells = (
        "",  # Crime ID
        month,
        "F",  # Reported by
        "F",  # Falls within
        lon,
        lat,
        "On or near X",  # Location
        "E01000001",  # LSOA code
        "L",  # LSOA name
        crime_type,
        "U",  # Last outcome category
        "",  # Context
    )
    # format(cell) applies the same default formatting an f-string would.
    return ",".join(format(cell) for cell in cells)


def _write_csv(path, rows):
    """Write the header line followed by ``rows`` to ``path``.

    Lines are separated by newlines and the file ends with a trailing newline.
    """
    lines = [_HEADER]
    lines.extend(rows)
    body = "\n".join(lines)
    path.write_text(body + "\n")


def test_write_geojsonseq_collapses_shared_anchors_into_weighted_features(tmp_path):
    """Incidents sharing coordinate, month and crime type merge into one feature.

    Writes a small CSV fixture with two identical burglaries, one robbery at
    the same coordinate, one out-of-bounds row and one row with no
    coordinates, then checks the counts returned by ``_write_geojsonseq`` and
    the features it wrote to the output file.
    """
    csv_path = tmp_path / "2024-01-test-street.csv"
    _write_csv(
        csv_path,
        [
            # Two incidents snapped to the exact same anchor/month/type -> one
            # feature with count=2.
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            # Same coord, different crime type -> kept separate (per-type filter).
            _row(-0.1, 51.5, "2024-01", "Robbery"),
            # Out of bounds -> dropped entirely.
            _row(-0.1, 80.0, "2024-01", "Burglary"),
            # Missing coordinate -> dropped entirely.
            _row("", "", "2024-01", "Burglary"),
        ],
    )

    out = tmp_path / "hotspots.geojsonseq"
    feature_count, incident_count = _write_geojsonseq([csv_path], out)

    features = [json.loads(line) for line in out.read_text().splitlines()]
    assert feature_count == 2
    # Check the written file against the returned tally: the by-type mapping
    # below keeps one entry per crime type, so it would hide un-merged
    # duplicate features on its own.
    assert len(features) == feature_count
    assert incident_count == 3  # 2 burglaries + 1 robbery, in-bounds only

    by_type = {f["properties"]["crime_type"]: f["properties"] for f in features}
    # The busy anchor is a single feature carrying its full incident weight,
    # so tippecanoe's density thinning can no longer silently erase it.
    assert by_type["Burglary"]["count"] == 2
    assert by_type["Burglary"]["weight"] == 2
    assert by_type["Robbery"]["count"] == 1
    # Geometry preserved as [lon, lat].
    assert all(f["geometry"]["coordinates"] == [-0.1, 51.5] for f in features)