"""Build PMTiles point tiles for the crime heatmap overlay. The output intentionally keeps point features rather than H3/grid aggregates so MapLibre can render a true client-side heatmap. Police.uk coordinates are published anonymous map points, not exact offence locations. """ from __future__ import annotations import argparse import json import shutil import subprocess import tempfile from pathlib import Path import polars as pl from pipeline.local_temp import local_tmp_dir from pipeline.transform.crime import LEGACY_CRIME_TYPE_ALIASES, find_street_crime_csvs def _latest_months(crime_dir: Path, month_count: int) -> list[str]: csvs, _ignored = find_street_crime_csvs(crime_dir) months = sorted({path.parent.name for path in csvs}) if not months: raise FileNotFoundError(f"No street crime CSVs found in {crime_dir}") return months[-month_count:] def _street_csvs_for_months(crime_dir: Path, months: set[str]) -> list[Path]: csvs, _ignored = find_street_crime_csvs(crime_dir) selected = [path for path in csvs if path.parent.name in months] if not selected: raise FileNotFoundError(f"No street crime CSVs found for {sorted(months)}") return selected def _require_tippecanoe() -> str: executable = shutil.which("tippecanoe") if executable is None: raise RuntimeError( "tippecanoe is required to build crime hotspot PMTiles. " "Install tippecanoe and rerun this target." ) return executable def _write_geojsonseq(csvs: list[Path], output_path: Path) -> tuple[int, int]: """Write one weighted GeoJSON point per distinct (anchor, month, type). Returns ``(feature_count, incident_count)``. police.uk snaps every incident to a shared "map point" anchor, so many incidents land on the exact same coordinate. Collapsing them into one feature carrying ``count`` (the number of incidents) keeps the per-crime-type and per-month filters intact while turning each hotspot into a single high-weight point. That matters because tippecanoe's ``--drop-densest-as-needed`` thins *feature density*, not weight: with one feature per row the busiest streets were silently deleted; with one weighted feature per anchor those hotspots survive and the dropped detail is only redundant duplicate points. The heatmap reads ``count`` as its weight. """ grouped = ( pl.scan_csv( csvs, schema_overrides={ "Longitude": pl.Float64, "Latitude": pl.Float64, "Month": pl.Utf8, "Crime type": pl.Utf8, }, ignore_errors=True, ) .select( pl.col("Longitude").alias("lon"), pl.col("Latitude").alias("lat"), pl.col("Month").alias("month"), pl.col("Crime type").alias("crime_type"), ) .drop_nulls(["lon", "lat"]) .filter(pl.col("lon").is_between(-9.5, 5.0)) .filter(pl.col("lat").is_between(49.0, 57.0)) # Canonicalise any legacy pre-2014 type names so the heatmap's crime_type # values always match the frontend's canonical filter list (a no-op for # the recent months this overlay normally covers). .with_columns(pl.col("crime_type").replace(LEGACY_CRIME_TYPE_ALIASES)) .group_by("lon", "lat", "month", "crime_type") .len() .rename({"len": "count"}) .collect(engine="streaming") ) incident_count = int(grouped["count"].sum()) with output_path.open("w") as file: for row in grouped.iter_rows(named=True): feature = { "type": "Feature", "geometry": { "type": "Point", "coordinates": [row["lon"], row["lat"]], }, "properties": { "count": row["count"], "weight": row["count"], "month": row["month"], "crime_type": row["crime_type"], }, } file.write(json.dumps(feature, separators=(",", ":")) + "\n") return grouped.height, incident_count def build_crime_hotspot_tiles( crime_dir: Path, output_path: Path, months: int, min_zoom: int, max_zoom: int, ) -> None: tippecanoe = _require_tippecanoe() selected_months = set(_latest_months(crime_dir, months)) csvs = _street_csvs_for_months(crime_dir, selected_months) output_path.parent.mkdir(parents=True, exist_ok=True) with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as tmp: ndjson_path = Path(tmp) / "crime_hotspots.geojsonseq" feature_count, incident_count = _write_geojsonseq(csvs, ndjson_path) print( f"Writing {feature_count:,} weighted crime heatmap points " f"({incident_count:,} incidents) " f"from {min(selected_months)} to {max(selected_months)}" ) subprocess.run( [ tippecanoe, "--force", "--output", str(output_path), "--layer", "crime_hotspots", "--minimum-zoom", str(min_zoom), "--maximum-zoom", str(max_zoom), "--drop-densest-as-needed", "--extend-zooms-if-still-dropping", "--temporary-directory", tmp, str(ndjson_path), ], check=True, ) def main() -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--input", type=Path, required=True, help="Crime CSV directory") parser.add_argument( "--output", type=Path, required=True, help="Output .pmtiles path" ) parser.add_argument( "--months", type=int, default=12, help="Latest complete months to include in the heatmap", ) parser.add_argument("--min-zoom", type=int, default=12) parser.add_argument("--max-zoom", type=int, default=16) args = parser.parse_args() build_crime_hotspot_tiles( args.input, args.output, args.months, args.min_zoom, args.max_zoom, ) if __name__ == "__main__": main()