Improve data pipeline

2026-06-01 20:10:03 +01:00 · 2026-06-01 20:10:03 +01:00 · f99bd4e5c9
commit f99bd4e5c9
parent e8345cbdc1
36 changed files with 966 additions and 129 deletions
--- a/pipeline/transform/crime_hotspot_tiles.py
+++ b/pipeline/transform/crime_hotspot_tiles.py
@@ -17,7 +17,7 @@ from pathlib import Path
 import polars as pl

 from pipeline.local_temp import local_tmp_dir
-from pipeline.transform.crime import find_street_crime_csvs
+from pipeline.transform.crime import LEGACY_CRIME_TYPE_ALIASES, find_street_crime_csvs


 def _latest_months(crime_dir: Path, month_count: int) -> list[str]:
@@ -80,6 +80,10 @@ def _write_geojsonseq(csvs: list[Path], output_path: Path) -> tuple[int, int]:
        .drop_nulls(["lon", "lat"])
        .filter(pl.col("lon").is_between(-9.5, 5.0))
        .filter(pl.col("lat").is_between(49.0, 57.0))
+        # Canonicalise any legacy pre-2014 type names so the heatmap's crime_type
+        # values always match the frontend's canonical filter list (a no-op for
+        # the recent months this overlay normally covers).
+        .with_columns(pl.col("crime_type").replace(LEGACY_CRIME_TYPE_ALIASES))
        .group_by("lon", "lat", "month", "crime_type")
        .len()
        .rename({"len": "count"})