Improve data pipeline
This commit is contained in:
parent
e8345cbdc1
commit
f99bd4e5c9
36 changed files with 966 additions and 129 deletions
|
|
@@ -17,7 +17,7 @@ from pathlib import Path
|
|||
import polars as pl
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.transform.crime import find_street_crime_csvs
|
||||
from pipeline.transform.crime import LEGACY_CRIME_TYPE_ALIASES, find_street_crime_csvs
|
||||
|
||||
|
||||
def _latest_months(crime_dir: Path, month_count: int) -> list[str]:
|
||||
|
|
@@ -80,6 +80,10 @@ def _write_geojsonseq(csvs: list[Path], output_path: Path) -> tuple[int, int]:
|
|||
.drop_nulls(["lon", "lat"])
|
||||
.filter(pl.col("lon").is_between(-9.5, 5.0))
|
||||
.filter(pl.col("lat").is_between(49.0, 57.0))
|
||||
# Canonicalise any legacy pre-2014 type names so the heatmap's crime_type
|
||||
# values always match the frontend's canonical filter list (a no-op for
|
||||
# the recent months this overlay normally covers).
|
||||
.with_columns(pl.col("crime_type").replace(LEGACY_CRIME_TYPE_ALIASES))
|
||||
.group_by("lon", "lat", "month", "crime_type")
|
||||
.len()
|
||||
.rename({"len": "count"})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue