This commit is contained in:
Andras Schmelczer 2026-02-07 19:13:36 +00:00
parent 555ba7cf53
commit 3c2e527328
6 changed files with 64 additions and 26 deletions

View file

@ -10,7 +10,11 @@ def transform_crime(crime_dir: Path, output_path: Path) -> None:
df = pl.scan_csv(
csvs,
schema_overrides={"LSOA code": pl.Utf8, "Crime type": pl.Utf8, "Month": pl.Utf8},
schema_overrides={
"LSOA code": pl.Utf8,
"Crime type": pl.Utf8,
"Month": pl.Utf8,
},
).select("LSOA code", "Crime type", "Month")
# Extract year, count crimes per LSOA / year / crime type

View file

@ -5,7 +5,7 @@ from pathlib import Path
import polars as pl
from pipeline.utils.poi_counts import _count_pois_per_postcode
from pipeline.utils.poi_counts import count_pois_per_postcode
# POI category groups for proximity counting
@ -13,11 +13,15 @@ POI_GROUPS = {
"restaurants": ["Restaurant", "Fast Food"],
"groceries": ["Greengrocer", "Grocery Shop", "Supermarket", "Convenience Store"],
"parks": ["Park", "Garden", "Nature Reserve"],
"public_transport": ["Metro or Tram stop", "Rail station", "Bus stop", "Bus station"], # comes from naptan.py
"public_transport": [
"Metro or Tram stop",
"Rail station",
"Bus stop",
"Bus station",
], # comes from naptan.py
}
def main():
parser = argparse.ArgumentParser(
description="Count POIs within radius per postcode"
@ -41,7 +45,7 @@ def main():
pois = pl.read_parquet(args.pois)
result = _count_pois_per_postcode(postcodes, pois, groups=POI_GROUPS, radius_km=2)
result = count_pois_per_postcode(postcodes, pois, groups=POI_GROUPS, radius_km=2)
result.write_parquet(args.output)
size_mb = args.output.stat().st_size / (1024 * 1024)