"""Compute POI proximity counts and distances per postcode from ArcGIS + filtered POIs.""" import argparse from pathlib import Path import polars as pl from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_postcode # POI category groups for proximity counting (2km radius). # Names must match the friendly names produced by transform_poi.py / naptan.py. POI_GROUPS_2KM = { "restaurants": ["Restaurant", "Fast Food"], "groceries": ["Greengrocer", "Supermarket", "Convenience Store"], "parks": ["Park"], } # Train/tube stations counted at 1km radius TRAIN_TUBE_GROUP = { "train_tube": ["Metro or Tram stop", "Rail station"], } # Groups for which to compute distance to nearest POI DISTANCE_GROUPS = { "train_tube": ["Metro or Tram stop", "Rail station"], } def main(): parser = argparse.ArgumentParser( description="Count POIs within radius per postcode" ) parser.add_argument( "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet" ) parser.add_argument( "--pois", type=Path, required=True, help="Filtered POIs parquet" ) parser.add_argument( "--output", type=Path, required=True, help="Output parquet path" ) args = parser.parse_args() postcodes = pl.read_parquet(args.arcgis).select( pl.col("pcds").alias("postcode"), "lat", pl.col("long").alias("lon"), ) pois = pl.read_parquet(args.pois) # Count amenity POIs within 2km counts_2km = count_pois_per_postcode( postcodes, pois, groups=POI_GROUPS_2KM, radius_km=2 ) # Count train/tube stations within 1km counts_1km = count_pois_per_postcode( postcodes, pois, groups=TRAIN_TUBE_GROUP, radius_km=1 ) # Distance to nearest train/tube station distances = min_distance_per_postcode(postcodes, pois, groups=DISTANCE_GROUPS) # Join all results on postcode result = counts_2km.join(counts_1km, on="postcode").join(distances, on="postcode") result.write_parquet(args.output) size_mb = args.output.stat().st_size / (1024 * 1024) print(f"Wrote {args.output} ({size_mb:.1f} MB)") if __name__ == "__main__": main()