"""Compute POI proximity counts and distances per postcode from ArcGIS + filtered POIs.""" import argparse from pathlib import Path import polars as pl from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_postcode # POI category groups for proximity counting (2km radius). # Names must match the friendly names produced by transform_poi.py / naptan.py. POI_GROUPS_2KM = { "restaurants": ["Restaurant", "Fast Food"], "groceries": ["Greengrocer", "Supermarket", "Convenience Store"], } # Groups for which to compute distance to nearest POI (from filtered POIs) DISTANCE_GROUPS = { "train_tube": ["Tube station", "Rail station"], } # OS Open Greenspace function types used for park counts and distance calculation. # Uses the authoritative OS dataset instead of OSM point POIs for better coverage # of green spaces that are only mapped as polygons in OSM. GREENSPACE_PARK_FUNCTIONS = { "parks": ["Public Park Or Garden", "Playing Field", "Play Space"], } def main(): parser = argparse.ArgumentParser( description="Count POIs within radius per postcode" ) parser.add_argument( "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet" ) parser.add_argument( "--pois", type=Path, required=True, help="Filtered POIs parquet" ) parser.add_argument( "--greenspace", type=Path, required=True, help="OS Open Greenspace centroids parquet", ) parser.add_argument( "--output", type=Path, required=True, help="Output parquet path" ) args = parser.parse_args() postcodes = pl.read_parquet(args.arcgis).select( pl.col("pcds").alias("postcode"), "lat", pl.col("long").alias("lon"), ) pois = pl.read_parquet(args.pois) # Count amenity POIs within 2km counts_2km = count_pois_per_postcode( postcodes, pois, groups=POI_GROUPS_2KM, radius_km=2 ) # Distance to nearest train/tube station (from filtered POIs) distances = min_distance_per_postcode(postcodes, pois, groups=DISTANCE_GROUPS) # Park counts and distances from OS Open Greenspace greenspace = pl.read_parquet(args.greenspace) park_counts_1km = count_pois_per_postcode( postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS, radius_km=1 ) park_distances = min_distance_per_postcode( postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS ) # Join all results on postcode result = ( counts_2km.join(distances, on="postcode") .join(park_counts_1km, on="postcode") .join(park_distances, on="postcode") ) result.write_parquet(args.output) size_mb = args.output.stat().st_size / (1024 * 1024) print(f"Wrote {args.output} ({size_mb:.1f} MB)") if __name__ == "__main__": main()