perfect-postcode/pipeline/transform/poi_proximity.py
2026-03-12 20:27:04 +00:00

75 lines
2.2 KiB
Python

"""Compute POI proximity counts and distances per postcode from ArcGIS + filtered POIs."""
import argparse
from pathlib import Path
import polars as pl
from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_postcode
# Each mapping below goes from an output group name to the POI category
# names that belong to that group. The category names must match the
# friendly names produced by transform_poi.py / naptan.py.

# Amenity groups, counted within a 2km radius of each postcode.
POI_GROUPS_2KM = {
    "restaurants": [
        "Restaurant",
        "Fast Food",
    ],
    "groceries": [
        "Greengrocer",
        "Supermarket",
        "Convenience Store",
    ],
    "parks": [
        "Park",
    ],
}

# Train/tube stations, counted within a 1km radius of each postcode.
TRAIN_TUBE_GROUP = {
    "train_tube": [
        "Metro or Tram stop",
        "Rail station",
    ],
}

# Groups for which the distance to the nearest matching POI is computed.
DISTANCE_GROUPS = {
    "train_tube": [
        "Metro or Tram stop",
        "Rail station",
    ],
}
def main(argv: "list[str] | None" = None) -> None:
    """Build the per-postcode POI proximity table and write it to parquet.

    Reads the ArcGIS postcode parquet and the filtered POI parquet, then
    computes:

    * POI counts within 2km for the groups in ``POI_GROUPS_2KM``,
    * train/tube station counts within 1km (``TRAIN_TUBE_GROUP``),
    * distance to the nearest train/tube station (``DISTANCE_GROUPS``).

    The three results are joined on ``postcode`` and written to the path
    given by ``--output``; the output size is then printed.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is the behaviour when the
            module is run as a script. Passing a list allows the step to be
            invoked programmatically.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            when ``--help`` is requested.
    """
    parser = argparse.ArgumentParser(
        description="Count POIs within radius per postcode"
    )
    parser.add_argument(
        "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet"
    )
    parser.add_argument(
        "--pois", type=Path, required=True, help="Filtered POIs parquet"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet path"
    )
    args = parser.parse_args(argv)

    # Normalise the ArcGIS column names to postcode / lat / lon before
    # handing the frame to the proximity helpers.
    postcodes = pl.read_parquet(args.arcgis).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lon"),
    )
    pois = pl.read_parquet(args.pois)

    # Count amenity POIs within 2km
    counts_2km = count_pois_per_postcode(
        postcodes, pois, groups=POI_GROUPS_2KM, radius_km=2
    )

    # Count train/tube stations within 1km
    counts_1km = count_pois_per_postcode(
        postcodes, pois, groups=TRAIN_TUBE_GROUP, radius_km=1
    )

    # Distance to nearest train/tube station
    distances = min_distance_per_postcode(postcodes, pois, groups=DISTANCE_GROUPS)

    # Join all results on postcode.
    # NOTE(review): assuming the helpers return polars DataFrames, these
    # joins use the default how="inner", so a postcode missing from any one
    # result is dropped from the output — confirm each helper returns one
    # row per input postcode.
    result = counts_2km.join(counts_1km, on="postcode").join(distances, on="postcode")

    # Make sure the destination directory exists so the write does not fail
    # on a fresh checkout / clean output tree.
    args.output.parent.mkdir(parents=True, exist_ok=True)
    result.write_parquet(args.output)

    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()