"""Compute POI proximity counts and distances per postcode from ArcGIS + filtered POIs."""
|
|
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
|
|
from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_postcode
|
|
|
|
|
|
# POI category groups for proximity counting (2km radius).
# Each key names a group; its value lists the POI category names that belong
# to that group. Category names must match the friendly names produced by
# transform_poi.py / naptan.py.
POI_GROUPS_2KM = {
    "restaurants": ["Restaurant", "Fast Food"],
    "groceries": ["Greengrocer", "Supermarket", "Convenience Store"],
    "parks": ["Park"],
}
|
|
|
|
# Train/tube stations are counted at a 1km radius, so they live in their own
# group mapping rather than alongside the 2km amenity groups.
TRAIN_TUBE_GROUP = {
    "train_tube": ["Metro or Tram stop", "Rail station"],
}
|
|
|
|
# Groups for which to compute the distance to the nearest POI.
# Same group-name -> category-names shape as the counting groups.
DISTANCE_GROUPS = {
    "train_tube": ["Metro or Tram stop", "Rail station"],
}
|
|
|
|
|
|
def main():
    """Build the per-postcode POI proximity table and write it as parquet.

    Command-line arguments (all required):
        --arcgis: ArcGIS postcode parquet; its ``pcds``, ``lat`` and ``long``
            columns are read and renamed to ``postcode``, ``lat`` and ``lon``.
        --pois: Filtered POIs parquet, passed unchanged to the helpers.
        --output: Path the joined result is written to.

    The function counts POIs per postcode for the 2km amenity groups and the
    1km train/tube group, computes the nearest-POI distance for the distance
    groups, joins the three results on ``postcode``, writes the result and
    prints the output file size.
    """
    parser = argparse.ArgumentParser(
        description="Count POIs within radius per postcode"
    )
    parser.add_argument(
        "--arcgis", type=Path, required=True, help="ArcGIS postcode parquet"
    )
    parser.add_argument(
        "--pois", type=Path, required=True, help="Filtered POIs parquet"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet path"
    )
    args = parser.parse_args()

    # Keep only the columns the helpers are given, under the names used here.
    postcodes = pl.read_parquet(args.arcgis).select(
        pl.col("pcds").alias("postcode"),
        "lat",
        pl.col("long").alias("lon"),
    )

    pois = pl.read_parquet(args.pois)

    # Count amenity POIs within 2km
    counts_2km = count_pois_per_postcode(
        postcodes, pois, groups=POI_GROUPS_2KM, radius_km=2
    )

    # Count train/tube stations within 1km
    counts_1km = count_pois_per_postcode(
        postcodes, pois, groups=TRAIN_TUBE_GROUP, radius_km=1
    )

    # Distance to nearest train/tube station
    distances = min_distance_per_postcode(postcodes, pois, groups=DISTANCE_GROUPS)

    # Join all results on postcode.
    # NOTE(review): these joins use polars' default join strategy; this
    # presumably relies on every helper returning one row per input postcode,
    # otherwise postcodes would be dropped - confirm against poi_counts.
    # NOTE(review): the 1km count and the distance both use the group key
    # "train_tube"; verify the helpers emit distinct column names for them.
    result = counts_2km.join(counts_1km, on="postcode").join(distances, on="postcode")

    result.write_parquet(args.output)
    # Report the size of the file just written, in MiB.
    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Wrote {args.output} ({size_mb:.1f} MB)")
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|