More fixes

This commit is contained in:
Andras Schmelczer 2026-03-12 20:27:04 +00:00
parent 791bc6976b
commit 14a3555cf1
21 changed files with 549 additions and 99 deletions

View file

@ -1,25 +1,29 @@
"""Compute POI proximity counts per postcode from ArcGIS + filtered POIs."""
"""Compute POI proximity counts and distances per postcode from ArcGIS + filtered POIs."""
import argparse
from pathlib import Path
import polars as pl
from pipeline.utils.poi_counts import count_pois_per_postcode
from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_postcode
# POI category groups for proximity counting.
# POI category groups for proximity counting (2km radius).
# Names must match the friendly names produced by transform_poi.py / naptan.py.
POI_GROUPS = {
POI_GROUPS_2KM = {
    # Output group name -> POI category names that are tallied under it.
    "restaurants": [
        "Restaurant",
        "Fast Food",
    ],
    "groceries": [
        "Greengrocer",
        "Supermarket",
        "Convenience Store",
    ],
    "parks": [
        "Park",
    ],
    # The public transport categories come from naptan.py.
    "public_transport": [
        "Metro or Tram stop",
        "Rail station",
        "Bus stop",
        "Bus station",
    ],
}
# Rail and metro/tram stops only; this group is counted at a 1km radius.
TRAIN_TUBE_GROUP = {
    "train_tube": [
        "Metro or Tram stop",
        "Rail station",
    ],
}
# Category groups for which the distance to the nearest POI is computed.
DISTANCE_GROUPS = {
    "train_tube": [
        "Metro or Tram stop",
        "Rail station",
    ],
}
@ -46,7 +50,21 @@ def main():
pois = pl.read_parquet(args.pois)
result = count_pois_per_postcode(postcodes, pois, groups=POI_GROUPS, radius_km=2)
# Count amenity POIs within 2km
counts_2km = count_pois_per_postcode(
postcodes, pois, groups=POI_GROUPS_2KM, radius_km=2
)
# Count train/tube stations within 1km
counts_1km = count_pois_per_postcode(
postcodes, pois, groups=TRAIN_TUBE_GROUP, radius_km=1
)
# Distance to nearest train/tube station
distances = min_distance_per_postcode(postcodes, pois, groups=DISTANCE_GROUPS)
# Join all results on postcode
result = counts_2km.join(counts_1km, on="postcode").join(distances, on="postcode")
result.write_parquet(args.output)
size_mb = args.output.stat().st_size / (1024 * 1024)

View file

@ -16,8 +16,8 @@ def compute_voronoi_regions(
if len(points) == 1:
return {postcodes[0]: boundary}
# UPRN coordinates are int64 (BNG grid refs in whole meters).
# Convert to float64 so sub-meter jitter isn't truncated.
# UPRN coordinates are int64 (BNG grid refs in whole metres).
# Convert to float64 so sub-metre jitter isn't truncated.
points = points.astype(np.float64)
# Deduplicate points, keeping one per (location, postcode) pair.