This commit is contained in:
Andras Schmelczer 2026-05-13 08:00:12 +01:00
parent 63713c3a2b
commit bd6b511f16
17 changed files with 544 additions and 377 deletions

View file

@ -17,27 +17,6 @@ POI_GROUPS_2KM = {
"groceries": ["Greengrocer", "Supermarket", "Convenience Store"],
}
# Groups for which to compute distance to nearest POI (from filtered POIs).
# Keep `train_tube` for the existing backend feature; the individual POI
# distance filters below power the frontend dropdown.
DISTANCE_GROUPS = {
    # Combined station group.
    "train_tube": ["Tube station", "Rail station"],
    # Generic grocery categories together with the two named chains.
    "grocery_store": [
        "Greengrocer",
        "Supermarket",
        "Convenience Store",
        "Waitrose",
        "Tesco",
    ],
    # Single-category groups: one distance metric per individual POI type.
    "tube_station": ["Tube station"],
    "rail_station": ["Rail station"],
    "waitrose": ["Waitrose"],
    "tesco": ["Tesco"],
    "cafe": ["Café"],
    "pub": ["Pub"],
    "restaurant": ["Restaurant"],
}
# OS Open Greenspace function types used for park counts and distance calculation.
# Uses the authoritative OS dataset instead of OSM point POIs for better coverage
# of green spaces that are only mapped as polygons in OSM.
@ -48,6 +27,7 @@ GREENSPACE_PARK_FUNCTIONS = {
# Count threshold used when selecting dynamic categories: the category's "len"
# value must be strictly greater than this to pass the count-based condition.
GROCERY_DYNAMIC_FILTER_MIN_POIS = 100
# NOTE(review): presumably the POI groups whose categories all receive dynamic
# filters regardless of count — confirm against _build_poi_category_groups.
DYNAMIC_FILTER_ALL_GROUPS = {"Public Transport", "Leisure"}
# NOTE(review): presumably the POI groups whose categories are subject to the
# count threshold above — confirm against _build_poi_category_groups.
DYNAMIC_FILTER_COUNT_THRESHOLD_GROUPS = {"Groceries"}
# Categories removed from the dynamic category selection even when they would
# otherwise qualify; park metrics are computed from the greenspace data instead.
DYNAMIC_FILTER_EXCLUDED_CATEGORIES = {"Park"}
def _poi_category_slug(category: str) -> str:
@ -78,6 +58,7 @@ def _build_poi_category_groups(
& (pl.col("len") > GROCERY_DYNAMIC_FILTER_MIN_POIS)
)
)
.filter(~pl.col("category").is_in(list(DYNAMIC_FILTER_EXCLUDED_CATEGORIES)))
.select("category")
.sort("category")
.to_series()
@ -103,9 +84,11 @@ def _build_poi_category_groups(
def _dynamic_poi_metric_renames(display_names: dict[str, str]) -> dict[str, str]:
renames: dict[str, str] = {}
for group_key, category in display_names.items():
renames[f"{group_key}_nearest_km"] = f"Distance to nearest {category} POI (km)"
renames[f"{group_key}_2km"] = f"Number of {category} POIs within 2km"
renames[f"{group_key}_5km"] = f"Number of {category} POIs within 5km"
renames[f"{group_key}_nearest_km"] = (
f"Distance to nearest amenity ({category}) (km)"
)
renames[f"{group_key}_2km"] = f"Number of amenities ({category}) within 2km"
renames[f"{group_key}_5km"] = f"Number of amenities ({category}) within 5km"
return renames
@ -139,12 +122,12 @@ def main():
pois = pl.read_parquet(args.pois)
poi_category_groups, poi_display_names = _build_poi_category_groups(pois)
# Count amenity POIs within 2km
# Count static amenity groups within 2km.
counts_2km = count_pois_per_postcode(
postcodes, pois, groups=POI_GROUPS_2KM, radius_km=2
)
# Dynamic POI filters: nearest distance plus counts within 2km and 5km for
# Dynamic amenity filters: nearest distance plus counts within 2km and 5km for
# the selected public transport, grocery, and leisure categories.
dynamic_counts_2km = count_pois_per_postcode(
postcodes, pois, groups=poi_category_groups, radius_km=2
@ -166,25 +149,37 @@ def main():
{k: v for k, v in dynamic_renames.items() if k in dynamic_distances.columns}
)
# Distance to nearest train/tube station (from filtered POIs)
distances = min_distance_per_postcode(postcodes, pois, groups=DISTANCE_GROUPS)
# Park counts and distances from OS Open Greenspace
# Park counts and distances from OS Open Greenspace. They use the dynamic
# amenity metric names so filters read through the same side-table path as
# OSM-derived amenity metrics.
greenspace = pl.read_parquet(args.greenspace)
park_counts_1km = count_pois_per_postcode(
postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS, radius_km=1
park_counts_2km = count_pois_per_postcode(
postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS, radius_km=2
)
park_counts_5km = count_pois_per_postcode(
postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS, radius_km=5
)
park_distances = min_distance_per_postcode(
postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS
)
park_renames = _dynamic_poi_metric_renames({"parks": "Park"})
park_counts_2km = park_counts_2km.rename(
{k: v for k, v in park_renames.items() if k in park_counts_2km.columns}
)
park_counts_5km = park_counts_5km.rename(
{k: v for k, v in park_renames.items() if k in park_counts_5km.columns}
)
park_distances = park_distances.rename(
{k: v for k, v in park_renames.items() if k in park_distances.columns}
)
# Join all results on postcode
result = (
counts_2km.join(distances, on="postcode")
.join(dynamic_counts_2km, on="postcode")
counts_2km.join(dynamic_counts_2km, on="postcode")
.join(dynamic_counts_5km, on="postcode")
.join(dynamic_distances, on="postcode")
.join(park_counts_1km, on="postcode")
.join(park_counts_2km, on="postcode")
.join(park_counts_5km, on="postcode")
.join(park_distances, on="postcode")
)