perfect-postcode/pipeline/transform/test_poi_proximity.py
2026-06-02 13:46:18 +01:00

90 lines
2.8 KiB
Python

import polars as pl
from pipeline.transform.poi_proximity import (
POI_GROUPS_2KM,
_build_poi_category_groups,
_dynamic_poi_metric_renames,
_groceries_categories,
)
from pipeline.utils.poi_counts import count_pois_per_postcode
def test_groceries_2km_counts_geolytix_brand_categories() -> None:
    """Check that the 2km groceries count picks up GEOLYTIX brand POIs.

    GEOLYTIX puts the brand name (e.g. "Tesco") in `category`, with the
    group set to "Groceries", and never emits the literal "Supermarket".
    Matching on the OSM category strings alone therefore counts the
    supermarket row but misses the brand store.
    """
    # A single postcode with two grocery POIs a short distance away: one
    # brand-named row and one OSM-style "Supermarket" row.
    postcode_frame = pl.DataFrame(
        {
            "postcode": ["SW1A 1AA"],
            "lat": [51.5010],
            "lon": [-0.1416],
        }
    )
    poi_frame = pl.DataFrame(
        {
            "category": ["Tesco", "Supermarket"],
            "group": ["Groceries", "Groceries"],
            "lat": [51.5011, 51.5012],
            "lng": [-0.1417, -0.1418],
        }
    )

    # Start from the static 2km groups and swap in the groceries categories
    # derived from the POI data itself.
    groups_for_count = dict(POI_GROUPS_2KM)
    groups_for_count["groceries"] = _groceries_categories(poi_frame)

    counts = count_pois_per_postcode(
        postcode_frame,
        poi_frame,
        groups=groups_for_count,
        radius_km=2,
    )

    # Expect 2: the brand row ("Tesco") and the "Supermarket" row. Before the
    # fix the static list was ["Greengrocer", "Supermarket", "Convenience
    # Store"], which left "Tesco" out and produced 1.
    assert counts["groceries_2km"][0] == 2
def test_dynamic_poi_groups_include_requested_categories_only() -> None:
    """Check which categories `_build_poi_category_groups` exposes.

    The fixture mixes categories from several groups with different row
    counts, all at the same coordinates. The display names returned must be
    exactly the expected six, and the keys for Waitrose, Park and School
    must be absent from the returned groups.
    """
    # (group, category, number of rows) for each run of identical POI rows,
    # in the order the rows appear in the frame.
    # NOTE(review): Tesco (101 rows) is expected while Waitrose (100 rows) is
    # not, which suggests a row-count cutoff for grocery brands; confirm
    # against _build_poi_category_groups.
    poi_runs = [
        ("Public Transport", "Rail station", 1),
        ("Public Transport", "Bus stop", 1),
        ("Leisure", "Café", 1),
        ("Leisure", "Restaurant", 1),
        ("Groceries", "Tesco", 101),
        ("Groceries", "Waitrose", 100),
        ("Leisure", "Park", 10),
        ("Education", "School", 200),
        ("Health", "Pharmacy", 200),
    ]

    group_column = []
    category_column = []
    for group_name, category_name, row_count in poi_runs:
        group_column.extend([group_name] * row_count)
        category_column.extend([category_name] * row_count)

    # Every POI shares one location, so the coordinate columns are constant
    # and only need to match the total row count (615).
    total_rows = len(group_column)
    pois = pl.DataFrame(
        {
            "group": group_column,
            "category": category_column,
            "lat": [51.5] * total_rows,
            "lng": [-0.1] * total_rows,
        }
    )

    groups, display_names = _build_poi_category_groups(pois)

    expected_display_names = {
        "Bus stop",
        "Café",
        "Pharmacy",
        "Rail station",
        "Restaurant",
        "Tesco",
    }
    assert set(display_names.values()) == expected_display_names

    # These categories are present in the data but must not get a group.
    for excluded_key in ("poi_waitrose", "poi_park", "poi_school"):
        assert excluded_key not in groups
def test_dynamic_poi_metric_renames_support_park_count_options() -> None:
    """Check the rename mapping produced for a single "parks" metric.

    For the key "parks" with display name "Park", the mapping must contain
    a nearest-distance label plus count labels for the 2km and 5km radii,
    and nothing else.
    """
    actual_renames = _dynamic_poi_metric_renames({"parks": "Park"})

    expected_renames = {
        "parks_nearest_km": "Distance to nearest amenity (Park) (km)",
        "parks_2km": "Number of amenities (Park) within 2km",
        "parks_5km": "Number of amenities (Park) within 5km",
    }
    assert actual_renames == expected_renames