This commit is contained in:
Andras Schmelczer 2026-06-02 13:46:18 +01:00
parent a04ac2d857
commit d43da9708c
47 changed files with 4120 additions and 573 deletions

View file

@ -1,9 +1,44 @@
import polars as pl
from pipeline.transform.poi_proximity import (
POI_GROUPS_2KM,
_build_poi_category_groups,
_dynamic_poi_metric_renames,
_groceries_categories,
)
from pipeline.utils.poi_counts import count_pois_per_postcode
def test_groceries_2km_counts_geolytix_brand_categories() -> None:
    """Verify the 2km groceries count picks up GEOLYTIX brand-named POIs.

    GEOLYTIX rows put the brand (e.g. "Tesco") in `category` under the
    "Groceries" group and never use the literal "Supermarket" string, so a
    matcher that only knows the OSM category names counts the supermarket
    row but silently skips the brand store.
    """
    # Two grocery POIs right next to the postcode: one brand-named
    # (GEOLYTIX style) and one using the OSM "Supermarket" label.
    poi_frame = pl.DataFrame(
        {
            "category": ["Tesco", "Supermarket"],
            "group": ["Groceries", "Groceries"],
            "lat": [51.5011, 51.5012],
            "lng": [-0.1417, -0.1418],
        }
    )
    postcode_frame = pl.DataFrame(
        {
            "postcode": ["SW1A 1AA"],
            "lat": [51.5010],
            "lon": [-0.1416],
        }
    )

    # Start from the static 2km groups and swap in the groceries categories
    # derived from the POI data itself.
    groups_with_brands = dict(POI_GROUPS_2KM)
    groups_with_brands["groceries"] = _groceries_categories(poi_frame)

    counted = count_pois_per_postcode(
        postcode_frame,
        poi_frame,
        groups=groups_with_brands,
        radius_km=2,
    )

    # Both the brand row ("Tesco") and the OSM "Supermarket" row must count.
    # Before the fix the static list was ["Greengrocer", "Supermarket",
    # "Convenience Store"], which dropped "Tesco" and produced 1 here.
    groceries_counts = counted["groceries_2km"]
    assert groceries_counts[0] == 2
def test_dynamic_poi_groups_include_requested_categories_only() -> None: