"""Tests for ``count_pois_within_radius``."""

import polars as pl
import pytest

from pipeline.utils.poi_counts import POI_GROUPS, count_pois_within_radius


@pytest.fixture
def pois():
    """Six POIs: five clustered near (51.507, -0.128) and one outlier."""
    # One (lat, lng, category) tuple per POI; transposed into columns below.
    poi_rows = [
        (51.5074, -0.1278, "Restaurant"),
        (51.5075, -0.1280, "Fast Food"),
        (51.5080, -0.1275, "Supermarket"),
        (51.5076, -0.1279, "Park"),
        (51.5073, -0.1277, "Station"),
        (51.60, -0.20, "Restaurant"),  # outlier: far from every fixture property
    ]
    lats, lngs, categories = (list(column) for column in zip(*poi_rows))
    return pl.DataFrame({"lat": lats, "lng": lngs, "category": categories})


@pytest.fixture
def properties():
    """Three properties: two sharing postcode EC1A 1BB, one at ZZ99 9ZZ far away."""
    # One (postcode, lat, lon) tuple per property; note the column is "lon" here.
    property_rows = [
        ("EC1A 1BB", 51.5074, -0.1278),
        ("EC1A 1BB", 51.5074, -0.1278),
        ("ZZ99 9ZZ", 55.0, -3.0),
    ]
    postcodes, lats, lons = (list(column) for column in zip(*property_rows))
    return pl.DataFrame({"postcode": postcodes, "lat": lats, "lon": lons})


def test_counts_pois_within_radius(properties, pois):
    """With a 2 km radius, each property gets per-group counts of nearby POIs."""
    result = count_pois_within_radius(properties, pois, radius_km=2.0)

    # Exactly one output column per POI group, suffixed with the radius.
    expected_columns = {f"{g}_2km" for g in POI_GROUPS}
    assert set(result.keys()) == expected_columns

    # Every returned series must line up with the three input properties.
    for name, values in result.items():
        assert len(values) == 3, f"{name} has {len(values)} rows, expected 3"

    # Row 0 sits inside the clustered POIs.
    expected_first_row = {
        "restaurants_2km": 2,  # Restaurant + Fast Food
        "groceries_2km": 1,  # Supermarket
        "parks_2km": 1,  # Park
        "public_transport_2km": 1,  # Station
    }
    for name, expected_count in expected_first_row.items():
        assert result[name][0] == expected_count

    # Row 1 shares row 0's postcode and coordinates, so its count must match.
    restaurants = result["restaurants_2km"]
    assert restaurants[1] == restaurants[0]

    # Row 2 (ZZ99 9ZZ) is far from every POI, so every group count is zero.
    assert all(result[f"{group}_2km"][2] == 0 for group in POI_GROUPS)


def test_no_pois_returns_zeros(properties):
    """An empty POI frame still yields every group column, filled with zeros."""
    column_dtypes = {"lat": pl.Float64, "lng": pl.Float64, "category": pl.String}
    empty_pois = pl.DataFrame(
        {name: pl.Series([], dtype=dtype) for name, dtype in column_dtypes.items()}
    )

    result = count_pois_within_radius(properties, empty_pois, radius_km=2.0)

    all_zero = [0, 0, 0]
    for group in POI_GROUPS:
        column = f"{group}_2km"
        assert column in result
        assert result[column].to_list() == all_zero


def test_custom_radius(pois):
    """A tiny radius should exclude POIs that are even slightly away."""
    single_property = pl.DataFrame(
        {
            "postcode": ["EC1A 1BB"],
            "lat": [51.5074],
            "lon": [-0.1278],
        }
    )

    # 0.01 km = 10 m; the columns for this radius are looked up as "<group>_0km".
    result = count_pois_within_radius(single_property, pois, radius_km=0.01)

    # The Restaurant at (51.5074, -0.1278) coincides with the property.
    assert result["restaurants_0km"][0] >= 1

    # Deliberately loose upper bound: at most the co-located POIs may be counted.
    total = 0
    for group in POI_GROUPS:
        total += result[f"{group}_0km"][0]
    assert total <= 2