This commit is contained in:
Andras Schmelczer 2026-02-15 22:39:49 +00:00
parent 03445188ea
commit 524580eb25
102 changed files with 36625 additions and 1295 deletions

View file

@ -1,7 +1,14 @@
import polars as pl
import pytest
from pipeline.utils.poi_counts import POI_GROUPS, count_pois_within_radius
from pipeline.utils.poi_counts import count_pois_per_postcode
# Group name -> POI category labels counted under that group. The tests below
# pass this mapping as ``groups=`` and expect one ``<group>_2km`` column per key.
POI_GROUPS = {
    "restaurants": [
        "Restaurant",
        "Fast Food",
    ],
    "groceries": ["Supermarket"],
    "parks": ["Park"],
    "public_transport": ["Station"],
}
@pytest.fixture
@ -24,41 +31,39 @@ def pois():
@pytest.fixture
def postcodes():
    """Two postcodes: one near central London, one far away.

    Returns a DataFrame with one row per postcode and the columns
    ``postcode``, ``lat`` and ``lon``. "EC1A 1BB" sits at
    (51.5074, -0.1278); "ZZ99 9ZZ" sits at (55.0, -3.0), which the tests
    rely on being outside every search radius they use.
    """
    return pl.DataFrame(
        {
            "postcode": ["EC1A 1BB", "ZZ99 9ZZ"],
            "lat": [51.5074, 55.0],
            "lon": [-0.1278, -3.0],
        }
    )
def test_counts_pois_within_radius(postcodes, pois):
    """Each postcode gets one ``<group>_2km`` count column per POI group.

    The central London postcode must pick up the nearby POIs from the
    ``pois`` fixture, while the far-away postcode must report zero for
    every group.
    """
    result = count_pois_per_postcode(postcodes, pois, groups=POI_GROUPS, radius_km=2.0)

    # Extra columns (e.g. the postcode key) are allowed, hence subset rather than equality.
    expected_cols = {f"{g}_2km" for g in POI_GROUPS}
    assert expected_cols.issubset(set(result.columns))

    # Result must be aligned to postcodes (2 rows)
    assert len(result) == 2

    # Look rows up by postcode rather than by position so the test does not
    # depend on the output row order.
    ec1a = result.filter(pl.col("postcode") == "EC1A 1BB")
    assert ec1a["restaurants_2km"][0] == 2  # Restaurant + Fast Food
    assert ec1a["groceries_2km"][0] == 1  # Supermarket
    assert ec1a["parks_2km"][0] == 1  # Park
    assert ec1a["public_transport_2km"][0] == 1  # Station

    # Far-away postcode should have zero counts
    zz99 = result.filter(pl.col("postcode") == "ZZ99 9ZZ")
    for group in POI_GROUPS:
        assert zz99[f"{group}_2km"][0] == 0
def test_no_pois_returns_zeros(properties):
def test_no_pois_returns_zeros(postcodes):
empty_pois = pl.DataFrame(
{
"lat": pl.Series([], dtype=pl.Float64),
@ -66,17 +71,17 @@ def test_no_pois_returns_zeros(properties):
"category": pl.Series([], dtype=pl.String),
}
)
result = count_pois_within_radius(properties, empty_pois, radius_km=2.0)
result = count_pois_per_postcode(postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0)
for group in POI_GROUPS:
col = f"{group}_2km"
assert col in result
assert result[col].to_list() == [0, 0, 0]
assert col in result.columns
assert result[col].to_list() == [0, 0]
def test_custom_radius(pois):
"""A tiny radius should exclude POIs that are even slightly away."""
properties = pl.DataFrame(
postcodes = pl.DataFrame(
{
"postcode": ["EC1A 1BB"],
"lat": [51.5074],
@ -85,7 +90,7 @@ def test_custom_radius(pois):
)
# 0.01 km = 10m — only the POI at the exact same location should match
result = count_pois_within_radius(properties, pois, radius_km=0.01)
result = count_pois_per_postcode(postcodes, pois, groups=POI_GROUPS, radius_km=0.01)
# The Restaurant at (51.5074, -0.1278) is at distance 0
assert result["restaurants_0km"][0] >= 1
# POIs >100m away should not be counted