This commit is contained in:
Andras Schmelczer 2026-03-12 22:11:00 +00:00
parent 14a3555cf1
commit 7e92bf112e
34 changed files with 1214437 additions and 224 deletions

View file

@@ -1,13 +1,14 @@
import numpy as np
import polars as pl
import pytest
from pipeline.utils.poi_counts import count_pois_per_postcode
from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_postcode
# Group name -> list of POI category labels belonging to that group; passed as
# the `groups=` argument to the functions under test.
# NOTE(review): this rendering appears to interleave the removed
# "public_transport" entry with its "train_tube" replacement — confirm against
# the checked-out file which entries are actually present.
POI_GROUPS = {
"restaurants": ["Restaurant", "Fast Food"],
"groceries": ["Supermarket"],
"parks": ["Park"],
"public_transport": ["Station"],
"train_tube": ["Rail station", "Metro or Tram stop"],
}
@@ -23,7 +24,7 @@ def pois():
"Fast Food",
"Supermarket",
"Park",
"Station",
"Rail station",
"Restaurant", # too far from any property
],
}
@@ -55,7 +56,7 @@ def test_counts_pois_within_radius(postcodes, pois):
assert ec1a["restaurants_2km"][0] == 2 # Restaurant + Fast Food
assert ec1a["groceries_2km"][0] == 1 # Supermarket
assert ec1a["parks_2km"][0] == 1 # Park
assert ec1a["public_transport_2km"][0] == 1 # Station
assert ec1a["train_tube_2km"][0] == 1 # Rail station
# Far-away postcode should have zero counts
zz99 = result.filter(pl.col("postcode") == "ZZ99 9ZZ")
@@ -96,3 +97,35 @@ def test_custom_radius(pois):
# POIs >100m away should not be counted
total = sum(result[f"{g}_0km"][0] for g in POI_GROUPS)
assert total <= 2 # at most the co-located POIs
def test_min_distance_finds_nearest(postcodes, pois):
    """Each group's nearest-POI distance is reported for every postcode."""
    nearest = min_distance_per_postcode(postcodes, pois, groups=POI_GROUPS)
    assert len(nearest) == 2

    central = nearest.filter(pl.col("postcode") == "EC1A 1BB")

    # Rail station (51.5073, -0.1277) vs. postcode (51.5074, -0.1278):
    # the two points are well under 50 m apart.
    rail_km = central["train_tube_nearest_km"][0]
    assert rail_km < 0.05

    # A restaurant shares the postcode's location, so the distance is ~0.
    food_km = central["restaurants_nearest_km"][0]
    assert food_km < 0.01

    # The remote postcode has no POI within grid range, so it reports NaN.
    remote = nearest.filter(pl.col("postcode") == "ZZ99 9ZZ")
    remote_rail_km = remote["train_tube_nearest_km"][0]
    assert np.isnan(remote_rail_km)
def test_min_distance_no_pois_returns_nan(postcodes):
    """An empty POI frame must yield NaN in the nearest-distance column."""
    # Build a zero-row POI frame that still carries the expected column dtypes.
    column_dtypes = (
        ("lat", pl.Float64),
        ("lng", pl.Float64),
        ("category", pl.String),
    )
    no_pois = pl.DataFrame(
        {name: pl.Series([], dtype=dtype) for name, dtype in column_dtypes}
    )

    only_rail = {"train_tube": ["Rail station"]}
    nearest = min_distance_per_postcode(postcodes, no_pois, groups=only_rail)

    # The column must exist even though there was nothing to measure against.
    assert "train_tube_nearest_km" in nearest.columns
    distances = nearest["train_tube_nearest_km"].to_list()
    assert all(np.isnan(km) for km in distances)