Extract utils
This commit is contained in:
parent
0153e46478
commit
e1b38a1b95
8 changed files with 458 additions and 25 deletions
85
pipeline/utils/test_poi_counts.py
Normal file
85
pipeline/utils/test_poi_counts.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import polars as pl
|
||||
import pytest
|
||||
|
||||
from pipeline.utils.poi_counts import POI_GROUPS, count_pois_within_radius
|
||||
|
||||
|
||||
@pytest.fixture
def pois():
    """POIs clustered around two locations: central London and 10km away.

    Returns a frame with ``lat``, ``lng`` and ``category`` columns. The first
    five rows lie within 0.001 degrees of (51.5074, -0.1278); the last row is
    placed at (51.60, -0.20).
    """
    # One (lat, lng, category) tuple per POI keeps each point readable as a unit.
    rows = [
        (51.5074, -0.1278, "Restaurant"),
        (51.5075, -0.1280, "Fast Food"),
        (51.5080, -0.1275, "Supermarket"),
        (51.5076, -0.1279, "Park"),
        (51.5073, -0.1277, "Station"),
        (51.60, -0.20, "Restaurant"),  # too far from any property
    ]
    return pl.DataFrame({
        "lat": [lat for lat, _, _ in rows],
        "lng": [lng for _, lng, _ in rows],
        "category": [category for _, _, category in rows],
    })
|
||||
|
||||
|
||||
@pytest.fixture
def properties():
    """Two properties at the same postcode near central London, one at a distant postcode.

    Returns a frame with ``postcode``, ``lat`` and ``lon`` columns. Note the
    longitude column is ``lon`` here, whereas the POI frame uses ``lng``.
    """
    near = ("EC1A 1BB", 51.5074, -0.1278)
    far = ("ZZ99 9ZZ", 55.0, -3.0)
    # The near property appears twice so tests can check duplicate rows.
    rows = [near, near, far]
    return pl.DataFrame({
        "postcode": [postcode for postcode, _, _ in rows],
        "lat": [lat for _, lat, _ in rows],
        "lon": [lon for _, _, lon in rows],
    })
|
||||
|
||||
|
||||
def test_counts_pois_within_radius(properties, pois):
    """Check the per-group POI counts returned for a 2 km radius."""
    result = count_pois_within_radius(properties, pois, radius_km=2.0)

    # Exactly one output column per POI group, each suffixed with "_2km".
    expected_columns = {f"{g}_2km" for g in POI_GROUPS}
    assert set(result.keys()) == expected_columns

    # Result Series must be aligned to properties (3 rows)
    for col, series in result.items():
        assert len(series) == 3, f"{col} has {len(series)} rows, expected 3"

    # First two rows share a postcode near the central London cluster
    expected_near_counts = {
        "restaurants_2km": 2,  # Restaurant + Fast Food
        "groceries_2km": 1,  # Supermarket
        "parks_2km": 1,  # Park
        "public_transport_2km": 1,  # Station
    }
    for column, expected in expected_near_counts.items():
        assert result[column][0] == expected

    # Second row is the same postcode, so same counts
    restaurants = result["restaurants_2km"]
    assert restaurants[1] == restaurants[0]

    # Third row (ZZ99 9ZZ) is far from all POIs → zero counts
    far_row = 2
    for group in POI_GROUPS:
        assert result[f"{group}_2km"][far_row] == 0
|
||||
|
||||
|
||||
def test_no_pois_returns_zeros(properties):
    """With an empty POI frame, every group column should be all zeros."""
    # Typed empty columns so the frame has the same schema as a populated one.
    column_dtypes = (
        ("lat", pl.Float64),
        ("lng", pl.Float64),
        ("category", pl.String),
    )
    empty_pois = pl.DataFrame({
        name: pl.Series([], dtype=dtype) for name, dtype in column_dtypes
    })
    result = count_pois_within_radius(properties, empty_pois, radius_km=2.0)

    for group in POI_GROUPS:
        column = f"{group}_2km"
        # The column must still be present, with one zero per property row.
        assert column in result
        assert result[column].to_list() == [0, 0, 0]
|
||||
|
||||
|
||||
def test_custom_radius(pois):
    """A tiny radius should exclude POIs that are even slightly away."""
    lone_property = pl.DataFrame({
        "postcode": ["EC1A 1BB"],
        "lat": [51.5074],
        "lon": [-0.1278],
    })

    # 0.01 km = 10m — only the POI at the exact same location should match
    result = count_pois_within_radius(lone_property, pois, radius_km=0.01)

    # NOTE(review): the "_0km" suffix presumably reflects how the function
    # formats radius_km=0.01 into column names — confirm against poi_counts.
    # The Restaurant at (51.5074, -0.1278) is at distance 0
    assert result["restaurants_0km"][0] >= 1

    # POIs outside the tiny radius should not be counted; the bound is
    # deliberately loose rather than an exact count.
    counts_for_property = [result[f"{g}_0km"][0] for g in POI_GROUPS]
    total = sum(counts_for_property)
    assert total <= 2  # at most the co-located POIs
|
||||
Loading…
Add table
Add a link
Reference in a new issue