This commit is contained in:
Andras Schmelczer 2026-06-02 13:46:18 +01:00
parent a04ac2d857
commit d43da9708c
47 changed files with 4120 additions and 573 deletions

View file

@ -1,15 +1,22 @@
import numpy as np
import polars as pl
from pyproj import Transformer
from scipy.spatial import cKDTree
from pipeline.download.places import (
_assign_london_display_city,
_build_street_places,
_display_city_from_tags,
_is_dlr_station,
_is_tram_station,
_naptan_dlr_stations,
_normalize_street_name,
_ofs_universities,
_outcode_of_postcode,
_pois_to_places,
_select_university_name,
_station_display_name,
_street_centroid,
)
# Shared coordinate transformer from WGS84 (EPSG:4326) to the British National
# Grid (EPSG:27700). ``always_xy=True`` fixes the axis order to (x, y), i.e.
# (longitude, latitude) in and (easting, northing) out.
WGS84_TO_BNG = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
@ -168,6 +175,73 @@ def test_ofs_universities_extracts_university_title_rows_with_postcode_coords():
]
def test_street_centroid_averages_vertices():
    """``_street_centroid`` returns the mean vertex, or ``None`` for no vertices."""
    vertices = [(51.0, -0.1), (53.0, -0.3)]
    centroid = _street_centroid(vertices)
    assert centroid == (52.0, -0.2)

    # An empty vertex list has no centroid.
    empty_centroid = _street_centroid([])
    assert empty_centroid is None
def test_normalize_street_name_and_outcode():
    """Street names come back trimmed and lower-cased; postcodes reduce to their outcode."""
    normalized = _normalize_street_name(" High Street ")
    assert normalized == "high street"

    # Postcode -> expected outcode; an empty postcode yields an empty outcode.
    expected_outcodes = [("NW1 6XE", "NW1"), ("", "")]
    for postcode, outcode in expected_outcodes:
        assert _outcode_of_postcode(postcode) == outcode
def test_build_street_places_groups_segments_by_name_and_outcode():
    """Segments sharing a name and outcode merge; same-named roads elsewhere stay apart."""
    # Two postcode anchors for the nearest-neighbour lookup:
    # index 0 is NW1 (north), index 1 is CR0 (south).
    postcode_points = np.array([[51.53, -0.14], [51.37, -0.10]], dtype=np.float64)
    tree = cKDTree(postcode_points)
    outcodes = ["NW1", "CR0"]

    streets = [
        # NW1: two segments of the same High Street.
        {"name": "High Street", "lat": 51.531, "lon": -0.141},
        {"name": "High Street", "lat": 51.529, "lon": -0.139},
        # CR0: a different road that happens to share the name.
        {"name": "High Street", "lat": 51.371, "lon": -0.101},
        # NW1: an unrelated street.
        {"name": "Baker Street", "lat": 51.5305, "lon": -0.1405},
    ]

    places = _build_street_places(streets, tree, outcodes)

    # Expected places: High Street/NW1 (merged), High Street/CR0, Baker Street/NW1.
    assert len(places) == 3
    for place in places:
        assert place["place_type"] == "street"

    high_streets = [place for place in places if place["name"] == "High Street"]

    # The northern High Street sits at the mean of its two segment points.
    nw1_high = next(place for place in high_streets if place["lat"] > 51.5)
    expected_lat = (51.531 + 51.529) / 2
    expected_lon = (-0.141 + -0.139) / 2
    assert nw1_high["lat"] == expected_lat
    assert nw1_high["lon"] == expected_lon

    # The southern (CR0) High Street is still present as its own place.
    assert any(place["lat"] < 51.4 for place in high_streets)
def test_pois_to_places_keeps_high_value_named_pois_only():
    """Expect one place per distinct named park/hospital; other rows are dropped."""
    names = ["Hyde Park", "St Thomas' Hospital", "Joe's Cafe", "Hyde Park", ""]
    categories = [
        "leisure/park",
        "amenity/hospital",
        "amenity/cafe",  # everyday amenity: expected to be excluded
        "leisure/park",  # repeat of the first row: expected to be excluded
        "leisure/park",  # empty name: expected to be excluded
    ]
    latitudes = [51.507, 51.498, 51.5, 51.507, 51.6]
    longitudes = [-0.165, -0.118, -0.1, -0.165, -0.2]
    pois = pl.DataFrame(
        {
            "name": names,
            "category": categories,
            "lat": latitudes,
            "lng": longitudes,
        }
    )

    places = _pois_to_places(pois)

    name_and_type = [(place["name"], place["place_type"]) for place in places]
    assert name_and_type == [
        ("Hyde Park", "park"),
        ("St Thomas' Hospital", "hospital"),
    ]

    # Every returned POI place must carry travel_destination set to exactly False.
    for place in places:
        assert place["travel_destination"] is False
def test_display_city_from_tags_uses_explicit_london_context():
    """An ``is_in`` tag listing London maps to "London"; one listing Cambridgeshire maps to ``None``."""
    london_tags = {"is_in": "Croydon, London, UK"}
    cambridgeshire_tags = {"is_in": "Croydon, Cambridgeshire, UK"}

    assert _display_city_from_tags(london_tags) == "London"
    assert _display_city_from_tags(cambridgeshire_tags) is None