idgf
This commit is contained in:
parent
fbfebc651c
commit
aab85fe32e
33 changed files with 2016 additions and 283 deletions
|
|
@ -9,10 +9,12 @@ from pipeline.download.places import (
|
|||
_display_city_from_tags,
|
||||
_is_dlr_station,
|
||||
_is_tram_station,
|
||||
_london_postcode_tree,
|
||||
_naptan_dlr_stations,
|
||||
_normalize_street_name,
|
||||
_ofs_universities,
|
||||
_outcode_of_postcode,
|
||||
_outcode_tree,
|
||||
_pois_to_places,
|
||||
_select_university_name,
|
||||
_station_display_name,
|
||||
|
|
@ -242,6 +244,42 @@ def test_pois_to_places_keeps_high_value_named_pois_only():
|
|||
assert all(place["travel_destination"] is False for place in places)
|
||||
|
||||
|
||||
def test_pois_to_places_keeps_distinct_same_named_pois():
    """Same-named POIs that lie far apart must both survive conversion."""
    # Two genuinely different parks that merely share a name: one in London,
    # one in Bristol. Each tuple is (lat, lon).
    london_park = (51.54, -0.04)
    bristol_park = (51.46, -2.60)
    frame = pl.DataFrame(
        {
            "name": ["Victoria Park", "Victoria Park"],
            "category": ["leisure/park", "leisure/park"],
            "lat": [london_park[0], bristol_park[0]],
            "lng": [london_park[1], bristol_park[1]],
        }
    )

    result = _pois_to_places(frame)

    assert len(result) == 2
    # Order of the output is irrelevant here, so compare coordinates as a set.
    seen_coordinates = {(entry["lat"], entry["lon"]) for entry in result}
    assert seen_coordinates == {london_park, bristol_park}
|
||||
|
||||
|
||||
def test_pois_to_places_still_dedupes_colocated():
    """One physical POI recorded twice a few metres apart yields a single place."""
    # Identical name and category; the coordinates differ only in the fourth
    # decimal place, i.e. the two rows describe the same park.
    duplicated_park = pl.DataFrame(
        {
            "name": ["Victoria Park"] * 2,
            "category": ["leisure/park"] * 2,
            "lat": [51.5400, 51.5401],
            "lng": [-0.0400, -0.0399],
        }
    )

    result = _pois_to_places(duplicated_park)

    assert len(result) == 1
|
||||
|
||||
|
||||
def test_display_city_from_tags_uses_explicit_london_context():
|
||||
assert _display_city_from_tags({"is_in": "Croydon, London, UK"}) == "London"
|
||||
assert _display_city_from_tags({"is_in": "Croydon, Cambridgeshire, UK"}) is None
|
||||
|
|
@ -290,3 +328,52 @@ def test_assign_london_display_city_uses_nearest_active_postcode_admin(tmp_path)
|
|||
|
||||
assert assigned == 2
|
||||
assert [place["display_city"] for place in places] == ["London", "London", None]
|
||||
|
||||
|
||||
def test_no_grid_reference_sentinel_is_excluded_from_coordinate_trees(tmp_path):
    """An active postcode parked at the no-grid-reference sentinel is kept out of both trees.

    ONS NSPL stores postcodes with no grid reference at the Null-Island
    sentinel lat=99.999999, long=0.0, whose paired BNG coords collapse to the
    (0, 0) origin. Such a row must never enter the nearest-neighbour indexes.
    """
    # A genuine Croydon postcode location, plus its BNG projection.
    # Note the transformer is called with (longitude, latitude).
    croydon_lat, croydon_lon = 51.371273, -0.101793
    croydon_easting, croydon_northing = WGS84_TO_BNG.transform(croydon_lon, croydon_lat)

    # Active (doterm is None) row sitting on the sentinel coordinates.
    sentinel_row = {
        "pcds": "ZZ99 9ZZ",
        "lat": 99.999999,
        "long": 0.0,
        "doterm": None,
        "ctry25cd": "E92000001",
        "east1m": 0,
        "north1m": 0,
        "rgn25cd": "E12000007",
        "lad25cd": "E09000008",
        "cty25cd": "E13000002",
    }
    # Active row with real coordinates; shares the sentinel's admin codes so
    # only the coordinates distinguish the two rows.
    croydon_row = {
        "pcds": "CR0 1SZ",
        "lat": croydon_lat,
        "long": croydon_lon,
        "doterm": None,
        "ctry25cd": "E92000001",
        "east1m": int(round(croydon_easting)),
        "north1m": int(round(croydon_northing)),
        "rgn25cd": "E12000007",
        "lad25cd": "E09000008",
        "cty25cd": "E13000002",
    }
    parquet_path = tmp_path / "postcodes.parquet"
    pl.DataFrame([sentinel_row, croydon_row]).write_parquet(parquet_path)

    # lat/long outcode tree: only the real postcode survives, so a London-area
    # query cannot be tagged with the sentinel's (empty) outcode.
    outcode_index, outcode_labels = _outcode_tree(parquet_path)
    assert outcode_index.n == 1
    assert outcode_labels == ["CR0"]
    _, nearest = outcode_index.query([[croydon_lat, croydon_lon]])
    assert outcode_labels[nearest[0]] == "CR0"

    # BNG London tree: only the real postcode survives, so the (0, 0) origin
    # can never be the nearest neighbour of a real place.
    bng_index, is_london = _london_postcode_tree(parquet_path)
    assert bng_index.n == 1
    assert is_london.tolist() == [True]
    _, bng_nearest = bng_index.query([[croydon_easting, croydon_northing]])
    assert bng_nearest[0] == 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue