idk
This commit is contained in:
parent
a04ac2d857
commit
d43da9708c
47 changed files with 4120 additions and 573 deletions
|
|
@ -89,3 +89,92 @@ def test_deduplicate_naptan_does_not_merge_missing_locality_bus_stops():
|
|||
result = deduplicate_naptan(df)
|
||||
|
||||
assert len(result) == 2
|
||||
|
||||
|
||||
def test_deduplicate_naptan_merges_colocated_missing_locality_bus_stations():
    """Co-located, locality-less records for one bus station become one POI.

    Both rows share a name and category, carry no locality, and sit within
    the merge area, so they are a true duplicate.
    """
    station_name = "Victoria Bus Station"
    station_category = "Bus station"
    columns = {
        "id": ["a", "b"],
        "name": [station_name, station_name],
        "category": [station_category, station_category],
        "lat": [51.4952, 51.4953],
        "lng": [-0.1441, -0.1440],
        "locality": [None, None],
    }

    deduped = deduplicate_naptan(pl.DataFrame(columns))

    assert len(deduped) == 1
    assert deduped["name"][0] == "Victoria Bus Station"
    assert deduped["category"][0] == "Bus station"
    # The surviving row's latitude is expected to be the mean of the two inputs.
    expected_lat = (51.4952 + 51.4953) / 2
    assert deduped["lat"][0] == pytest.approx(expected_lat)
|
||||
|
||||
|
||||
def test_deduplicate_naptan_keeps_rail_station_with_only_station_node():
    """A rail station with a lone station-node record survives deduplication."""
    # Aberdare's only NaPTAN record is an RLY station node (StopType "RLY").
    lone_station = {
        "id": ["aberdare-rly"],
        "name": ["Aberdare Rail Station"],
        "category": ["Rail station"],
        "lat": [51.7155],
        "lng": [-3.4438],
        "locality": ["ABERDARE"],
        "entrance": [False],
    }
    frame = pl.DataFrame(lone_station)

    deduped = deduplicate_naptan(frame)

    assert len(deduped) == 1
    kept_name = deduped["name"][0]
    kept_category = deduped["category"][0]
    assert kept_name == "Aberdare Rail Station"
    assert kept_category == "Rail station"
|
||||
|
||||
|
||||
def test_deduplicate_naptan_merges_rail_entrances_into_station_node():
    """A station node plus its entrances collapse onto the station node.

    One station node (RLY) and two entrance nodes (RSE) should yield a single
    "Rail station" POI, and the surviving row must be the station node rather
    than either entrance.
    """
    row_count = 3
    frame = pl.DataFrame(
        {
            "id": ["clapham-rly", "clapham-rse-a", "clapham-rse-b"],
            "name": ["Clapham Junction Rail Station"] * row_count,
            "category": ["Rail station"] * row_count,
            "lat": [51.4642, 51.4644, 51.4640],
            "lng": [-0.1705, -0.1702, -0.1708],
            "locality": ["CLAPHAM"] * row_count,
            # Only the first row is the station node; the rest are entrances.
            "entrance": [False, True, True],
        }
    )

    deduped = deduplicate_naptan(frame)

    assert len(deduped) == 1
    survivor_id = deduped["id"][0]
    survivor_category = deduped["category"][0]
    assert survivor_id == "clapham-rly"
    assert survivor_category == "Rail station"
|
||||
|
||||
|
||||
def test_deduplicate_naptan_does_not_merge_rail_and_ferry_in_same_area():
    """Different transport modes sharing a name/area stay as separate POIs."""
    shared_name = "Harbour Station"
    shared_locality = "HARBOUR"
    columns = {
        "id": ["harbour-rail", "harbour-ferry"],
        "name": [shared_name, shared_name],
        "category": ["Rail station", "Ferry"],
        "lat": [51.5, 51.5001],
        "lng": [-0.1, -0.1001],
        "locality": [shared_locality, shared_locality],
        "entrance": [False, False],
    }
    frame = pl.DataFrame(columns)

    # Sort by category so the comparison below does not depend on output order.
    deduped = deduplicate_naptan(frame)
    ordered = deduped.sort("category")

    assert len(ordered) == 2
    categories = ordered["category"].to_list()
    assert categories == ["Ferry", "Rail station"]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue