Codex changes
This commit is contained in:
parent
0bae902e08
commit
d4dde21ad2
46 changed files with 4953 additions and 966 deletions
71
pipeline/download/test_naptan.py
Normal file
71
pipeline/download/test_naptan.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import polars as pl
|
||||
import pytest
|
||||
|
||||
from pipeline.download.naptan import canonical_station_name_expr, deduplicate_naptan
|
||||
|
||||
|
||||
def test_canonical_station_name_expr_normalizes_transport_suffixes():
    """Check the canonical key produced for several station-name variants.

    Each case pairs a raw station name with the key the expression is
    expected to yield; the three "Bank" variants must all map to ``"bank"``.
    """
    # (raw name, expected canonical key)
    cases = [
        ("Bank", "bank"),
        ("Bank Underground Station", "bank"),
        ("Bank DLR Station", "bank"),
        ("Pleasure Beach (Blackpool Tramway)", "pleasure beach"),
        ("Earl's Court Tube Station", "earls court"),
    ]
    raw_names = [raw for raw, _ in cases]
    expected_keys = [key for _, key in cases]

    frame = pl.DataFrame({"name": raw_names})
    keyed = frame.select(canonical_station_name_expr().alias("key"))

    assert keyed["key"].to_list() == expected_keys
|
||||
|
||||
|
||||
def test_deduplicate_naptan_merges_tube_station_variants_by_locality():
    """Check that same-locality Bank variants merge while another locality stays.

    Three "Bank" rows share locality ``LOC1`` and are expected to collapse
    into one row whose latitude is the mean of the three; the fourth row
    sits in ``LOC2`` and is expected to survive on its own.
    """
    columns = {
        "id": ["bank", "bank-lu", "bank-dlr", "other-bank"],
        "name": [
            "Bank",
            "Bank Underground Station",
            "Bank DLR Station",
            "Bank Underground Station",
        ],
        "category": ["Tube station"] * 4,
        "lat": [51.5129, 51.5134, 51.5132, 55.0140],
        "lng": [-0.0889, -0.0890, -0.0885, -1.6781],
        "locality": ["LOC1", "LOC1", "LOC1", "LOC2"],
    }
    stations = pl.DataFrame(columns)

    # Sorting by latitude puts the merged LOC1 row ahead of the LOC2 row.
    deduped = deduplicate_naptan(stations).sort("lat")
    mean_loc1_lat = (51.5129 + 51.5134 + 51.5132) / 3

    assert deduped.height == 2
    assert deduped["name"].to_list() == ["Bank", "Bank Underground Station"]
    assert deduped["lat"].to_list()[0] == pytest.approx(mean_loc1_lat)
|
||||
|
||||
|
||||
def test_deduplicate_naptan_does_not_merge_missing_locality_bus_stops():
    """Check that identically named bus stops with no locality stay separate.

    Both rows are called "High Street" and have a null locality; the
    deduplicated frame is expected to still contain two rows.
    """
    columns = {
        "id": ["a", "b"],
        "name": ["High Street", "High Street"],
        "category": ["Bus stop", "Bus stop"],
        "lat": [51.5, 52.5],
        "lng": [-0.1, -1.1],
        "locality": [None, None],
    }
    bus_stops = pl.DataFrame(columns)

    deduped = deduplicate_naptan(bus_stops)

    assert deduped.height == 2
|
||||
Loading…
Add table
Add a link
Reference in a new issue