"""Tests for the DLR / tram station helpers in ``pipeline.download.places``."""

import math

import polars as pl

from pipeline.download.places import (
    _is_dlr_station,
    _is_tram_station,
    _naptan_dlr_stations,
    _station_display_name,
)


def test_dlr_light_rail_is_not_treated_as_tram():
    """A light-rail station on the DLR network is DLR, not tram.

    Also pins the display-name formatting: the result ends in
    "DLR station", and a redundant "Station" word in the raw name is not
    repeated.
    """
    dlr_tags = {
        "name": "Lewisham DLR",
        "railway": "station",
        "station": "light_rail",
        "network": "Docklands Light Railway",
    }

    assert _is_dlr_station(dlr_tags)
    assert not _is_tram_station(dlr_tags)

    assert _station_display_name("Lewisham DLR", dlr_tags) == "Lewisham DLR station"
    assert (
        _station_display_name("Tower Gateway Station DLR", dlr_tags)
        == "Tower Gateway DLR station"
    )


def test_tram_light_rail_is_still_excluded():
    """A light-rail station on the London Trams network is tram, not DLR."""
    tram_tags = {
        "name": "East Croydon",
        "railway": "station",
        "station": "light_rail",
        "network": "London Trams",
    }

    assert not _is_dlr_station(tram_tags)
    assert _is_tram_station(tram_tags)


def test_naptan_dlr_stations_are_deduplicated_by_atco_code(tmp_path):
    """Two NaPTAN rows for the same DLR station collapse into one entry.

    The fixture holds five rows:

    * two Shadwell rows ("4900ZZDLSHA3" and "9400ZZDLSHA") that must be
      merged into a single station;
    * one Greenwich row with a "ZZDL" id, which must be kept even though
      its category is "Rail station";
    * a Tower Gateway bus stop and a Bank tube row, both of which must be
      absent from the result (presumably because their ids carry no
      "ZZDL" marker -- confirm against ``_naptan_dlr_stations``).
    """
    naptan = tmp_path / "naptan.parquet"
    pl.DataFrame(
        {
            "id": [
                "4900ZZDLSHA3",
                "9400ZZDLSHA",
                "4900ZZDLGRE1",
                "490002076RV",
                "4900ZZLUBNK",
            ],
            "name": [
                "Shadwell DLR",
                "Shadwell DLR Station",
                "Greenwich Station",
                "Tower Gateway Station DLR",
                "Bank",
            ],
            "category": [
                "Tube station",
                "Tube station",
                "Rail station",
                "Bus stop",
                "Tube station",
            ],
            "lat": [51.51156, 51.511693, 51.47794, 51.510575, 51.5131],
            "lng": [-0.055595, -0.056643, -0.01442, -0.07514, -0.0894],
        }
    ).write_parquet(naptan)

    stations = _naptan_dlr_stations(naptan)

    # Exactly two stations survive, in this order.
    assert [station["name"] for station in stations] == [
        "Greenwich DLR station",
        "Shadwell DLR station",
    ]

    shadwell = next(
        station for station in stations if station["name"].startswith("Shadwell")
    )

    # The merged latitude is the mean of the two source rows. Compare with a
    # tolerance rather than ``==``: the value is produced by floating-point
    # arithmetic in the code under test, and a different (equally valid)
    # evaluation order could differ in the last bit.
    expected_lat = (51.51156 + 51.511693) / 2
    assert math.isclose(shadwell["lat"], expected_lat, rel_tol=1e-9)

    assert shadwell["place_type"] == "station"
    assert shadwell["travel_destination"] is True