148 lines
4.3 KiB
Python
148 lines
4.3 KiB
Python
import polars as pl
|
|
|
|
from pipeline.download.places import (
|
|
_is_dlr_station,
|
|
_is_tram_station,
|
|
_naptan_dlr_stations,
|
|
_ofs_universities,
|
|
_select_university_name,
|
|
_station_display_name,
|
|
)
|
|
|
|
|
|
def test_dlr_light_rail_is_not_treated_as_tram():
    """A light-rail station on the DLR network counts as DLR, not as a tram.

    Also pins the display-name formatting: the result ends in "DLR station"
    and a redundant "Station" word inside the raw name is not repeated.
    """
    tags = dict(
        name="Lewisham DLR",
        railway="station",
        station="light_rail",
        network="Docklands Light Railway",
    )

    assert _is_dlr_station(tags)
    assert not _is_tram_station(tags)

    # (raw name, expected display name) pairs, checked in order.
    display_name_cases = (
        ("Lewisham DLR", "Lewisham DLR station"),
        ("Tower Gateway Station DLR", "Tower Gateway DLR station"),
    )
    for raw_name, expected in display_name_cases:
        assert _station_display_name(raw_name, tags) == expected
|
|
|
|
|
|
def test_tram_light_rail_is_still_excluded():
    """A light-rail station on the London Trams network is a tram, not DLR."""
    tags = dict(
        name="East Croydon",
        railway="station",
        station="light_rail",
        network="London Trams",
    )

    classified_as_dlr = _is_dlr_station(tags)
    assert not classified_as_dlr

    classified_as_tram = _is_tram_station(tags)
    assert classified_as_tram
|
|
|
|
|
|
def test_naptan_dlr_stations_are_deduplicated_by_atco_code(tmp_path):
    """Check that ``_naptan_dlr_stations`` collapses duplicate DLR stops.

    A five-row NaPTAN-style parquet file is written under ``tmp_path``:
    two Shadwell rows, one Greenwich row, a bus stop whose name mentions DLR,
    and a Bank tube row. The helper is expected to return exactly two
    stations, named "Greenwich DLR station" and "Shadwell DLR station", with
    the Shadwell latitude being the mean of its two source rows.

    NOTE(review): the dropped rows are the ones whose ids lack "ZZDL";
    presumably that is the filter criterion -- confirm against the helper.

    Args:
        tmp_path: pytest-provided temporary directory for the parquet fixture.
    """
    # Local import keeps this test self-contained; only this check needs it.
    import math

    naptan = tmp_path / "naptan.parquet"
    pl.DataFrame(
        {
            "id": [
                "4900ZZDLSHA3",
                "9400ZZDLSHA",
                "4900ZZDLGRE1",
                "490002076RV",
                "4900ZZLUBNK",
            ],
            "name": [
                "Shadwell DLR",
                "Shadwell DLR Station",
                "Greenwich Station",
                "Tower Gateway Station DLR",
                "Bank",
            ],
            "category": [
                "Tube station",
                "Tube station",
                "Rail station",
                "Bus stop",
                "Tube station",
            ],
            "lat": [51.51156, 51.511693, 51.47794, 51.510575, 51.5131],
            "lng": [-0.055595, -0.056643, -0.01442, -0.07514, -0.0894],
        }
    ).write_parquet(naptan)

    stations = _naptan_dlr_stations(naptan)

    assert [station["name"] for station in stations] == [
        "Greenwich DLR station",
        "Shadwell DLR station",
    ]
    shadwell = next(
        station for station in stations if station["name"].startswith("Shadwell")
    )
    # The latitude is a computed mean of two floats; compare with a tolerance
    # so the test does not depend on the exact summation order or storage
    # round-trip used by the implementation.
    assert math.isclose(shadwell["lat"], (51.51156 + 51.511693) / 2)
    assert shadwell["place_type"] == "station"
    assert shadwell["travel_destination"] is True
|
|
|
|
|
|
def test_select_university_name_prefers_public_trading_name_for_noisy_legal_name():
    """Pin the name chosen for three legal-name / trading-name combinations.

    The cases cover a multi-line trading-name field (first entry expected),
    a single trading name shorter than the legal name, and a
    "Not applicable" trading name where the legal name is expected back
    without its leading "The".
    """
    # (legal name, trading name field, expected display name)
    cases = (
        (
            "The Chancellor, Masters and Scholars of the University of Oxford",
            "Oxford University\nThe University of Oxford",
            "Oxford University",
        ),
        (
            "Bournemouth University Higher Education Corporation",
            "Bournemouth University",
            "Bournemouth University",
        ),
        (
            "The University of Surrey",
            "Not applicable",
            "University of Surrey",
        ),
    )
    for legal_name, trading_names, expected in cases:
        assert _select_university_name(legal_name, trading_names) == expected
|
|
|
|
|
|
def test_ofs_universities_extracts_university_title_rows_with_postcode_coords():
    """Check extraction of university records from a raw OfS register frame.

    The frame mimics the register layout used here: two preamble rows, a
    header row, then provider rows. Only the provider marked "Yes" for the
    university-title column is expected back, with coordinates taken from
    the postcode lookup (keyed by the postcode without its space).

    NOTE(review): ``skipped`` presumably counts title-holding rows that could
    not be geocoded -- confirm against ``_ofs_universities``.
    """
    title_row = ["OfS Register", None, None, None]
    note_row = ["Note row", None, None, None]
    header_row = [
        "Provider's legal name",
        "Provider's trading name(s)",
        "Provider's contact address",
        "Does the provider have the right to use university in its title?",
    ]
    oxford_row = [
        "The Chancellor, Masters and Scholars of the University of Oxford",
        "Oxford University\nThe University of Oxford",
        "University Offices\nWellington Square\nOxford\nOX1 2JD\nUnited Kingdom",
        "Yes",
    ]
    college_row = [
        "Example College",
        "Not applicable",
        "Example Street\nLondon\nSW1A 1AA\nUnited Kingdom",
        "No",
    ]
    raw_register = pl.DataFrame(
        [title_row, note_row, header_row, oxford_row, college_row],
        orient="row",
    )
    postcode_coords = {"OX12JD": (51.7585, -1.2643)}

    universities, skipped = _ofs_universities(raw_register, postcode_coords)

    expected_oxford = {
        "name": "Oxford University",
        "place_type": "university",
        "lat": 51.7585,
        "lon": -1.2643,
        "population": 0,
        "travel_destination": True,
    }
    assert skipped == 0
    assert universities == [expected_oxford]
|