This commit is contained in:
Andras Schmelczer 2026-05-12 22:30:36 +01:00
parent 81a16f543c
commit 63713c3a2b
15 changed files with 492 additions and 159 deletions

View file

@ -4,6 +4,8 @@ from pipeline.download.places import (
_is_dlr_station,
_is_tram_station,
_naptan_dlr_stations,
_ofs_universities,
_select_university_name,
_station_display_name,
)
@ -79,3 +81,68 @@ def test_naptan_dlr_stations_are_deduplicated_by_atco_code(tmp_path):
assert shadwell["lat"] == (51.51156 + 51.511693) / 2
assert shadwell["place_type"] == "station"
assert shadwell["travel_destination"] is True
def test_select_university_name_prefers_public_trading_name_for_noisy_legal_name():
    """Check the display name picked for several legal-name / trading-name pairs."""
    # Each case: (legal name, raw trading-name field, expected display name).
    cases = [
        # Long ceremonial legal name with two newline-separated trading names.
        (
            "The Chancellor, Masters and Scholars of the University of Oxford",
            "Oxford University\nThe University of Oxford",
            "Oxford University",
        ),
        # Legal name carrying a corporate suffix; the trading name is the short form.
        (
            "Bournemouth University Higher Education Corporation",
            "Bournemouth University",
            "Bournemouth University",
        ),
        # "Not applicable" trading name: expected result is the legal name
        # without its leading "The ".
        (
            "The University of Surrey",
            "Not applicable",
            "University of Surrey",
        ),
    ]

    for legal_name, trading_names, expected in cases:
        assert _select_university_name(legal_name, trading_names) == expected
def test_ofs_universities_extracts_university_title_rows_with_postcode_coords():
    """Check that only the provider marked "Yes" for university title is returned.

    The fixture frame has two rows ahead of the column-header row, then one
    provider answering "Yes" and one answering "No" to the university-title
    question. The single expected record carries the coordinates supplied by
    the postcode lookup, and the skipped count is expected to be zero.
    """
    # Rows that precede the real column headers in the fixture.
    preamble_rows = [
        ["OfS Register", None, None, None],
        ["Note row", None, None, None],
    ]
    header_row = [
        "Provider's legal name",
        "Provider's trading name(s)",
        "Provider's contact address",
        "Does the provider have the right to use university in its title?",
    ]
    oxford_row = [
        "The Chancellor, Masters and Scholars of the University of Oxford",
        "Oxford University\nThe University of Oxford",
        "University Offices\nWellington Square\nOxford\nOX1 2JD\nUnited Kingdom",
        "Yes",
    ]
    college_row = [
        "Example College",
        "Not applicable",
        "Example Street\nLondon\nSW1A 1AA\nUnited Kingdom",
        "No",
    ]
    raw_register = pl.DataFrame(
        [*preamble_rows, header_row, oxford_row, college_row],
        orient="row",
    )

    # The lookup key is the Oxford postcode written without its space.
    postcode_coords = {"OX12JD": (51.7585, -1.2643)}

    universities, skipped = _ofs_universities(raw_register, postcode_coords)

    expected_oxford = {
        "name": "Oxford University",
        "place_type": "university",
        "lat": 51.7585,
        "lon": -1.2643,
        "population": 0,
        "travel_destination": True,
    }
    assert skipped == 0
    assert universities == [expected_oxford]