Test changes
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 8m20s
CI / Check (push) Failing after 10m40s

This commit is contained in:
Andras Schmelczer 2026-05-09 11:35:38 +01:00
parent 4c95815dc8
commit be02fc16bb
41 changed files with 4224 additions and 759 deletions

View file

@@ -6,6 +6,7 @@ Reuses the same england-latest.osm.pbf as pois.py.
"""
import argparse
import re
from pathlib import Path
import osmium
@@ -44,11 +45,37 @@ _STATION_STRIP = (
" underground station",
" railway station",
" dlr station",
" station dlr",
" dlr",
" overground station",
" tram stop",
" station",
)
# Finds the literal "ZZDL" followed by exactly three uppercase letters or digits
# anywhere in a string, and captures those three characters as group 1.
_DLR_CODE_RE = re.compile(r"ZZDL([A-Z0-9]{3})")
def _is_dlr_station(tags: dict[str, str]) -> bool:
name = tags.get("name", "").lower()
network = tags.get("network", "").lower()
operator = tags.get("operator", "").lower()
return (
"docklands" in network
or "dlr" in network
or "docklands" in operator
or "dlr" in operator
or name.endswith(" dlr")
or " dlr " in name
)
def _is_tram_station(tags: dict[str, str]) -> bool:
    """Return True when the tags describe a tram stop rather than a rail station.

    Anything recognised by ``_is_dlr_station`` is never treated as a tram
    stop. Otherwise a node counts as a tram stop when its ``station`` tag is
    exactly "light_rail" or its lowercased ``network`` tag mentions "tramlink"
    or "tram".
    """
    # The DLR check comes first so it takes precedence over the light_rail rule below.
    if _is_dlr_station(tags):
        return False

    if tags.get("station", "") == "light_rail":
        return True

    network_name = tags.get("network", "").lower()
    return "tramlink" in network_name or "tram" in network_name
def _station_display_name(name: str, tags: dict[str, str]) -> str:
"""Build a descriptive station name like 'Bank tube station'."""
@@ -78,6 +105,96 @@ def _station_display_name(name: str, tags: dict[str, str]) -> str:
return f"{name} {suffix}"
def _station_name_score(name: str) -> tuple[int, int]:
lower = name.lower()
suffix_penalty = int(
lower.endswith(
(
" underground station",
" tube station",
" dlr station",
" railway station",
" rail station",
" station dlr",
" station",
)
)
or lower.endswith(" dlr")
)
return (suffix_penalty, len(name))
def _naptan_dlr_stations(naptan_path: Path) -> list[dict]:
    """Build one destination record per DLR station found in a NaPTAN parquet file.

    Rows are kept when their ``id`` matches ``_DLR_CODE_RE``, their
    ``category`` is "Tube station" or "Rail station", and their ``name`` is
    non-empty. Kept rows are grouped by the three-character code captured from
    the id. Each group yields one record whose coordinates are the mean of the
    group's ``lat``/``lng`` values and whose name is derived from the group
    member with the lowest ``_station_name_score``.

    Args:
        naptan_path: Parquet file with at least the columns ``id``, ``name``,
            ``category``, ``lat`` and ``lng``.

    Returns:
        Station records sorted by display name.

    Raises:
        ValueError: If any of the required columns is absent.
    """
    frame = pl.read_parquet(naptan_path)
    absent = {"id", "name", "category", "lat", "lng"}.difference(frame.columns)
    if absent:
        raise ValueError(f"NaPTAN file is missing columns: {sorted(absent)}")

    station_categories = {"Tube station", "Rail station"}
    by_code: dict[str, dict] = {}

    for record in frame.iter_rows(named=True):
        found = _DLR_CODE_RE.search(str(record["id"] or ""))
        if found is None or record["category"] not in station_categories:
            continue

        label = str(record["name"] or "")
        if not label:
            continue

        latitude = float(record["lat"])
        longitude = float(record["lng"])

        code = found.group(1)
        group = by_code.get(code)
        if group is None:
            # First row seen for this station code starts a new accumulator.
            by_code[code] = {
                "raw_name": label,
                "lat_sum": latitude,
                "lon_sum": longitude,
                "count": 1,
            }
        else:
            group["lat_sum"] += latitude
            group["lon_sum"] += longitude
            group["count"] += 1
            # Keep whichever name scores lowest among the group's rows.
            if _station_name_score(label) < _station_name_score(group["raw_name"]):
                group["raw_name"] = label

    destinations = [
        {
            "name": _station_display_name(group["raw_name"], {"network": "DLR"}),
            "place_type": "station",
            "lat": group["lat_sum"] / group["count"],
            "lon": group["lon_sum"] / group["count"],
            "population": 0,
            "travel_destination": True,
        }
        for group in by_code.values()
    ]
    destinations.sort(key=lambda entry: entry["name"])
    return destinations
def _append_naptan_dlr_stations(places: list[dict], naptan_path: Path) -> int:
    """Append NaPTAN-derived DLR stations to ``places``, skipping duplicate names.

    Names are compared case-insensitively (via ``casefold``) against the names
    already in ``places`` and against stations added earlier in the same call.
    ``places`` is modified in place.

    Args:
        places: Existing place records; each must have a ``name`` entry.
        naptan_path: NaPTAN parquet file passed to ``_naptan_dlr_stations``.

    Returns:
        The number of stations appended.
    """
    seen = {str(place["name"]).casefold() for place in places}
    size_before = len(places)

    for candidate in _naptan_dlr_stations(naptan_path):
        folded = candidate["name"].casefold()
        if folded not in seen:
            seen.add(folded)
            places.append(candidate)

    return len(places) - size_before
class PlaceHandler(osmium.SimpleHandler):
def __init__(self, progress: tqdm, england_polygon) -> None:
super().__init__()
@@ -145,14 +262,7 @@ class PlaceHandler(osmium.SimpleHandler):
# Railway stations (tube, national rail, DLR, overground, Elizabeth line)
if n.tags.get("railway") == "station":
tags = dict(n.tags)
station_tag = tags.get("station", "")
network = tags.get("network", "").lower()
# Skip tram stops
if (
station_tag == "light_rail"
or "tramlink" in network
or "tram" in network
):
if _is_tram_station(tags):
return
display_name = _station_display_name(name, tags)
self._add(
@@ -178,6 +288,11 @@ def main() -> None:
required=True,
help="England boundary GeoJSON file",
)
parser.add_argument(
"--naptan",
type=Path,
help="Optional NaPTAN parquet file used to add DLR station destinations",
)
args = parser.parse_args()
pbf_file = args.pbf
@@ -195,6 +310,9 @@ def main() -> None:
handler.apply_file(str(pbf_file), locations=True)
print(f"Extracted {len(handler.places):,} place nodes")
if args.naptan:
added = _append_naptan_dlr_stations(handler.places, args.naptan)
print(f"Added {added:,} DLR station destinations from NaPTAN")
if handler.places:
df = pl.DataFrame(handler.places)