Test changes
This commit is contained in:
parent
4c95815dc8
commit
be02fc16bb
41 changed files with 4224 additions and 759 deletions
|
|
@ -6,6 +6,7 @@ Reuses the same england-latest.osm.pbf as pois.py.
|
|||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import osmium
|
||||
|
|
@ -44,11 +45,37 @@ _STATION_STRIP = (
|
|||
" underground station",
|
||||
" railway station",
|
||||
" dlr station",
|
||||
" station dlr",
|
||||
" dlr",
|
||||
" overground station",
|
||||
" tram stop",
|
||||
" station",
|
||||
)
|
||||
|
||||
# Matches the literal prefix "ZZDL" followed by a three-character code made of
# uppercase letters or digits, and captures that code. _naptan_dlr_stations
# searches NaPTAN ids with it and groups rows by the captured code.
# NOTE(review): presumably "ZZDL" marks DLR stops in ATCO ids — confirm.
_DLR_CODE_RE = re.compile(r"ZZDL([A-Z0-9]{3})")
|
||||
|
||||
|
||||
def _is_dlr_station(tags: dict[str, str]) -> bool:
|
||||
name = tags.get("name", "").lower()
|
||||
network = tags.get("network", "").lower()
|
||||
operator = tags.get("operator", "").lower()
|
||||
return (
|
||||
"docklands" in network
|
||||
or "dlr" in network
|
||||
or "docklands" in operator
|
||||
or "dlr" in operator
|
||||
or name.endswith(" dlr")
|
||||
or " dlr " in name
|
||||
)
|
||||
|
||||
|
||||
def _is_tram_station(tags: dict[str, str]) -> bool:
    """Return True when a station's tags describe a tram stop rather than DLR.

    DLR stations are ruled out first, so a DLR station is never reported as a
    tram stop even when it carries ``station=light_rail``.
    """
    if _is_dlr_station(tags):
        return False

    if tags.get("station", "") == "light_rail":
        return True

    network_name = tags.get("network", "").lower()
    return any(marker in network_name for marker in ("tramlink", "tram"))
|
||||
|
||||
|
||||
def _station_display_name(name: str, tags: dict[str, str]) -> str:
|
||||
"""Build a descriptive station name like 'Bank tube station'."""
|
||||
|
|
@ -78,6 +105,96 @@ def _station_display_name(name: str, tags: dict[str, str]) -> str:
|
|||
return f"{name} {suffix}"
|
||||
|
||||
|
||||
def _station_name_score(name: str) -> tuple[int, int]:
|
||||
lower = name.lower()
|
||||
suffix_penalty = int(
|
||||
lower.endswith(
|
||||
(
|
||||
" underground station",
|
||||
" tube station",
|
||||
" dlr station",
|
||||
" railway station",
|
||||
" rail station",
|
||||
" station dlr",
|
||||
" station",
|
||||
)
|
||||
)
|
||||
or lower.endswith(" dlr")
|
||||
)
|
||||
return (suffix_penalty, len(name))
|
||||
|
||||
|
||||
def _naptan_dlr_stations(naptan_path: Path) -> list[dict]:
    """Extract station-level DLR destinations from NaPTAN access nodes.

    A row is kept when its ``id`` matches ``_DLR_CODE_RE``, its ``category``
    is "Tube station" or "Rail station", and it has a non-empty name and
    non-null coordinates. Rows sharing the same three-character code are
    merged into one station: coordinates are averaged and the best-scoring
    name (lowest ``_station_name_score``) is kept.

    Args:
        naptan_path: Parquet file with at least the columns ``id``, ``name``,
            ``category``, ``lat`` and ``lng``.

    Returns:
        Place dicts with the keys ``name``, ``place_type``, ``lat``, ``lon``,
        ``population`` and ``travel_destination``, sorted by ``name``.

    Raises:
        ValueError: If any required column is missing from the file.
    """
    df = pl.read_parquet(naptan_path)
    required = {"id", "name", "category", "lat", "lng"}
    missing = required - set(df.columns)
    if missing:
        raise ValueError(f"NaPTAN file is missing columns: {sorted(missing)}")

    rows: dict[str, dict] = {}
    for row in df.iter_rows(named=True):
        atco_id = str(row["id"] or "")
        match = _DLR_CODE_RE.search(atco_id)
        if not match:
            continue
        if row["category"] not in {"Tube station", "Rail station"}:
            continue

        code = match.group(1)
        raw_name = str(row["name"] or "")
        if not raw_name:
            continue

        raw_lat = row["lat"]
        raw_lon = row["lng"]
        if raw_lat is None or raw_lon is None:
            # Null coordinates cannot contribute to the average, and
            # float(None) would raise TypeError and abort the whole run.
            continue

        lat = float(raw_lat)
        lon = float(raw_lon)
        current = rows.get(code)
        if current is None:
            rows[code] = {
                "raw_name": raw_name,
                "lat_sum": lat,
                "lon_sum": lon,
                "count": 1,
            }
            continue

        # Accumulate sums so the station position is the mean of its nodes.
        current["lat_sum"] += lat
        current["lon_sum"] += lon
        current["count"] += 1
        if _station_name_score(raw_name) < _station_name_score(current["raw_name"]):
            current["raw_name"] = raw_name

    stations = []
    for station in rows.values():
        count = station["count"]
        display_name = _station_display_name(station["raw_name"], {"network": "DLR"})
        stations.append(
            {
                "name": display_name,
                "place_type": "station",
                "lat": station["lat_sum"] / count,
                "lon": station["lon_sum"] / count,
                "population": 0,
                "travel_destination": True,
            }
        )

    return sorted(stations, key=lambda station: station["name"])
|
||||
|
||||
|
||||
def _append_naptan_dlr_stations(places: list[dict], naptan_path: Path) -> int:
    """Append NaPTAN DLR stations that are not already present in ``places``.

    Names are compared case-insensitively via ``casefold``. ``places`` is
    extended in place.

    Returns:
        The number of stations appended.
    """
    seen = {str(place["name"]).casefold() for place in places}
    size_before = len(places)
    for candidate in _naptan_dlr_stations(naptan_path):
        folded = candidate["name"].casefold()
        if folded not in seen:
            seen.add(folded)
            places.append(candidate)
    return len(places) - size_before
|
||||
|
||||
|
||||
class PlaceHandler(osmium.SimpleHandler):
|
||||
def __init__(self, progress: tqdm, england_polygon) -> None:
|
||||
super().__init__()
|
||||
|
|
@ -145,14 +262,7 @@ class PlaceHandler(osmium.SimpleHandler):
|
|||
# Railway stations (tube, national rail, DLR, overground, Elizabeth line)
|
||||
if n.tags.get("railway") == "station":
|
||||
tags = dict(n.tags)
|
||||
station_tag = tags.get("station", "")
|
||||
network = tags.get("network", "").lower()
|
||||
# Skip tram stops
|
||||
if (
|
||||
station_tag == "light_rail"
|
||||
or "tramlink" in network
|
||||
or "tram" in network
|
||||
):
|
||||
if _is_tram_station(tags):
|
||||
return
|
||||
display_name = _station_display_name(name, tags)
|
||||
self._add(
|
||||
|
|
@ -178,6 +288,11 @@ def main() -> None:
|
|||
required=True,
|
||||
help="England boundary GeoJSON file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--naptan",
|
||||
type=Path,
|
||||
help="Optional NaPTAN parquet file used to add DLR station destinations",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
pbf_file = args.pbf
|
||||
|
|
@ -195,6 +310,9 @@ def main() -> None:
|
|||
handler.apply_file(str(pbf_file), locations=True)
|
||||
|
||||
print(f"Extracted {len(handler.places):,} place nodes")
|
||||
if args.naptan:
|
||||
added = _append_naptan_dlr_stations(handler.places, args.naptan)
|
||||
print(f"Added {added:,} DLR station destinations from NaPTAN")
|
||||
|
||||
if handler.places:
|
||||
df = pl.DataFrame(handler.places)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue