This commit is contained in:
Andras Schmelczer 2026-02-14 12:53:29 +00:00
parent 3a3f899ea2
commit 128b3191e7
68 changed files with 28060 additions and 1152 deletions

View file

@ -1,6 +1,7 @@
"""Extract place=* nodes from OSM PBF → data/places.parquet.
"""Extract place=* nodes and railway stations from OSM PBF → data/places.parquet.
Extracts named place nodes (cities, towns, suburbs, etc.) for typeahead search.
Extracts named place nodes (cities, towns, suburbs, etc.) and railway stations
(tube, national rail, DLR, etc.) for typeahead search.
Reuses the same great-britain-latest.osm.pbf as pois.py.
"""
@ -18,13 +19,54 @@ PLACE_TYPES = {
"borough",
"town",
"suburb",
"quarter",
"neighbourhood",
"village",
"hamlet",
"locality",
"island",
"isolated_dwelling",
}
# Suffixes to strip from raw station names before appending the typed suffix.
# Matched case-insensitively against the end of the name; the first match wins,
# so every specific suffix must come before the generic " station". Each suffix
# that _station_display_name can append needs a counterpart here, otherwise a
# name that already carries it would come out with the suffix doubled.
_STATION_STRIP = (
    " tube station",
    " underground station",
    " railway station",
    " dlr station",
    " overground station",
    " elizabeth line station",
    " tram stop",
    " station",
)


def _station_display_name(name: str, tags: dict[str, str]) -> str:
    """Build a descriptive station name like 'Bank tube station'.

    Args:
        name: Raw station name; may already end in a station suffix.
        tags: Tags of the station; only ``station`` and ``network`` are read.

    Returns:
        ``name`` with at most one trailing ``_STATION_STRIP`` suffix removed,
        followed by a space and the suffix selected from the tags.
    """
    station_tag = tags.get("station", "")
    network = tags.get("network", "").lower()

    # Rule order matters: the first matching rule decides the suffix.
    if station_tag == "subway" or "underground" in network:
        suffix = "tube station"
    elif "docklands" in network or "dlr" in network:
        suffix = "DLR station"
    elif "overground" in network:
        suffix = "overground station"
    elif "elizabeth" in network:
        suffix = "Elizabeth line station"
    elif station_tag == "light_rail" or "tram" in network:
        # The "tram" substring test also matches "tramlink".
        suffix = "tram stop"
    else:
        suffix = "railway station"

    # Strip any existing station suffix from the raw name (one suffix at most).
    lower = name.lower()
    for existing in _STATION_STRIP:
        if lower.endswith(existing):
            name = name[: len(name) - len(existing)].rstrip()
            break

    return f"{name} {suffix}"
class PlaceHandler(osmium.SimpleHandler):
def __init__(self, progress: tqdm) -> None:
@ -32,6 +74,12 @@ class PlaceHandler(osmium.SimpleHandler):
self._progress = progress
self.places: list[dict] = []
def _add(self, name: str, place_type: str, lat: float, lon: float, population: int) -> None:
    """Record one extracted entry and update the progress-bar counter.

    Appends a dict with the keys ``name``, ``place_type``, ``lat``, ``lon`` and
    ``population`` to ``self.places``, then shows the new total (formatted with
    thousands separators) as the ``places`` postfix of the progress bar.
    """
    record = {
        "name": name,
        "place_type": place_type,
        "lat": lat,
        "lon": lon,
        "population": population,
    }
    self.places.append(record)

    total = len(self.places)
    # refresh=False: do not force a redraw of the bar on every added entry.
    self._progress.set_postfix(places=f"{total:,}", refresh=False)
def node(self, n: osmium.osm.Node) -> None:
self._progress.update(1)
if not n.location.valid:
@ -39,16 +87,28 @@ class PlaceHandler(osmium.SimpleHandler):
lat, lon = n.location.lat, n.location.lon
if not (UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH and UK_BBOX_WEST <= lon <= UK_BBOX_EAST):
return
place_type = n.tags.get("place")
if place_type not in PLACE_TYPES:
return
name = n.tags.get("name:en", n.tags.get("name", ""))
if not name:
return
self.places.append(
{"name": name, "place_type": place_type, "lat": lat, "lon": lon}
)
self._progress.set_postfix(places=f"{len(self.places):,}", refresh=False)
pop_str = n.tags.get("population", "")
try:
population = int(pop_str)
except ValueError:
population = 0
# place=* nodes (cities, towns, suburbs, etc.)
place_type = n.tags.get("place")
if place_type in PLACE_TYPES:
self._add(name, place_type, lat, lon, population)
return
# railway=station nodes (tube, national rail, DLR, tram, etc.)
if n.tags.get("railway") == "station":
display_name = _station_display_name(name, dict(n.tags))
self._add(display_name, "station", lat, lon, population)
return
def main() -> None:
@ -73,7 +133,7 @@ def main() -> None:
else:
print(f"Using cached PBF: {pbf_file}")
print(f"Extracting place nodes: {sorted(PLACE_TYPES)}")
print(f"Extracting place nodes: {sorted(PLACE_TYPES)} + railway=station")
with tqdm(
unit=" elements",
unit_scale=True,