seems fine

This commit is contained in:
Andras Schmelczer 2026-05-05 22:29:28 +01:00
parent 48983e3b4b
commit 7a1696541f
37 changed files with 4999 additions and 1242 deletions

View file

@@ -1,7 +1,7 @@
"""Extract place=* nodes and railway stations from OSM PBF → data/places.parquet.
Extracts named place nodes (cities, towns, suburbs, etc.) and railway stations
(tube, national rail, DLR, etc.) for typeahead search.
Extracts named place nodes and railway stations (tube, national rail, DLR,
etc.) for typeahead search.
Reuses the same england-latest.osm.pbf as pois.py.
"""
@@ -21,7 +21,22 @@ from pipeline.utils.england_geometry import (
load_england_polygon,
)
PLACE_TYPES = {"city"}
# Search can use a wider set of OSM place nodes, but travel-time destinations
# must remain restricted to the historical city/station origin set.
#
# SEARCH_PLACE_TYPES: values of the OSM `place` tag that the node handler
# accepts; a node whose `place` value is in this set is stored as a place.
SEARCH_PLACE_TYPES = {
"city",
"town",
"village",
"suburb",
"neighbourhood",
"quarter",
"borough",
"locality",
"hamlet",
"isolated_dwelling",
"island",
}
# Subset of the place types above that are stored with travel_destination=True;
# every other search place type is stored with travel_destination=False.
# Railway stations are always stored with travel_destination=True, independent
# of this set.
TRAVEL_DESTINATION_PLACE_TYPES = {"city"}
# Suffixes to strip from raw station names before appending the typed suffix.
_STATION_STRIP = (
@@ -71,7 +86,13 @@ class PlaceHandler(osmium.SimpleHandler):
self._england = england_polygon
def _add(
self, name: str, place_type: str, lat: float, lon: float, population: int
self,
name: str,
place_type: str,
lat: float,
lon: float,
population: int,
travel_destination: bool,
) -> None:
self.places.append(
{
@@ -80,6 +101,7 @@ class PlaceHandler(osmium.SimpleHandler):
"lat": lat,
"lon": lon,
"population": population,
"travel_destination": travel_destination,
}
)
self._progress.set_postfix(places=f"{len(self.places):,}", refresh=False)
@@ -107,10 +129,17 @@ class PlaceHandler(osmium.SimpleHandler):
except ValueError:
population = 0
# place=* nodes (cities, towns, suburbs, etc.)
# place=* nodes
place_type = n.tags.get("place")
if place_type in PLACE_TYPES:
self._add(name, place_type, lat, lon, population)
if place_type in SEARCH_PLACE_TYPES:
self._add(
name,
place_type,
lat,
lon,
population,
travel_destination=place_type in TRAVEL_DESTINATION_PLACE_TYPES,
)
return
# Railway stations (tube, national rail, DLR, overground, Elizabeth line)
@@ -126,7 +155,14 @@ class PlaceHandler(osmium.SimpleHandler):
):
return
display_name = _station_display_name(name, tags)
self._add(display_name, "station", lat, lon, population)
self._add(
display_name,
"station",
lat,
lon,
population,
travel_destination=True,
)
return
@@ -147,7 +183,7 @@ def main() -> None:
pbf_file = args.pbf
england_polygon = load_england_polygon(args.boundary)
print("Extracting place nodes: cities + railway stations")
print("Extracting search place nodes + railway stations")
with tqdm(
unit=" elements",
unit_scale=True,