"""Extract place=* nodes from OSM PBF → data/places.parquet. Extracts named place nodes (cities, towns, suburbs, etc.) for typeahead search. Reuses the same great-britain-latest.osm.pbf as pois.py. """ import argparse from pathlib import Path import osmium import polars as pl from tqdm import tqdm from .pois import UK_BBOX_EAST, UK_BBOX_NORTH, UK_BBOX_SOUTH, UK_BBOX_WEST, download_pbf PLACE_TYPES = { "city", "borough", "town", "suburb", "neighbourhood", "village", "hamlet", "locality", "isolated_dwelling", } class PlaceHandler(osmium.SimpleHandler): def __init__(self, progress: tqdm) -> None: super().__init__() self._progress = progress self.places: list[dict] = [] def node(self, n: osmium.osm.Node) -> None: self._progress.update(1) if not n.location.valid: return lat, lon = n.location.lat, n.location.lon if not (UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH and UK_BBOX_WEST <= lon <= UK_BBOX_EAST): return place_type = n.tags.get("place") if place_type not in PLACE_TYPES: return name = n.tags.get("name:en", n.tags.get("name", "")) if not name: return self.places.append( {"name": name, "place_type": place_type, "lat": lat, "lon": lon} ) self._progress.set_postfix(places=f"{len(self.places):,}", refresh=False) def main() -> None: parser = argparse.ArgumentParser( description="Extract place names from OSM PBF" ) parser.add_argument( "--output", type=Path, required=True, help="Output parquet file path" ) parser.add_argument( "--pbf", type=Path, default=None, help="Path to existing PBF file (skips download)" ) args = parser.parse_args() if args.pbf and args.pbf.exists(): pbf_file = args.pbf print(f"Using existing PBF: {pbf_file}") else: pbf_file = Path("data/great-britain-latest.osm.pbf") if not pbf_file.exists(): download_pbf(pbf_file) else: print(f"Using cached PBF: {pbf_file}") print(f"Extracting place nodes: {sorted(PLACE_TYPES)}") with tqdm( unit=" elements", unit_scale=True, desc="Streaming", smoothing=0.05, mininterval=1.0, ) as progress: handler = PlaceHandler(progress) handler.apply_file(str(pbf_file), locations=True) print(f"Extracted {len(handler.places):,} place nodes") if handler.places: df = pl.DataFrame(handler.places) args.output.parent.mkdir(parents=True, exist_ok=True) df.write_parquet(args.output) print(f"Saved to {args.output}") else: print("No places found — skipping output") if __name__ == "__main__": main()