"""Extract place=* nodes and railway stations from OSM PBF → data/places.parquet.

Extracts named place nodes (cities, towns, suburbs, etc.) and railway stations
(tube, national rail, DLR, etc.) for typeahead search.
Reuses the same great-britain-latest.osm.pbf as pois.py.
"""

import argparse
from pathlib import Path

import osmium
import polars as pl
from tqdm import tqdm

from .pois import UK_BBOX_EAST, UK_BBOX_NORTH, UK_BBOX_SOUTH, UK_BBOX_WEST

# Values of the OSM ``place`` tag that are extracted as searchable places.
# Membership is all that matters (it is a set); the grouping is for readers.
PLACE_TYPES = {
    # settlements
    "city",
    "town",
    "village",
    "hamlet",
    "isolated_dwelling",
    # parts of settlements
    "borough",
    "suburb",
    "quarter",
    "neighbourhood",
    # other named places
    "locality",
    "island",
}

# Suffixes to strip from raw station names before appending the typed suffix.
_STATION_STRIP = (
    " tube station",
    " underground station",
    " railway station",
    " dlr station",
    " overground station",
    " tram stop",
    " station",
)


def _station_display_name(name: str, tags: dict[str, str]) -> str:
    """Build a descriptive station name like 'Bank tube station'."""
    station_tag = tags.get("station", "")
    network = tags.get("network", "").lower()

    if station_tag == "subway" or "underground" in network:
        suffix = "tube station"
    elif "docklands" in network or "dlr" in network:
        suffix = "DLR station"
    elif "overground" in network:
        suffix = "overground station"
    elif "elizabeth" in network:
        suffix = "Elizabeth line station"
    elif station_tag == "light_rail" or "tramlink" in network or "tram" in network:
        suffix = "tram stop"
    else:
        suffix = "railway station"

    # Strip any existing station suffix from the raw name
    lower = name.lower()
    for s in _STATION_STRIP:
        if lower.endswith(s):
            name = name[: len(name) - len(s)].rstrip()
            break

    return f"{name} {suffix}"


class PlaceHandler(osmium.SimpleHandler):
    """Collect named place nodes and railway stations from an OSM node stream.

    Every kept node is appended to ``self.places`` as a dict with the keys
    ``name``, ``place_type``, ``lat``, ``lon`` and ``population``.
    """

    def __init__(self, progress: tqdm) -> None:
        """Store the progress bar to tick and start with no collected places."""
        super().__init__()
        self._progress = progress
        self.places: list[dict] = []

    @staticmethod
    def _parse_population(raw: str) -> int:
        """Return *raw* as an int, or 0 when it is empty or not an integer."""
        # Most nodes carry no population tag; answer that case without
        # raising and catching an exception.
        if not raw:
            return 0
        try:
            return int(raw)
        except ValueError:
            return 0

    def _add(
        self, name: str, place_type: str, lat: float, lon: float, population: int
    ) -> None:
        """Append one record and show the running count on the progress bar."""
        self.places.append(
            {
                "name": name,
                "place_type": place_type,
                "lat": lat,
                "lon": lon,
                "population": population,
            }
        )
        self._progress.set_postfix(places=f"{len(self.places):,}", refresh=False)

    def node(self, n: osmium.osm.Node) -> None:
        """Record *n* if it is a named place or railway station inside the bbox.

        Nodes are skipped when their location is invalid, when they fall
        outside the ``UK_BBOX_*`` bounds, when they are neither a
        ``place`` in ``PLACE_TYPES`` nor ``railway=station``, or when they
        have no usable name.
        """
        self._progress.update(1)
        if not n.location.valid:
            return
        lat, lon = n.location.lat, n.location.lon
        if not (
            UK_BBOX_SOUTH <= lat <= UK_BBOX_NORTH
            and UK_BBOX_WEST <= lon <= UK_BBOX_EAST
        ):
            return

        tags = n.tags

        # Decide whether the node is wanted before doing any name/population
        # work, so that unrelated named nodes cost only two tag lookups.
        place_type = tags.get("place")
        is_place = place_type in PLACE_TYPES
        if not is_place and tags.get("railway") != "station":
            return

        # Prefer the English name; fall back to the default name when the
        # English one is absent or empty.
        name = tags.get("name:en") or tags.get("name")
        if not name:
            return

        population = self._parse_population(tags.get("population", ""))

        if is_place:
            # place=* nodes (cities, towns, suburbs, etc.)
            self._add(name, place_type, lat, lon, population)
        else:
            # railway=station nodes (tube, national rail, DLR, tram, etc.)
            display_name = _station_display_name(name, dict(tags))
            self._add(display_name, "station", lat, lon, population)


def main() -> None:
    """Stream the PBF given by ``--pbf`` and write the places to ``--output``.

    The parquet file (and any missing parent directory) is only created when
    at least one place was extracted; otherwise nothing is written.
    """
    parser = argparse.ArgumentParser(description="Extract place names from OSM PBF")
    parser.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    parser.add_argument(
        "--pbf", type=Path, required=True, help="Path to OSM PBF file"
    )
    args = parser.parse_args()

    print(f"Extracting place nodes: {sorted(PLACE_TYPES)} + railway=station")
    with tqdm(
        unit=" elements",
        unit_scale=True,
        desc="Streaming",
        smoothing=0.05,
        mininterval=1.0,
    ) as progress:
        handler = PlaceHandler(progress)
        # Only the node callback is implemented and nodes carry their own
        # coordinates, so the node-location index (which exists to resolve
        # way geometries) is not requested; building it would cost memory
        # and time for nothing.
        handler.apply_file(str(args.pbf), locations=False)

    print(f"Extracted {len(handler.places):,} place nodes")

    if handler.places:
        df = pl.DataFrame(handler.places)
        args.output.parent.mkdir(parents=True, exist_ok=True)
        df.write_parquet(args.output)
        print(f"Saved to {args.output}")
    else:
        print("No places found — skipping output")


if __name__ == "__main__":
    main()