perfect-postcode/pipeline/download/test_transit_network.py

"""Tests for transit_network GTFS processing."""

import zipfile
from pathlib import Path

import pytest

from pipeline.download.transit_network import convert_high_freq_to_frequency_based


def _write_gtfs(path: Path, *, stop_times: str) -> None:
    """Create a tiny GTFS archive at ``path`` for the conversion tests.

    The archive contains one metro route ``R1`` (``route_type`` 1), six trips
    ``T1``..``T6`` on that route (direction 0, service ``S1``), and the
    caller-supplied ``stop_times`` text stored verbatim as ``stop_times.txt``.

    Args:
        path: Destination of the zip file; it is opened in write mode.
        stop_times: Full contents of ``stop_times.txt``, header included.
    """
    trip_lines = [f"T{n},R1,0,S1\n" for n in range(1, 7)]
    # Insertion order of this mapping is the order members land in the zip.
    members = {
        "routes.txt": "route_id,route_type\nR1,1\n",
        "trips.txt": (
            "trip_id,route_id,direction_id,service_id\n" + "".join(trip_lines)
        ),
        "stop_times.txt": stop_times,
    }
    with zipfile.ZipFile(path, "w") as archive:
        for member_name, text in members.items():
            archive.writestr(member_name, text)


def _one_based_stop_times() -> str:
    """Return stop_times.txt text for six trips whose stop_sequence starts at 1.

    Each trip ``T1``..``T6`` visits ``STOP_A`` (sequence 1) and, 120 seconds
    later, ``STOP_B`` (sequence 2). First departures begin at 06:00:00 and are
    spaced 300 seconds apart, i.e. a 5-minute headway.
    """

    def clock(total_seconds: int) -> str:
        # Render a seconds-since-midnight value as zero-padded HH:MM:SS.
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

    service_start = 6 * 3600  # 06:00:00
    headway = 300  # 5 minutes between consecutive first departures
    lines = ["trip_id,stop_sequence,departure_time,stop_id\n"]
    for index in range(6):
        trip_id = f"T{index + 1}"
        at_stop_a = service_start + index * headway
        # The first stop deliberately carries stop_sequence 1 rather than 0.
        lines.append(f"{trip_id},1,{clock(at_stop_a)},STOP_A\n")
        lines.append(f"{trip_id},2,{clock(at_stop_a + 120)},STOP_B\n")
    return "".join(lines)


def test_one_based_stop_sequence_is_converted(tmp_path: Path) -> None:
    """A feed whose stop_sequence starts at 1 still yields a frequency entry.

    The first stop of a trip has to be picked by its minimum stop_sequence
    rather than by the literal value "0". With 1-based sequences, code keyed
    on stop_sequence == "0" finds no first stops and writes an empty
    frequencies.txt; selecting the per-trip minimum lets the high-frequency
    group be converted.
    """
    archive_in = tmp_path / "in.zip"
    archive_out = tmp_path / "out.zip"
    _write_gtfs(archive_in, stop_times=_one_based_stop_times())

    convert_high_freq_to_frequency_based(archive_in, archive_out)

    with zipfile.ZipFile(archive_out, "r") as converted:
        frequencies_text = converted.read("frequencies.txt").decode("utf-8")

    # Drop the header line and any blank lines, keeping only data rows.
    data_rows = []
    for line in frequencies_text.splitlines()[1:]:
        if line.strip():
            data_rows.append(line)

    # One high-frequency group in, exactly one frequency entry out.
    assert len(data_rows) == 1, frequencies_text

    # The row is expected to have exactly five comma-separated fields.
    _trip, start_time, _end, headway_secs, _exact = data_rows[0].split(",")
    # The entry starts at the earliest first-stop departure (T1 at 06:00).
    assert start_time == "06:00:00"
    # All gaps between first departures are 300s, so the headway is 300s.
    assert headway_secs == "300"


def test_raises_when_no_first_stops_found(tmp_path: Path) -> None:
    """Unparseable stop_sequence values must fail loudly, not silently.

    The feed still declares target trips, but its only stop_times row has a
    non-numeric stop_sequence; the conversion is expected to raise a
    RuntimeError whose message matches "no first stops".
    """
    header = "trip_id,stop_sequence,departure_time,stop_id\n"
    unparseable_row = "T1,not_a_number,06:00:00,STOP_A\n"
    archive_in = tmp_path / "in.zip"
    archive_out = tmp_path / "out.zip"
    _write_gtfs(archive_in, stop_times=header + unparseable_row)

    with pytest.raises(RuntimeError, match="no first stops"):
        convert_high_freq_to_frequency_based(archive_in, archive_out)