idk2

2026-06-02 13:46:22 +01:00 · 2026-06-02 13:46:22 +01:00 · fbfebc651c
commit fbfebc651c
parent d43da9708c
5 changed files with 295 additions and 0 deletions
--- a/pipeline/download/test_transit_network.py
+++ b/pipeline/download/test_transit_network.py
@@ -0,0 +1,79 @@
+"""Tests for transit_network GTFS processing."""
+
+import zipfile
+from pathlib import Path
+
+import pytest
+
+from pipeline.download.transit_network import convert_high_freq_to_frequency_based
+
+
+def _write_gtfs(path: Path, *, stop_times: str) -> None:
+    """Create a small GTFS zip at *path*: route R1 (type 1) with trips T1-T6.
+
+    ``stop_times`` is stored verbatim as the archive's stop_times.txt.
+    """
+    trip_rows = "".join(f"T{n},R1,0,S1\n" for n in range(1, 7))
+    with zipfile.ZipFile(path, "w") as archive:
+        archive.writestr("routes.txt", "route_id,route_type\nR1,1\n")
+        archive.writestr("trips.txt", "trip_id,route_id,direction_id,service_id\n" + trip_rows)
+        archive.writestr("stop_times.txt", stop_times)
+
+
+def _one_based_stop_times() -> str:
+    """Return stop_times.txt text for trips T1-T6 with stop_sequence starting at 1.
+
+    Trips leave STOP_A (sequence 1) every 300 s from 06:00:00 and leave STOP_B
+    (sequence 2) 120 s after that.
+    """
+
+    def clock(total_seconds: int) -> str:
+        minutes, seconds = divmod(total_seconds, 60)
+        return f"{minutes // 60:02d}:{minutes % 60:02d}:{seconds:02d}"
+
+    lines = ["trip_id,stop_sequence,departure_time,stop_id\n"]
+    for trip_no in range(1, 7):
+        start = 6 * 3600 + (trip_no - 1) * 300
+        lines.append(f"T{trip_no},1,{clock(start)},STOP_A\n")
+        lines.append(f"T{trip_no},2,{clock(start + 120)},STOP_B\n")
+    return "".join(lines)
+
+
+def test_one_based_stop_sequence_is_converted(tmp_path: Path) -> None:
+    """A feed whose stop_sequence starts at 1 still yields a frequency entry.
+
+    Regression guard: selecting first stops by the literal value "0" found no
+    first stops for 1-based feeds and left frequencies.txt empty; picking each
+    trip's minimum stop_sequence lets the high-frequency group be converted.
+    """
+    feed_in, feed_out = tmp_path / "in.zip", tmp_path / "out.zip"
+    _write_gtfs(feed_in, stop_times=_one_based_stop_times())
+
+    convert_high_freq_to_frequency_based(feed_in, feed_out)
+
+    with zipfile.ZipFile(feed_out, "r") as archive:
+        freq = archive.read("frequencies.txt").decode("utf-8")
+
+    # Data rows only: drop the header line and any blank lines.
+    entries = [line for line in freq.splitlines()[1:] if line.strip()]
+    # The fixture holds a single high-frequency group, so expect one entry.
+    assert len(entries) == 1, freq
+    _trip, start_time, _end, headway_secs, _exact = entries[0].split(",")
+    # The template trip is the earliest departure (T1, first stop at 06:00).
+    assert start_time == "06:00:00"
+    # Fixture departures are 300 s apart, so the entry's headway is 300.
+    assert headway_secs == "300"
+
+
+def test_raises_when_no_first_stops_found(tmp_path: Path) -> None:
+    """Conversion raises rather than failing silently on unparseable input.
+
+    The only stop_times row has a non-numeric stop_sequence, so although trips
+    exist, no first stop can be determined for them.
+    """
+    feed_in, feed_out = tmp_path / "in.zip", tmp_path / "out.zip"
+    header = "trip_id,stop_sequence,departure_time,stop_id\n"
+    _write_gtfs(feed_in, stop_times=header + "T1,not_a_number,06:00:00,STOP_A\n")
+
+    with pytest.raises(RuntimeError, match="no first stops"):
+        convert_high_freq_to_frequency_based(feed_in, feed_out)