"""Tests for transit_network GTFS processing.""" import zipfile from pathlib import Path import pytest from pipeline.download.transit_network import convert_high_freq_to_frequency_based def _write_gtfs(path: Path, *, stop_times: str) -> None: """Write a minimal GTFS zip with one metro route and several trips.""" routes = "route_id,route_type\nR1,1\n" trips = "trip_id,route_id,direction_id,service_id\n" + "".join( f"T{i},R1,0,S1\n" for i in range(1, 7) ) with zipfile.ZipFile(path, "w") as z: z.writestr("routes.txt", routes) z.writestr("trips.txt", trips) z.writestr("stop_times.txt", stop_times) def _one_based_stop_times() -> str: """Six trips, 1-based stop_sequence (1,2,...), 5-minute headway.""" header = "trip_id,stop_sequence,departure_time,stop_id\n" rows = [] # First departures 06:00, 06:05, ... (300s = 5 min headway, well under 15 min) for i in range(6): trip = f"T{i + 1}" first_dep = 6 * 3600 + i * 300 h, m = divmod(first_dep, 3600) m, s = divmod(m, 60) # First stop has stop_sequence 1 (NOT 0); second stop sequence 2. rows.append(f"{trip},1,{h:02d}:{m:02d}:{s:02d},STOP_A\n") later = first_dep + 120 h2, m2 = divmod(later, 3600) m2, s2 = divmod(m2, 60) rows.append(f"{trip},2,{h2:02d}:{m2:02d}:{s2:02d},STOP_B\n") return header + "".join(rows) def test_one_based_stop_sequence_is_converted(tmp_path: Path) -> None: """First stop selection must use the minimum stop_sequence, not literal "0". With 1-based stop_sequence the old code (keyed on stop_sequence == "0") found zero first stops and produced an empty frequencies.txt. The fix selects the minimum stop_sequence per trip, so the high-frequency group is converted. """ src = tmp_path / "in.zip" dst = tmp_path / "out.zip" _write_gtfs(src, stop_times=_one_based_stop_times()) convert_high_freq_to_frequency_based(src, dst) with zipfile.ZipFile(dst, "r") as z: freq = z.read("frequencies.txt").decode("utf-8") freq_rows = [r for r in freq.splitlines()[1:] if r.strip()] # The single high-frequency group must produce exactly one frequency entry. assert len(freq_rows) == 1, freq trip_id, start_time, end_time, headway_secs, _exact = freq_rows[0].split(",") # Template trip is the earliest departure (T1 at 06:00) starting at first stop. assert start_time == "06:00:00" # Median headway of 300s rounds to a 300s headway entry. assert headway_secs == "300" def test_raises_when_no_first_stops_found(tmp_path: Path) -> None: """A non-empty target trip set with unparseable stop_sequence is loud, not silent.""" src = tmp_path / "in.zip" dst = tmp_path / "out.zip" bad = ( "trip_id,stop_sequence,departure_time,stop_id\n" "T1,not_a_number,06:00:00,STOP_A\n" ) _write_gtfs(src, stop_times=bad) with pytest.raises(RuntimeError, match="no first stops"): convert_high_freq_to_frequency_based(src, dst)