This commit is contained in:
Andras Schmelczer 2026-06-02 13:46:22 +01:00
parent d43da9708c
commit fbfebc651c
5 changed files with 295 additions and 0 deletions

View file

@@ -0,0 +1,79 @@
"""Tests for transit_network GTFS processing."""
import zipfile
from pathlib import Path
import pytest
from pipeline.download.transit_network import convert_high_freq_to_frequency_based
def _write_gtfs(path: Path, *, stop_times: str) -> None:
    """Create a tiny GTFS archive at *path* for use as a test fixture.

    The archive contains, in this order:

    * ``routes.txt`` -- a single route ``R1`` with ``route_type`` 1
      (subway/metro in the GTFS route_type enumeration),
    * ``trips.txt`` -- six trips ``T1``..``T6`` on ``R1``, direction 0,
      service ``S1``,
    * ``stop_times.txt`` -- the caller-supplied *stop_times* text, verbatim.

    Args:
        path: Destination of the zip file; opened in write mode.
        stop_times: Full CSV content (header included) for ``stop_times.txt``.
    """
    trip_lines = ["trip_id,route_id,direction_id,service_id\n"]
    for number in range(1, 7):
        trip_lines.append(f"T{number},R1,0,S1\n")

    # Insertion order of the dict is the order members are written to the zip.
    members = {
        "routes.txt": "route_id,route_type\nR1,1\n",
        "trips.txt": "".join(trip_lines),
        "stop_times.txt": stop_times,
    }
    with zipfile.ZipFile(path, "w") as archive:
        for member_name, content in members.items():
            archive.writestr(member_name, content)
def _format_gtfs_time(total_seconds: int) -> str:
    """Render a seconds-since-midnight offset as a GTFS ``HH:MM:SS`` string."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def _one_based_stop_times() -> str:
    """Build ``stop_times.txt`` content with 1-based ``stop_sequence`` values.

    Six trips (``T1``..``T6``) each visit ``STOP_A`` (sequence 1) and then
    ``STOP_B`` (sequence 2) two minutes later. First departures start at
    06:00:00 and are spaced 300 s apart, i.e. a 5-minute headway.

    Returns:
        The CSV text, header line included, every row newline-terminated.
    """
    header = "trip_id,stop_sequence,departure_time,stop_id\n"
    first_trip_departure = 6 * 3600  # 06:00:00
    headway_secs = 300  # 5 minutes between consecutive trips
    travel_secs = 120  # STOP_A -> STOP_B

    rows = []
    for index in range(6):
        trip = f"T{index + 1}"
        first_dep = first_trip_departure + index * headway_secs
        # The first stop deliberately carries stop_sequence 1 (NOT 0); the
        # second stop carries 2.
        rows.append(f"{trip},1,{_format_gtfs_time(first_dep)},STOP_A\n")
        rows.append(f"{trip},2,{_format_gtfs_time(first_dep + travel_secs)},STOP_B\n")
    return header + "".join(rows)
def test_one_based_stop_sequence_is_converted(tmp_path: Path) -> None:
    """A feed whose stop_sequence starts at 1 must still be converted.

    The first stop of a trip has to be picked by the minimum stop_sequence,
    not by the literal value "0". With 1-based sequences the old code (keyed
    on stop_sequence == "0") found no first stops and produced an empty
    frequencies.txt; the fix selects the per-trip minimum, so the
    high-frequency group is converted.
    """
    feed_in = tmp_path / "in.zip"
    feed_out = tmp_path / "out.zip"
    _write_gtfs(feed_in, stop_times=_one_based_stop_times())

    convert_high_freq_to_frequency_based(feed_in, feed_out)

    with zipfile.ZipFile(feed_out, "r") as archive:
        frequencies = archive.read("frequencies.txt").decode("utf-8")

    # Drop the header line and any blank lines.
    data_rows = [line for line in frequencies.splitlines()[1:] if line.strip()]

    # The single high-frequency group must yield exactly one frequency entry;
    # the full file content is shown on failure.
    assert len(data_rows) == 1, frequencies

    # Unpacking into five names also checks that the row has five fields.
    _trip_id, start_time, _end_time, headway_secs, _exact = data_rows[0].split(",")

    # The template trip is the earliest departure (T1 at 06:00), starting at
    # its first stop.
    assert start_time == "06:00:00"
    # A median headway of 300 s gives a 300 s headway entry.
    assert headway_secs == "300"
def test_raises_when_no_first_stops_found(tmp_path: Path) -> None:
    """An unparseable stop_sequence must raise loudly instead of passing silently.

    The fixture still declares target trips, but its only stop_times row has a
    non-numeric stop_sequence; the conversion is expected to raise a
    RuntimeError whose message matches "no first stops".
    """
    feed_in = tmp_path / "in.zip"
    feed_out = tmp_path / "out.zip"
    malformed_stop_times = "".join(
        [
            "trip_id,stop_sequence,departure_time,stop_id\n",
            "T1,not_a_number,06:00:00,STOP_A\n",
        ]
    )
    _write_gtfs(feed_in, stop_times=malformed_stop_times)

    with pytest.raises(RuntimeError, match="no first stops"):
        convert_high_freq_to_frequency_based(feed_in, feed_out)