Split up
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 15s
CI / Check (push) Failing after 1m58s

This commit is contained in:
Andras Schmelczer 2026-06-12 21:51:37 +01:00
parent cf39ad754e
commit f59d01227b
91 changed files with 10370 additions and 7562 deletions

View file

@ -7,6 +7,7 @@ from pathlib import Path
import pytest
from pipeline.download.transit_network import (
clean_national_rail_gtfs,
convert_high_freq_to_frequency_based,
validate_gtfs_feed,
)
@ -69,6 +70,46 @@ def test_one_based_stop_sequence_is_converted(tmp_path: Path) -> None:
assert headway_secs == "300"
def test_clean_national_rail_gtfs_orders_by_stop_sequence_not_file_order(
    tmp_path: Path,
) -> None:
    """Stop times are ordered by stop_sequence, not by their position in the file.

    dtd2mysql exports happen to list each trip's rows in stop_sequence order,
    but nothing guarantees it. Rows that arrive out of order must be sorted by
    their original stop_sequence before the backwards-time check and the
    0-based renumbering run; relying on file order would flag the trip as
    backwards and drop it (or scramble the stop order).
    """
    # Member name -> CSV text, written to the input archive in this order.
    feed_files = {
        "stops.txt": (
            "stop_id,stop_lat,stop_lon\n"
            "STOP_A,51.5,-0.1\n"
            "STOP_B,51.6,-0.1\n"
        ),
        "routes.txt": "route_id,route_type\nR1,2\n",
        "trips.txt": "trip_id,route_id,service_id\nT1,R1,S1\n",
        # File order is seq 2 then seq 1: read in file order the departures
        # look backwards (07:00 then 06:00); in sequence order they are fine.
        "stop_times.txt": (
            "trip_id,stop_id,stop_sequence,departure_time\n"
            "T1,STOP_B,2,07:00:00\n"
            "T1,STOP_A,1,06:00:00\n"
        ),
    }
    input_zip = tmp_path / "in.zip"
    output_zip = tmp_path / "out.zip"
    with zipfile.ZipFile(input_zip, "w") as archive:
        for member, content in feed_files.items():
            archive.writestr(member, content)

    clean_national_rail_gtfs(input_zip, output_zip)

    with zipfile.ZipFile(output_zip, "r") as archive:
        cleaned = {
            member: archive.read(member).decode("utf-8").splitlines()
            for member in ("stop_times.txt", "trips.txt")
        }

    # The trip survives the backwards-time check ...
    assert cleaned["trips.txt"] == ["trip_id,route_id,service_id", "T1,R1,S1"]
    # ... and its stops come out in sequence order, renumbered from 0.
    assert cleaned["stop_times.txt"] == [
        "trip_id,stop_id,stop_sequence,departure_time",
        "T1,STOP_A,0,06:00:00",
        "T1,STOP_B,1,07:00:00",
    ]
def test_raises_when_no_first_stops_found(tmp_path: Path) -> None:
"""A non-empty target trip set with unparseable stop_sequence is loud, not silent."""
src = tmp_path / "in.zip"