This commit is contained in:
Andras Schmelczer 2026-06-02 13:46:18 +01:00
parent a04ac2d857
commit d43da9708c
47 changed files with 4120 additions and 573 deletions

View file

@ -307,9 +307,14 @@ def convert_high_freq_to_frequency_based(
print(f" Found {len(trip_group_key)} trips on target routes")
# Step 3: Get first departure time and first stop for each target trip
# Step 3: Get first departure time and first stop for each target trip.
# GTFS only requires stop_sequence to be strictly increasing per trip; it
# is NOT required to start at 0 (1-based numbering is common, and BODS is
# consumed raw here without renumbering). So pick the row with the minimum
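# stop_sequence per trip rather than keying off the literal "0". Rows with a
# non-integer stop_sequence, or for which _parse_gtfs_time returns None, are
# skipped, so the minimum is taken over the remaining rows only.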
trip_first_dep: dict[str, int] = {}
trip_first_stop: dict[str, str] = {}
trip_min_seq: dict[str, int] = {}
with zin.open("stop_times.txt") as f:
cols = _parse_csv_line(f.readline())
trip_id_idx = cols.index("trip_id")
@ -323,11 +328,25 @@ def convert_high_freq_to_frequency_based(
trip_id = parts[trip_id_idx].strip('"')
if trip_id not in trip_group_key:
continue
if parts[seq_idx].strip('"') == "0":
dep_secs = _parse_gtfs_time(parts[dep_idx])
if dep_secs is not None:
trip_first_dep[trip_id] = dep_secs
trip_first_stop[trip_id] = parts[stop_id_idx].strip('"')
try:
seq = int(parts[seq_idx].strip('"'))
except ValueError:
continue
if trip_id in trip_min_seq and seq >= trip_min_seq[trip_id]:
continue
dep_secs = _parse_gtfs_time(parts[dep_idx])
if dep_secs is None:
continue
trip_min_seq[trip_id] = seq
trip_first_dep[trip_id] = dep_secs
trip_first_stop[trip_id] = parts[stop_id_idx].strip('"')
if trip_group_key and not trip_first_dep:
raise RuntimeError(
"convert_high_freq_to_frequency_based found no first stops for "
f"{len(trip_group_key)} target trips; stop_times.txt may be malformed "
"or stop_sequence parsing failed"
)
# Step 4: Group trips by (route, direction, service, first_stop) and compute headways
groups: dict[tuple[str, ...], list[tuple[str, int]]] = defaultdict(list)