perfect-postcode/pipeline/journey_times/tfl_client.py

211 lines
6.8 KiB
Python

"""TfL API client for fetching journey times."""
import asyncio
import warnings
from collections.abc import Callable
from http import HTTPStatus
from journey_client import Client
from journey_client.api.journey import (
journey_journey_results_by_path_from_path_to_query_via_query_national_search_query_date_qu as journey_api,
)
from journey_client.models import (
JourneyJourneyResultsByPathFromPathToQueryViaQueryNationalSearchQueryDateQuTimeIs as TimeIs,
)
from journey_client.types import UNSET, Unset
from .config import MAX_DELAY
from .models import Destination, JourneyResult
from .rate_limiter import RateLimiter
async def fetch_journey_for_mode(
    client: Client,
    rate_limiter: RateLimiter,
    from_location: str,
    to_location: str,
    mode: list[str] | Unset,
    journey_date: str,
    journey_time: str,
    retry_count: int = 5,
) -> int | None:
    """Fetch the shortest journey duration for one transport mode.

    Every attempt first awaits ``rate_limiter.acquire()`` and then queries the
    journey endpoint with a departure time (``TimeIs.DEPARTING``). Retryable
    HTTP statuses (429, 500, 502, 503, 504) and any exception raised during an
    attempt are retried with exponential backoff: 1 second initially, doubled
    after each failure and capped at ``MAX_DELAY``. No sleep happens after the
    final attempt, since nothing would follow it.

    Args:
        client: API client the request is sent through.
        rate_limiter: Limiter awaited before every request.
        from_location: Origin, passed to the API as ``from_``.
        to_location: Destination, passed to the API as ``to``.
        mode: Mode identifiers to restrict the search to, or ``UNSET`` to send
            no mode filter (reported as "public_transport" in warnings).
        journey_date: Passed to the API as ``date``.
        journey_time: Passed to the API as ``time``.
        retry_count: Maximum number of attempts.

    Returns:
        The smallest ``duration`` among the returned journeys (callers store
        it in ``*_minutes`` fields), or ``None`` when no journey carries a
        duration, the response status is not retryable, or every attempt
        failed.
    """
    mode_name = ",".join(mode) if not isinstance(mode, Unset) else "public_transport"
    retryable_statuses = (
        HTTPStatus.TOO_MANY_REQUESTS,
        HTTPStatus.INTERNAL_SERVER_ERROR,
        HTTPStatus.BAD_GATEWAY,
        HTTPStatus.SERVICE_UNAVAILABLE,
        HTTPStatus.GATEWAY_TIMEOUT,
    )
    backoff = 1.0

    for attempt in range(retry_count):
        try:
            await rate_limiter.acquire()
            response = await journey_api.asyncio_detailed(
                from_=from_location,
                to=to_location,
                client=client,
                date=journey_date,
                time=journey_time,
                time_is=TimeIs.DEPARTING,
                mode=mode,
            )
            if response.status_code == HTTPStatus.OK and response.parsed:
                journeys = response.parsed.journeys
                if not isinstance(journeys, Unset) and journeys:
                    durations = [
                        j.duration
                        for j in journeys
                        if not isinstance(j.duration, Unset)
                    ]
                    if durations:
                        return min(durations)
                # Successful response without any usable duration.
                return None
            if response.status_code not in retryable_statuses:
                # Non-retryable status (or an OK response that failed to parse).
                return None
            failure = (
                f"HTTP {response.status_code.value} for {mode_name} from {from_location}"
            )
        except Exception as e:
            # Best-effort: any failure during the attempt is treated as transient.
            failure = f"Network error for {mode_name} from {from_location}: {e}"

        if attempt + 1 >= retry_count:
            # Last attempt: report it, but do not sleep or promise a retry
            # that will never happen.
            warnings.warn(
                f"{failure} (attempt {attempt + 1}/{retry_count})",
                stacklevel=2,
            )
            break

        warnings.warn(
            f"{failure}, "
            f"retrying in {backoff:.1f}s (attempt {attempt + 1}/{retry_count})",
            stacklevel=2,
        )
        await asyncio.sleep(backoff)
        backoff = min(backoff * 2, MAX_DELAY)

    warnings.warn(
        f"Failed to fetch {mode_name} from {from_location} after {retry_count} attempts",
        stacklevel=2,
    )
    return None
async def fetch_all_modes(
    client: Client,
    rate_limiter: RateLimiter,
    postcode: str,
    lat: float,
    lon: float,
    to_location: str,
    journey_date: str,
    journey_time: str,
    semaphore: asyncio.Semaphore,
) -> JourneyResult:
    """Collect walking, cycling and public-transport times for one postcode.

    The origin is sent as a ``"lat,lon"`` string. The three modes are queried
    one after another while holding ``semaphore``. The fastest mode is the one
    with the smallest non-``None`` time; on a tie the earlier of walking,
    cycling, public transport wins. Any exception is converted into a
    ``JourneyResult`` carrying only the postcode and the error text.
    """
    async with semaphore:
        try:
            origin = f"{lat},{lon}"
            # (result label, mode filter sent to the API), in query order.
            mode_queries = (
                ("walking", ["walking"]),
                ("cycling", ["cycle"]),
                ("public_transport", UNSET),
            )

            minutes_by_mode = {}
            for label, mode_filter in mode_queries:
                minutes_by_mode[label] = await fetch_journey_for_mode(
                    client,
                    rate_limiter,
                    origin,
                    to_location,
                    mode_filter,
                    journey_date,
                    journey_time,
                )

            # Only modes that actually produced a time compete for "fastest".
            reachable = [
                (label, minutes)
                for label, minutes in minutes_by_mode.items()
                if minutes is not None
            ]
            if not reachable:
                best_mode, best_minutes = None, None
            else:
                best_mode, best_minutes = min(reachable, key=lambda pair: pair[1])

            return JourneyResult(
                postcode=postcode,
                walking_minutes=minutes_by_mode["walking"],
                cycling_minutes=minutes_by_mode["cycling"],
                public_transport_minutes=minutes_by_mode["public_transport"],
                fastest_minutes=best_minutes,
                fastest_mode=best_mode,
            )
        except Exception as exc:
            return JourneyResult(postcode=postcode, error=str(exc))
async def fetch_journey_times(
    postcode_data: list[tuple[str, float, float]],
    dest: Destination,
    journey_date: str,
    journey_time: str,
    max_concurrent: int = 2,
    progress_callback: Callable[[JourneyResult], None] | None = None,
) -> list[JourneyResult]:
    """Fetch journey times for all postcodes with rate limiting.

    Results are matched back to their input rows by position rather than by
    postcode string, so duplicate postcodes in ``postcode_data`` each keep
    their own result.

    Args:
        postcode_data: List of (postcode, lat, lon) tuples
        dest: Destination for journey planning
        journey_date: Date in YYYYMMDD format
        journey_time: Time in HHMM format
        max_concurrent: Maximum number of postcodes processed at once
        progress_callback: Optional callback called with each result, in
            completion order

    Returns:
        List of JourneyResult objects in the same order as postcode_data
    """
    semaphore = asyncio.Semaphore(max_concurrent)
    to_location = dest.to_tfl_location()
    rate_limiter = RateLimiter()

    results_by_index: dict[int, JourneyResult] = {}
    async with Client(base_url="https://api.tfl.gov.uk", timeout=30.0) as client:

        async def fetch_indexed(
            index: int, pc: str, lat: float, lon: float
        ) -> tuple[int, JourneyResult]:
            """Run one postcode lookup and tag the result with its input position."""
            result = await fetch_all_modes(
                client,
                rate_limiter,
                pc,
                lat,
                lon,
                to_location,
                journey_date,
                journey_time,
                semaphore,
            )
            return index, result

        tasks = [
            fetch_indexed(index, pc, lat, lon)
            for index, (pc, lat, lon) in enumerate(postcode_data)
        ]
        # Consume in completion order so progress is reported as early as
        # possible; input order is restored from the index afterwards.
        for finished in asyncio.as_completed(tasks):
            index, result = await finished
            results_by_index[index] = result
            if progress_callback:
                progress_callback(result)

    return [results_by_index[index] for index in range(len(postcode_data))]