Add more data & fix OOMs

This commit is contained in:
Andras Schmelczer 2026-01-31 14:39:46 +00:00
parent f60fbec9d4
commit a8cc44ea97
8 changed files with 242 additions and 82 deletions

View file

@ -5,7 +5,13 @@ from datetime import date, timedelta
import polars as pl
from tqdm import tqdm
from .config import DESTINATIONS, MAX_CONCURRENT, MAX_POSTCODES, OUTPUT_DIR, MAX_DISTANCE_KM
from .config import (
DESTINATIONS,
MAX_CONCURRENT,
MAX_POSTCODES,
OUTPUT_DIR,
MAX_DISTANCE_KM,
)
from .results import CheckpointSaver, results_to_dataframe, save_results
from .tfl_client import fetch_journey_times
from pipeline.utils import haversine_km_expr
@ -31,7 +37,9 @@ def main():
# Filter to postcodes within range of destination
postcodes_df = postcodes_df.with_columns(
haversine_km_expr("lat", "long", destination.lat, destination.lon).alias("distance_km")
haversine_km_expr("lat", "long", destination.lat, destination.lon).alias(
"distance_km"
)
).filter(pl.col("distance_km") <= MAX_DISTANCE_KM)
print(f"Filtered to {postcodes_df.height:,} postcodes within {MAX_DISTANCE_KM}km")
@ -50,7 +58,9 @@ def main():
checkpoint_saver = CheckpointSaver(
destination_name=destination.name,
on_save=lambda path, count: print(f"Checkpoint saved: {count:,} results to {path}"),
on_save=lambda path, count: print(
f"Checkpoint saved: {count:,} results to {path}"
),
)
def on_result(result):