Last night

This commit is contained in:
Andras Schmelczer 2026-02-08 10:21:37 +00:00
parent 2906b01734
commit 42ee2d4c51
47 changed files with 848 additions and 478 deletions

View file

@ -98,10 +98,9 @@ def main():
if checkpoint_path.exists():
checkpoint_df = pl.read_parquet(checkpoint_path)
# Deduplicate checkpoint rows per postcode, preferring rows with data
checkpoint_df = (
checkpoint_df.sort("public_transport_quick_minutes", nulls_last=True)
.unique(subset=["postcode"], keep="first")
)
checkpoint_df = checkpoint_df.sort(
"public_transport_quick_minutes", nulls_last=True
).unique(subset=["postcode"], keep="first")
completed_postcodes = set(checkpoint_df["postcode"].to_list())
prior_results = [
JourneyResult(
@ -145,9 +144,9 @@ def main():
results_df = results_to_dataframe(all_results)
all_postcodes = {r.postcode for r in all_results}
coords_df = postcodes_df.filter(
pl.col("postcode").is_in(all_postcodes)
).select(["postcode", "lat", "long"])
coords_df = postcodes_df.filter(pl.col("postcode").is_in(all_postcodes)).select(
["postcode", "lat", "long"]
)
results_df = coords_df.join(results_df, on="postcode", how="left")
results_df = results_df.with_columns(