Fix checkpoint postcode duplication
This commit is contained in:
parent
627ba5496a
commit
ccb7c8fbd7
1 changed files with 5 additions and 2 deletions
|
|
@ -71,9 +71,12 @@ def main():
|
|||
prior_results: list[JourneyResult] = []
|
||||
if checkpoint_path.exists():
|
||||
checkpoint_df = pl.read_parquet(checkpoint_path)
|
||||
completed_postcodes = set(
|
||||
checkpoint_df.filter(pl.col("public_transport_easy_minutes").is_not_null())["postcode"].to_list()
|
||||
# Deduplicate checkpoint rows per postcode, preferring rows with data
|
||||
checkpoint_df = (
|
||||
checkpoint_df.sort("public_transport_quick_minutes", nulls_last=True)
|
||||
.unique(subset=["postcode"], keep="first")
|
||||
)
|
||||
completed_postcodes = set(checkpoint_df["postcode"].to_list())
|
||||
prior_results = [
|
||||
JourneyResult(
|
||||
postcode=row["postcode"],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue