Fix checkpoint postcode duplication
This commit is contained in:
parent
627ba5496a
commit
ccb7c8fbd7
1 changed file with 5 additions and 2 deletions
|
|
@@ -71,9 +71,12 @@ def main():
|
||||||
prior_results: list[JourneyResult] = []
|
prior_results: list[JourneyResult] = []
|
||||||
if checkpoint_path.exists():
|
if checkpoint_path.exists():
|
||||||
checkpoint_df = pl.read_parquet(checkpoint_path)
|
checkpoint_df = pl.read_parquet(checkpoint_path)
|
||||||
completed_postcodes = set(
|
# Deduplicate checkpoint rows per postcode, preferring rows with data
|
||||||
checkpoint_df.filter(pl.col("public_transport_easy_minutes").is_not_null())["postcode"].to_list()
|
checkpoint_df = (
|
||||||
|
checkpoint_df.sort("public_transport_quick_minutes", nulls_last=True)
|
||||||
|
.unique(subset=["postcode"], keep="first")
|
||||||
)
|
)
|
||||||
|
completed_postcodes = set(checkpoint_df["postcode"].to_list())
|
||||||
prior_results = [
|
prior_results = [
|
||||||
JourneyResult(
|
JourneyResult(
|
||||||
postcode=row["postcode"],
|
postcode=row["postcode"],
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue