lmao
This commit is contained in:
parent
03445188ea
commit
524580eb25
102 changed files with 36625 additions and 1295 deletions
|
|
@ -154,14 +154,16 @@ def fuzzy_join_on_postcode(
|
|||
left_cached = pl.scan_parquet(left_path)
|
||||
right_cached = pl.scan_parquet(right_path)
|
||||
|
||||
return (
|
||||
result = (
|
||||
left_cached.join(mapping, on="_left_idx", how="left")
|
||||
.join(right_cached, on="_right_idx", how="left")
|
||||
.drop("_left_idx", "_right_idx")
|
||||
.collect(engine="streaming")
|
||||
)
|
||||
except BaseException:
|
||||
finally:
|
||||
shutil.rmtree(tmpdir, ignore_errors=True)
|
||||
raise
|
||||
|
||||
return result.lazy()
|
||||
|
||||
|
||||
def _numbers_compatible(a: str, b: str) -> bool:
|
||||
|
|
@ -180,7 +182,7 @@ def _numbers_compatible(a: str, b: str) -> bool:
|
|||
|
||||
|
||||
def _score_bucket(
|
||||
args: tuple[list[tuple[int, str]], list[tuple[int, str]], int],
|
||||
args: tuple[list[tuple[int, str]], list[tuple[int, str]]],
|
||||
) -> list[tuple[int, int, int]]:
|
||||
"""Score all address pairs within a single postcode bucket."""
|
||||
left_entries, right_entries = args
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue