Add more data & fix OOMs

This commit is contained in:
Andras Schmelczer 2026-01-31 14:39:46 +00:00
parent f60fbec9d4
commit a8cc44ea97
8 changed files with 242 additions and 82 deletions

View file

@@ -60,7 +60,7 @@ def fuzzy_join_on_postcode(
.str.to_uppercase()
.alias("_left_postcode"),
)
-.collect()
+.collect(engine="streaming")
)
right_match = (
@@ -74,7 +74,7 @@ def fuzzy_join_on_postcode(
.alias("_right_postcode"),
)
.unique(subset=["_right_address", "_right_postcode"], keep="first")
-.collect()
+.collect(engine="streaming")
)
# Group right side by postcode for fast lookup