All good
Some checks failed
CI / Check (push) Has been cancelled
Build and publish Docker image / build-and-push (push) Has been cancelled

This commit is contained in:
Andras Schmelczer 2026-05-18 21:20:10 +01:00
parent 6ea544a0f6
commit 6cc7288126
45 changed files with 929 additions and 1043 deletions

View file

@ -9,6 +9,8 @@ import polars as pl
from thefuzz import fuzz
from tqdm import tqdm
from pipeline.local_temp import local_tmp_dir
_NUMBER_RE = re.compile(r"\d+")
_POSTCODE_RE = r"^[A-Z]{1,2}\d[A-Z\d]?\d[A-Z]{2}$"
MIN_FUZZY_SCORE = 60
@ -57,7 +59,7 @@ def fuzzy_join_on_postcode(
have null right columns.
"""
tmpdir = tempfile.mkdtemp(prefix="fuzzy_join_")
tmpdir = tempfile.mkdtemp(prefix="fuzzy_join_", dir=local_tmp_dir())
left_path = Path(tmpdir) / "left.parquet"
right_path = Path(tmpdir) / "right.parquet"