All good
This commit is contained in:
parent
6ea544a0f6
commit
6cc7288126
45 changed files with 929 additions and 1043 deletions
|
|
@ -9,6 +9,8 @@ import polars as pl
|
|||
from thefuzz import fuzz
|
||||
from tqdm import tqdm
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
|
||||
_NUMBER_RE = re.compile(r"\d+")
|
||||
_POSTCODE_RE = r"^[A-Z]{1,2}\d[A-Z\d]?\d[A-Z]{2}$"
|
||||
MIN_FUZZY_SCORE = 60
|
||||
|
|
@ -57,7 +59,7 @@ def fuzzy_join_on_postcode(
|
|||
have null right columns.
|
||||
"""
|
||||
|
||||
tmpdir = tempfile.mkdtemp(prefix="fuzzy_join_")
|
||||
tmpdir = tempfile.mkdtemp(prefix="fuzzy_join_", dir=local_tmp_dir())
|
||||
left_path = Path(tmpdir) / "left.parquet"
|
||||
right_path = Path(tmpdir) / "right.parquet"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue