This commit is contained in:
Andras Schmelczer 2026-05-14 08:09:19 +01:00
parent a8165249a4
commit a4103b0896
64 changed files with 5376 additions and 3832 deletions

View file

@ -1,6 +1,6 @@
import polars as pl
from pipeline.utils import fuzzy_join_on_postcode
from pipeline.utils import fuzzy_join_on_postcode, normalize_postcode_key
def test_fuzzy_join_on_postcode_matches_addresses_within_postcode():
@ -132,3 +132,22 @@ def test_fuzzy_join_on_postcode_rejects_blank_and_invalid_match_keys():
{"left_id": "number_only", "right_address": None},
{"left_id": "valid", "right_address": "10 High Street"},
]
def test_normalize_postcode_key_requires_full_postcode():
    """Check that only complete postcodes are turned into a match key.

    The padded and hyphenated spellings of the same postcode must both
    come out as ``"SW1A1AA"``; the blank, partial, digits-only and
    free-text inputs must each come out as null.
    """
    raw_postcodes = [
        " SW1A 1AA ",
        "sw1a-1aa",
        "",
        "SW1A",
        "12345",
        "not a postcode",
    ]
    # One expected key per input above, in the same order.
    expected_keys = ["SW1A1AA", "SW1A1AA", None, None, None, None]

    frame = pl.DataFrame({"postcode": raw_postcodes})
    key_expr = normalize_postcode_key(pl.col("postcode")).alias("key")
    keys = frame.select(key_expr)["key"].to_list()

    assert keys == expected_keys