57 lines
1.7 KiB
Python
57 lines
1.7 KiB
Python
import polars as pl
|
|
|
|
from pipeline.utils.nspl_schema import CODE_COL_PATTERN, code_col_overrides
|
|
|
|
|
|
def test_matches_current_and_renamed_suffixes():
    """Only the ruc/oac/imd ``*ind`` columns are overridden, each to ``pl.String``."""
    columns = ["pcd", "ruc21ind", "oac11ind", "imd20ind", "lat", "long"]
    # "pcd", "lat" and "long" must be absent from the returned mapping.
    expected = {
        "ruc21ind": pl.String,
        "oac11ind": pl.String,
        "imd20ind": pl.String,
    }
    assert code_col_overrides(columns) == expected
|
|
|
|
|
|
def test_catches_future_suffix_bump():
    """Columns with a changed numeric suffix are still picked up."""
    # The regression: ONS bumps imd20ind -> imd25ind. A hard-coded dict would
    # no-op here; the stem match must still catch it.
    bumped = ["ruc25ind", "oac21ind", "imd25ind"]
    result = code_col_overrides(["pcd", *bumped])
    assert set(result) == set(bumped)
    # Every matched column must map to the String dtype object itself.
    for dtype in result.values():
        assert dtype is pl.String
|
|
|
|
|
|
def test_is_case_insensitive():
    """Matching ignores letter case; returned keys keep the input spelling."""
    mixed_case = ["RUC21IND", "Oac11Ind", "IMD20ind"]
    assert set(code_col_overrides(mixed_case)) == set(mixed_case)
|
|
|
|
|
|
def test_matches_suffixless_stem():
    """A stem followed directly by ``ind`` (no digits) also counts as a code column."""
    bare_stems = ["rucind", "oacind", "imdind"]
    matched = set(code_col_overrides(bare_stems))
    assert matched == set(bare_stems)
|
|
|
|
|
|
def test_ignores_unrelated_columns():
    """Near-miss and unrelated column names produce no overrides at all."""
    unrelated = [
        "pcd",
        "laua",
        "imdscore",  # imd stem but does not end in "ind"
        "indicator",  # no ruc/oac/imd stem
        "ruc21indx",  # extra trailing char -> not a code column
        "xruc21ind",  # leading char -> not anchored at start
    ]
    result = code_col_overrides(unrelated)
    assert result == {}
|
|
|
|
|
|
def test_empty_names():
    """An empty column list yields an empty override mapping."""
    no_columns = []
    result = code_col_overrides(no_columns)
    assert result == {}
|
|
|
|
|
|
def test_pattern_anchored():
    """The compiled pattern accepts a whole code-column name and nothing longer."""
    assert CODE_COL_PATTERN.match("imd25ind")
    # One case with extra trailing text, one with extra leading text.
    for rejected in ("oac11indicator", "region_imd20ind"):
        assert not CODE_COL_PATTERN.match(rejected)
|