38 lines
1.3 KiB
Python
38 lines
1.3 KiB
Python
import polars as pl
|
|
|
|
from pipeline.utils.postcode_mapping import (
|
|
MAX_REMAP_DISTANCE_M,
|
|
build_postcode_mapping,
|
|
)
|
|
|
|
|
|
def test_remap_drops_terminated_postcodes_beyond_distance_cap(tmp_path):
    """Check that a far-away terminated postcode is left out of the mapping.

    The fixture holds three England postcodes on the same northing:

    * ``AB1 1AA`` - active (no termination date), at easting 500000.
    * ``AB1 1AB`` - terminated, 100 m east of the active one; a legitimate
      adjacent remap that is expected to be mapped.
    * ``ZZ9 9ZZ`` - terminated, 5 km east of the active one; a gross
      misattribution that is expected to be dropped.
    """
    source_file = tmp_path / "arcgis.parquet"
    pl.DataFrame(
        {
            "pcds": ["AB1 1AA", "AB1 1AB", "ZZ9 9ZZ"],
            "ctry25cd": ["E92000001", "E92000001", "E92000001"],
            "doterm": [None, "202001", "202001"],
            "east1m": [500000.0, 500100.0, 505000.0],
            "north1m": [200000.0, 200000.0, 200000.0],
        }
    ).write_parquet(source_file)

    result = build_postcode_mapping(source_file)

    # The terminated postcode 100 m away is remapped onto the active one.
    nearby_rows = result.filter(pl.col("old_postcode") == "AB1 1AB")
    assert nearby_rows["new_postcode"].to_list() == ["AB1 1AA"]

    # The terminated postcode 5 km away (beyond the 1 km cap) must NOT be
    # present, so the nearby remap is the only row in the mapping.
    remapped_from = result["old_postcode"].to_list()
    assert "ZZ9 9ZZ" not in remapped_from
    assert result.height == 1
|
|
|
|
|
|
def test_max_remap_distance_constant_is_one_kilometre():
    """Pin the remap distance cap to exactly 1000 metres."""
    one_kilometre_in_metres = 1000.0
    assert MAX_REMAP_DISTANCE_M == one_kilometre_in_metres
|