"""Tests for the distance cap applied when remapping terminated postcodes."""

import polars as pl

from pipeline.utils.postcode_mapping import (
    MAX_REMAP_DISTANCE_M,
    build_postcode_mapping,
)


def test_remap_drops_terminated_postcodes_beyond_distance_cap(tmp_path):
    """A terminated postcode is remapped only when an active one is close by.

    Fixture layout (all rows share the same country code and northing):

    * ``AB1 1AA`` - active (no termination date), at the origin easting.
    * ``AB1 1AB`` - terminated, 100m east of the active postcode; a
      legitimate adjacent remap, so it is expected in the mapping.
    * ``ZZ9 9ZZ`` - terminated, 5km east of the active postcode; a gross
      misattribution, so it is expected to be dropped.
    """
    active, near_terminated, far_terminated = "AB1 1AA", "AB1 1AB", "ZZ9 9ZZ"

    lookup = pl.DataFrame(
        {
            "pcds": [active, near_terminated, far_terminated],
            "ctry25cd": ["E92000001"] * 3,
            "doterm": [None, "202001", "202001"],
            "east1m": [500000.0, 500100.0, 505000.0],
            "north1m": [200000.0] * 3,
        }
    )
    parquet_file = tmp_path / "arcgis.parquet"
    lookup.write_parquet(parquet_file)

    mapping = build_postcode_mapping(parquet_file)

    # The nearby terminated postcode is remapped onto the active one.
    near_rows = mapping.filter(pl.col("old_postcode") == near_terminated)
    assert near_rows["new_postcode"].to_list() == [active]

    # The far (5km > 1km cap) terminated postcode must NOT appear in the mapping.
    mapped_old_postcodes = mapping["old_postcode"].to_list()
    assert far_terminated not in mapped_old_postcodes

    # Exactly one row in total: the nearby remap and nothing else.
    assert mapping.height == 1


def test_max_remap_distance_constant_is_one_kilometre():
    """Pin the remap distance cap at 1000 metres."""
    assert MAX_REMAP_DISTANCE_M == 1000.0