lmao
This commit is contained in:
parent
03445188ea
commit
524580eb25
102 changed files with 36625 additions and 1295 deletions
40
pipeline/utils/postcode_mapping.py
Normal file
40
pipeline/utils/postcode_mapping.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""Map terminated postcodes to their nearest active successor using OS grid coordinates."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
from scipy.spatial import cKDTree
|
||||
|
||||
|
||||
def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
|
||||
"""Build a mapping from terminated England postcodes to their nearest active postcode.
|
||||
|
||||
Uses OS National Grid coordinates (oseast1m, osnrth1m) which are Cartesian meters,
|
||||
so Euclidean distance via cKDTree gives accurate results without projection.
|
||||
"""
|
||||
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
|
||||
|
||||
active = arcgis.filter(pl.col("doterm").is_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
||||
terminated = arcgis.filter(pl.col("doterm").is_not_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
||||
|
||||
print(f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}")
|
||||
|
||||
if terminated.height == 0:
|
||||
return pl.DataFrame({"old_postcode": pl.Series([], dtype=pl.Utf8), "new_postcode": pl.Series([], dtype=pl.Utf8)})
|
||||
|
||||
active_coords = np.column_stack([active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()])
|
||||
terminated_coords = np.column_stack([terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()])
|
||||
|
||||
tree = cKDTree(active_coords)
|
||||
distances, indices = tree.query(terminated_coords)
|
||||
|
||||
active_postcodes = active["pcds"]
|
||||
mapping = pl.DataFrame({
|
||||
"old_postcode": terminated["pcds"],
|
||||
"new_postcode": active_postcodes.gather(indices),
|
||||
})
|
||||
|
||||
print(f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m")
|
||||
|
||||
return mapping
|
||||
Loading…
Add table
Add a link
Reference in a new issue