Rerun prepare script

This commit is contained in:
Andras Schmelczer 2026-04-06 11:13:52 +01:00
parent 349a6c1d53
commit 8614acdfae
24 changed files with 1132 additions and 226 deletions

View file

@ -10,19 +10,19 @@ from scipy.spatial import cKDTree
def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
"""Build a mapping from terminated England postcodes to their nearest active postcode.
Uses OS National Grid coordinates (oseast1m, osnrth1m) which are Cartesian metres,
Uses OS National Grid coordinates (east1m, north1m) which are Cartesian metres,
so Euclidean distance via cKDTree gives accurate results without projection.
"""
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry25cd") == "E92000001")
active = (
arcgis.filter(pl.col("doterm").is_null())
.select("pcds", "oseast1m", "osnrth1m")
.select("pcds", "east1m", "north1m")
.collect()
)
terminated = (
arcgis.filter(pl.col("doterm").is_not_null())
.select("pcds", "oseast1m", "osnrth1m")
.select("pcds", "east1m", "north1m")
.collect()
)
@ -39,10 +39,10 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
)
active_coords = np.column_stack(
[active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]
[active["east1m"].to_numpy(), active["north1m"].to_numpy()]
)
terminated_coords = np.column_stack(
[terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]
[terminated["east1m"].to_numpy(), terminated["north1m"].to_numpy()]
)
tree = cKDTree(active_coords)