idk
This commit is contained in:
parent
a04ac2d857
commit
d43da9708c
47 changed files with 4120 additions and 573 deletions
|
|
@ -6,6 +6,16 @@ import numpy as np
|
|||
import polars as pl
|
||||
from scipy.spatial import cKDTree
|
||||
|
||||
# Maximum distance (in OS National Grid metres) a terminated postcode may be from its
|
||||
# nearest active successor to be remapped. Beyond this we treat the postcode as having no
|
||||
# legitimate successor (e.g. demolished/redeveloped land) rather than re-homing it onto a
|
||||
# geometrically-nearest-but-unrelated postcode on a different street/estate/LSOA, which
|
||||
# would pollute the successor's crime/deprivation/school/noise/rent and price stats.
|
||||
# 1km is conservative: it keeps legitimate adjacent remaps while dropping gross
|
||||
# misattributions; dropped postcodes keep their terminated code and fall out at the
|
||||
# active-postcode filter downstream (the honest outcome confirmed by the merge audit).
|
||||
MAX_REMAP_DISTANCE_M = 1000.0
|
||||
|
||||
|
||||
def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
|
||||
"""Build a mapping from terminated England postcodes to their nearest active postcode.
|
||||
|
|
@ -50,18 +60,30 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
|
|||
)
|
||||
|
||||
tree = cKDTree(active_coords)
|
||||
distances, indices = tree.query(terminated_coords)
|
||||
distances, indices = tree.query(
|
||||
terminated_coords, distance_upper_bound=MAX_REMAP_DISTANCE_M
|
||||
)
|
||||
|
||||
# cKDTree returns distance=inf and index==len(active) for points with no neighbour
|
||||
# within the bound. Drop those terminated postcodes rather than gather an out-of-range
|
||||
# index; they keep their terminated code and fall out at the active-postcode filter.
|
||||
within_bound = np.isfinite(distances)
|
||||
dropped = int((~within_bound).sum())
|
||||
|
||||
active_postcodes = active["pcds"]
|
||||
mapping = pl.DataFrame(
|
||||
{
|
||||
"old_postcode": terminated["pcds"],
|
||||
"new_postcode": active_postcodes.gather(indices),
|
||||
"old_postcode": terminated["pcds"].filter(pl.Series(within_bound)),
|
||||
"new_postcode": active_postcodes.gather(indices[within_bound]),
|
||||
}
|
||||
)
|
||||
|
||||
kept_distances = distances[within_bound]
|
||||
print(
|
||||
f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m"
|
||||
f"Postcode mapping: {dropped} terminated postcodes dropped (> {MAX_REMAP_DISTANCE_M:.0f}m), "
|
||||
f"max distance = {kept_distances.max():.0f}m, median = {np.median(kept_distances):.0f}m"
|
||||
if kept_distances.size
|
||||
else f"Postcode mapping: {dropped} terminated postcodes dropped (> {MAX_REMAP_DISTANCE_M:.0f}m), none remapped"
|
||||
)
|
||||
|
||||
return mapping
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue