Fmt
This commit is contained in:
parent
479ef92236
commit
c38d654ac7
44 changed files with 2526 additions and 701 deletions
|
|
@ -15,26 +15,49 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
|
|||
"""
|
||||
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
|
||||
|
||||
active = arcgis.filter(pl.col("doterm").is_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
||||
terminated = arcgis.filter(pl.col("doterm").is_not_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
||||
active = (
|
||||
arcgis.filter(pl.col("doterm").is_null())
|
||||
.select("pcds", "oseast1m", "osnrth1m")
|
||||
.collect()
|
||||
)
|
||||
terminated = (
|
||||
arcgis.filter(pl.col("doterm").is_not_null())
|
||||
.select("pcds", "oseast1m", "osnrth1m")
|
||||
.collect()
|
||||
)
|
||||
|
||||
print(f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}")
|
||||
print(
|
||||
f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}"
|
||||
)
|
||||
|
||||
if terminated.height == 0:
|
||||
return pl.DataFrame({"old_postcode": pl.Series([], dtype=pl.Utf8), "new_postcode": pl.Series([], dtype=pl.Utf8)})
|
||||
return pl.DataFrame(
|
||||
{
|
||||
"old_postcode": pl.Series([], dtype=pl.Utf8),
|
||||
"new_postcode": pl.Series([], dtype=pl.Utf8),
|
||||
}
|
||||
)
|
||||
|
||||
active_coords = np.column_stack([active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()])
|
||||
terminated_coords = np.column_stack([terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()])
|
||||
active_coords = np.column_stack(
|
||||
[active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]
|
||||
)
|
||||
terminated_coords = np.column_stack(
|
||||
[terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]
|
||||
)
|
||||
|
||||
tree = cKDTree(active_coords)
|
||||
distances, indices = tree.query(terminated_coords)
|
||||
|
||||
active_postcodes = active["pcds"]
|
||||
mapping = pl.DataFrame({
|
||||
"old_postcode": terminated["pcds"],
|
||||
"new_postcode": active_postcodes.gather(indices),
|
||||
})
|
||||
mapping = pl.DataFrame(
|
||||
{
|
||||
"old_postcode": terminated["pcds"],
|
||||
"new_postcode": active_postcodes.gather(indices),
|
||||
}
|
||||
)
|
||||
|
||||
print(f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m")
|
||||
print(
|
||||
f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m"
|
||||
)
|
||||
|
||||
return mapping
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue