This commit is contained in:
Andras Schmelczer 2026-03-15 21:22:28 +00:00
parent 479ef92236
commit c38d654ac7
44 changed files with 2526 additions and 701 deletions

View file

@ -27,7 +27,9 @@ def load_england_polygon(geojson_path: Path) -> PreparedGeometry:
return prep(geometry)
def in_england_mask(geojson_path: Path, lats: np.ndarray, lngs: np.ndarray) -> np.ndarray:
def in_england_mask(
geojson_path: Path, lats: np.ndarray, lngs: np.ndarray
) -> np.ndarray:
"""Vectorized check: which (lat, lng) points are within England.
Returns a boolean numpy array.

View file

@ -106,7 +106,9 @@ def count_pois_per_postcode(
if nearby is None:
continue
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
distances = haversine_km(
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
)
within_mask = distances <= radius_km
within_indices = nearby[within_mask]
@ -179,7 +181,9 @@ def min_distance_per_postcode(
if nearby is None:
continue
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
distances = haversine_km(
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
)
for group, cat_mask in category_masks.items():
group_mask = cat_mask[nearby]

View file

@ -15,26 +15,49 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
"""
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
active = arcgis.filter(pl.col("doterm").is_null()).select("pcds", "oseast1m", "osnrth1m").collect()
terminated = arcgis.filter(pl.col("doterm").is_not_null()).select("pcds", "oseast1m", "osnrth1m").collect()
active = (
arcgis.filter(pl.col("doterm").is_null())
.select("pcds", "oseast1m", "osnrth1m")
.collect()
)
terminated = (
arcgis.filter(pl.col("doterm").is_not_null())
.select("pcds", "oseast1m", "osnrth1m")
.collect()
)
print(f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}")
print(
f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}"
)
if terminated.height == 0:
return pl.DataFrame({"old_postcode": pl.Series([], dtype=pl.Utf8), "new_postcode": pl.Series([], dtype=pl.Utf8)})
return pl.DataFrame(
{
"old_postcode": pl.Series([], dtype=pl.Utf8),
"new_postcode": pl.Series([], dtype=pl.Utf8),
}
)
active_coords = np.column_stack([active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()])
terminated_coords = np.column_stack([terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()])
active_coords = np.column_stack(
[active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]
)
terminated_coords = np.column_stack(
[terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]
)
tree = cKDTree(active_coords)
distances, indices = tree.query(terminated_coords)
active_postcodes = active["pcds"]
mapping = pl.DataFrame({
"old_postcode": terminated["pcds"],
"new_postcode": active_postcodes.gather(indices),
})
mapping = pl.DataFrame(
{
"old_postcode": terminated["pcds"],
"new_postcode": active_postcodes.gather(indices),
}
)
print(f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m")
print(
f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m"
)
return mapping

View file

@ -72,7 +72,9 @@ def test_no_pois_returns_zeros(postcodes):
"category": pl.Series([], dtype=pl.String),
}
)
result = count_pois_per_postcode(postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0)
result = count_pois_per_postcode(
postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0
)
for group in POI_GROUPS:
col = f"{group}_2km"
@ -125,7 +127,9 @@ def test_min_distance_no_pois_returns_nan(postcodes):
"category": pl.Series([], dtype=pl.String),
}
)
result = min_distance_per_postcode(postcodes, empty_pois, groups={"train_tube": ["Rail station"]})
result = min_distance_per_postcode(
postcodes, empty_pois, groups={"train_tube": ["Rail station"]}
)
assert "train_tube_nearest_km" in result.columns
assert all(np.isnan(v) for v in result["train_tube_nearest_km"].to_list())