Fmt
This commit is contained in:
parent
479ef92236
commit
c38d654ac7
44 changed files with 2526 additions and 701 deletions
|
|
@ -27,7 +27,9 @@ def load_england_polygon(geojson_path: Path) -> PreparedGeometry:
|
|||
return prep(geometry)
|
||||
|
||||
|
||||
def in_england_mask(geojson_path: Path, lats: np.ndarray, lngs: np.ndarray) -> np.ndarray:
|
||||
def in_england_mask(
|
||||
geojson_path: Path, lats: np.ndarray, lngs: np.ndarray
|
||||
) -> np.ndarray:
|
||||
"""Vectorized check: which (lat, lng) points are within England.
|
||||
|
||||
Returns a boolean numpy array.
|
||||
|
|
|
|||
|
|
@ -106,7 +106,9 @@ def count_pois_per_postcode(
|
|||
if nearby is None:
|
||||
continue
|
||||
|
||||
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
|
||||
distances = haversine_km(
|
||||
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
|
||||
)
|
||||
|
||||
within_mask = distances <= radius_km
|
||||
within_indices = nearby[within_mask]
|
||||
|
|
@ -179,7 +181,9 @@ def min_distance_per_postcode(
|
|||
if nearby is None:
|
||||
continue
|
||||
|
||||
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
|
||||
distances = haversine_km(
|
||||
poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i]
|
||||
)
|
||||
|
||||
for group, cat_mask in category_masks.items():
|
||||
group_mask = cat_mask[nearby]
|
||||
|
|
|
|||
|
|
@ -15,26 +15,49 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
|
|||
"""
|
||||
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")
|
||||
|
||||
active = arcgis.filter(pl.col("doterm").is_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
||||
terminated = arcgis.filter(pl.col("doterm").is_not_null()).select("pcds", "oseast1m", "osnrth1m").collect()
|
||||
active = (
|
||||
arcgis.filter(pl.col("doterm").is_null())
|
||||
.select("pcds", "oseast1m", "osnrth1m")
|
||||
.collect()
|
||||
)
|
||||
terminated = (
|
||||
arcgis.filter(pl.col("doterm").is_not_null())
|
||||
.select("pcds", "oseast1m", "osnrth1m")
|
||||
.collect()
|
||||
)
|
||||
|
||||
print(f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}")
|
||||
print(
|
||||
f"Active postcodes: {active.height}, terminated postcodes: {terminated.height}"
|
||||
)
|
||||
|
||||
if terminated.height == 0:
|
||||
return pl.DataFrame({"old_postcode": pl.Series([], dtype=pl.Utf8), "new_postcode": pl.Series([], dtype=pl.Utf8)})
|
||||
return pl.DataFrame(
|
||||
{
|
||||
"old_postcode": pl.Series([], dtype=pl.Utf8),
|
||||
"new_postcode": pl.Series([], dtype=pl.Utf8),
|
||||
}
|
||||
)
|
||||
|
||||
active_coords = np.column_stack([active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()])
|
||||
terminated_coords = np.column_stack([terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()])
|
||||
active_coords = np.column_stack(
|
||||
[active["oseast1m"].to_numpy(), active["osnrth1m"].to_numpy()]
|
||||
)
|
||||
terminated_coords = np.column_stack(
|
||||
[terminated["oseast1m"].to_numpy(), terminated["osnrth1m"].to_numpy()]
|
||||
)
|
||||
|
||||
tree = cKDTree(active_coords)
|
||||
distances, indices = tree.query(terminated_coords)
|
||||
|
||||
active_postcodes = active["pcds"]
|
||||
mapping = pl.DataFrame({
|
||||
"old_postcode": terminated["pcds"],
|
||||
"new_postcode": active_postcodes.gather(indices),
|
||||
})
|
||||
mapping = pl.DataFrame(
|
||||
{
|
||||
"old_postcode": terminated["pcds"],
|
||||
"new_postcode": active_postcodes.gather(indices),
|
||||
}
|
||||
)
|
||||
|
||||
print(f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m")
|
||||
print(
|
||||
f"Postcode mapping: max distance = {distances.max():.0f}m, median = {np.median(distances):.0f}m"
|
||||
)
|
||||
|
||||
return mapping
|
||||
|
|
|
|||
|
|
@ -72,7 +72,9 @@ def test_no_pois_returns_zeros(postcodes):
|
|||
"category": pl.Series([], dtype=pl.String),
|
||||
}
|
||||
)
|
||||
result = count_pois_per_postcode(postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0)
|
||||
result = count_pois_per_postcode(
|
||||
postcodes, empty_pois, groups=POI_GROUPS, radius_km=2.0
|
||||
)
|
||||
|
||||
for group in POI_GROUPS:
|
||||
col = f"{group}_2km"
|
||||
|
|
@ -125,7 +127,9 @@ def test_min_distance_no_pois_returns_nan(postcodes):
|
|||
"category": pl.Series([], dtype=pl.String),
|
||||
}
|
||||
)
|
||||
result = min_distance_per_postcode(postcodes, empty_pois, groups={"train_tube": ["Rail station"]})
|
||||
result = min_distance_per_postcode(
|
||||
postcodes, empty_pois, groups={"train_tube": ["Rail station"]}
|
||||
)
|
||||
|
||||
assert "train_tube_nearest_km" in result.columns
|
||||
assert all(np.isnan(v) for v in result["train_tube_nearest_km"].to_list())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue