Improve data
This commit is contained in:
parent
b4d66a28c1
commit
85da1941aa
31 changed files with 901 additions and 319 deletions
|
|
@ -10,6 +10,26 @@ EARTH_RADIUS_KM = 6371.0088
|
|||
# Kilometres spanned by one degree of latitude. Used to turn a search radius
# given in km into a latitude span in degrees (radius_km / KM_PER_DEGREE_LAT).
KM_PER_DEGREE_LAT = 111.32
|
||||
# Default grid cell size, in degrees, used as the default `grid_size` when
# looking up the grid cells that intersect a postcode's radius bounding box.
# NOTE(review): _build_poi_grid declares its own default of 0.05 -- confirm
# that callers always pass the same grid_size to both builder and lookup.
DEFAULT_GRID_SIZE_DEGREES = 0.02
|
||||
|
||||
# Deliberately loose bounding box around GB/UK, in decimal degrees.
#
# Why it exists: the ArcGIS postcode source gives postcodes that have no grid
# reference a placeholder coordinate (lat=99.999999, lon=0.0). That placeholder
# is a finite number, so a plain isfinite() test accepts it, and the result is
# a nonsensical "nearest amenity" distance of roughly 5,000 km. Coordinates
# falling outside this box are rejected, so those postcodes end up with a NaN
# distance / zero counts rather than an invented value.
UK_LAT_MIN = 49.0
UK_LAT_MAX = 61.5
UK_LON_MIN = -9.0
UK_LON_MAX = 2.5
|
||||
|
||||
|
||||
def valid_uk_coords_mask(lats: np.ndarray, lons: np.ndarray) -> np.ndarray:
    """Flag coordinate pairs that are finite and lie inside the UK bounding box.

    Args:
        lats: Latitudes in decimal degrees.
        lons: Longitudes in decimal degrees, paired element-wise with ``lats``.

    Returns:
        Boolean mask that is True only where both values are finite, the
        latitude is within [UK_LAT_MIN, UK_LAT_MAX] and the longitude is
        within [UK_LON_MIN, UK_LON_MAX] (bounds inclusive).
    """
    # Evaluate each axis independently, then require both to pass.
    lat_ok = np.isfinite(lats) & (lats >= UK_LAT_MIN) & (lats <= UK_LAT_MAX)
    lon_ok = np.isfinite(lons) & (lons >= UK_LON_MIN) & (lons <= UK_LON_MAX)
    return lat_ok & lon_ok
|
||||
|
||||
|
||||
def _build_poi_grid(
|
||||
pois: pl.DataFrame, grid_size: float = 0.05
|
||||
|
|
@ -43,7 +63,12 @@ def _get_nearby_indices(
|
|||
grid_size: float = DEFAULT_GRID_SIZE_DEGREES,
|
||||
) -> np.ndarray | None:
|
||||
"""Get POI indices from all grid cells intersecting the radius bounding box."""
|
||||
if not np.isfinite(pc_lat) or not np.isfinite(pc_lon):
|
||||
if (
|
||||
not np.isfinite(pc_lat)
|
||||
or not np.isfinite(pc_lon)
|
||||
or not (UK_LAT_MIN <= pc_lat <= UK_LAT_MAX)
|
||||
or not (UK_LON_MIN <= pc_lon <= UK_LON_MAX)
|
||||
):
|
||||
return None
|
||||
|
||||
lat_delta = radius_km / KM_PER_DEGREE_LAT
|
||||
|
|
@ -182,7 +207,7 @@ def min_distance_per_postcode(
|
|||
pc_lats = postcodes_df["lat"].to_numpy()
|
||||
pc_lons = postcodes_df["lon"].to_numpy()
|
||||
pc_codes = postcodes_df["postcode"].to_list()
|
||||
valid_pc_mask = np.isfinite(pc_lats) & np.isfinite(pc_lons)
|
||||
valid_pc_mask = valid_uk_coords_mask(pc_lats, pc_lons)
|
||||
valid_pc_indices = np.flatnonzero(valid_pc_mask)
|
||||
|
||||
result_min_dist = {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue