idgf
This commit is contained in:
parent
fbfebc651c
commit
aab85fe32e
33 changed files with 2016 additions and 283 deletions
|
|
@ -27,7 +27,7 @@ from .output import (
|
|||
to_wgs84_geojson_multi,
|
||||
write_district_geojson,
|
||||
)
|
||||
from .process_oa import _extract_polygonal, process_oa
|
||||
from .process_oa import MIN_GEOM_AREA, _extract_polygonal, process_oa
|
||||
from .uprn import get_oa_uprns, load_uprns
|
||||
from .voronoi import _equal_split_fallback, compute_voronoi_regions
|
||||
|
||||
|
|
@ -341,6 +341,65 @@ class TestVoronoiDeduplication:
|
|||
assert "B" in result, "Postcode B missing with int64 coords"
|
||||
|
||||
|
||||
class TestVoronoiCoincidentClusterNotCrushed:
    """Each postcode in a coincident cluster (3+ at one coordinate) keeps a real cell.

    Before the fix, the first coincident postcode was left unjittered at the
    exact cluster centre. Once other seeds were present in the OA, its Voronoi
    cell shrank below MIN_GEOM_AREA and _clean_polygonal dropped that active
    postcode downstream. The fix places coincident postcodes on a small
    regular polygon (equal wedges) so that no cell is crushed.
    """

    def test_coincident_cluster_plus_outer_seed_no_postcode_crushed(self):
        """Four coincident postcodes plus one distant seed all keep usable cells."""
        # Models a block of flats: four distinct postcodes on one building
        # coordinate, and a fifth postcode elsewhere in the OA. Before the fix
        # the centre seed's cell collapsed to ~0.0001 m^2 (< MIN_GEOM_AREA) and
        # its postcode vanished; now every cell must be non-degenerate.
        boundary = box(0, 0, 1000, 1000)
        coincident = [[500, 500]] * 4  # A, B, C, D share this coordinate
        elsewhere = [[100, 100]]  # OUT sits elsewhere in the OA
        points = np.array(coincident + elsewhere, dtype=np.float64)
        postcodes = ["A", "B", "C", "D", "OUT"]

        result = compute_voronoi_regions(points, postcodes, boundary)

        for pc in postcodes:
            assert pc in result, f"Postcode {pc} was dropped"
            assert result[pc].area > MIN_GEOM_AREA, (
                f"Postcode {pc} cell {result[pc].area} <= MIN_GEOM_AREA"
            )

    def test_coincident_cluster_partitions_into_fair_wedges(self, square_boundary):
        """Five postcodes at one coordinate each get a meaningful wedge."""
        # With regular-polygon seeds, N postcodes on a single coordinate should
        # carve the surrounding area into roughly equal, non-degenerate wedges.
        postcodes = ["A", "B", "C", "D", "E"]
        points = np.array(
            [[500050, 180050] for _ in postcodes],
            dtype=np.float64,
        )

        result = compute_voronoi_regions(points, postcodes, square_boundary)

        fair_share = square_boundary.area / len(postcodes)
        # A cell counts as "not crushed" when it exceeds 30% of its fair share.
        minimum_area = 0.3 * fair_share
        for pc in postcodes:
            assert pc in result, f"Postcode {pc} was dropped"
            assert result[pc].area > minimum_area, (
                f"Postcode {pc} cell {result[pc].area} far below fair share {fair_share}"
            )

    def test_two_coincident_split_is_fair(self, square_boundary):
        """Regression guard: a pair of coincident postcodes splits roughly 50/50."""
        shared_coord = [500050, 180050]
        points = np.array([shared_coord, shared_coord], dtype=np.float64)
        postcodes = ["A", "B"]

        result = compute_voronoi_regions(points, postcodes, square_boundary)

        assert "A" in result and "B" in result
        area_a = result["A"].area
        area_b = result["B"].area
        total = area_a + area_b
        # Each side must hold more than 40% of the combined area.
        assert area_a / total > 0.4
        assert area_b / total > 0.4
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 4: Voronoi collinear fallback gives everything to first postcode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -20,33 +20,48 @@ def compute_voronoi_regions(
|
|||
# Convert to float64 so sub-metre jitter isn't truncated.
|
||||
points = points.astype(np.float64)
|
||||
|
||||
# Deduplicate points, keeping one per (location, postcode) pair.
|
||||
# Multiple postcodes at the same coordinate each get their own point,
|
||||
# jittered by a tiny offset (0.01m) so Voronoi can distinguish them.
|
||||
# Coords are rounded to mm precision for stable hashing — UPRN inputs are
|
||||
# already integer metres, but the float64 cast can introduce ULP noise.
|
||||
GOLDEN_ANGLE = np.pi * (3.0 - np.sqrt(5.0))
|
||||
# Deduplicate points, keeping one per (location, postcode) pair. Coords are
|
||||
# rounded to mm precision for stable hashing — UPRN inputs are already integer
|
||||
# metres, but the float64 cast can introduce ULP noise.
|
||||
#
|
||||
# Where several DISTINCT postcodes share one coordinate, jitter ALL of them
|
||||
# onto a small regular polygon (equal 0.01m radius, equally spaced by angle)
|
||||
# so their Voronoi cells become equal wedges and NONE is crushed. Leaving any
|
||||
# seed at the centre — or innermost on a spiral — squeezes its cell below
|
||||
# MIN_GEOM_AREA, which _clean_polygonal then drops downstream, silently losing
|
||||
# an active postcode. Seeds at a UNIQUE coordinate are left exactly on their
|
||||
# UPRN (no perturbation of normal Voronoi output). Coords are rounded to mm
|
||||
# for stable hashing (the float64 cast can add ULP noise).
|
||||
rounded_coords = [
|
||||
(round(float(points[i, 0]), 3), round(float(points[i, 1]), 3))
|
||||
for i in range(len(points))
|
||||
]
|
||||
coord_postcodes: dict[tuple[float, float], set[str]] = defaultdict(set)
|
||||
for coord, pc in zip(rounded_coords, postcodes):
|
||||
coord_postcodes[coord].add(pc)
|
||||
|
||||
seen: dict[tuple[float, float, str], bool] = {}
|
||||
unique_pts = []
|
||||
unique_pcs = []
|
||||
coord_counts: dict[tuple[float, float], int] = defaultdict(int)
|
||||
for i in range(len(points)):
|
||||
coord = (round(float(points[i, 0]), 3), round(float(points[i, 1]), 3))
|
||||
coord = rounded_coords[i]
|
||||
key = (coord[0], coord[1], postcodes[i])
|
||||
if key not in seen:
|
||||
seen[key] = True
|
||||
jitter_idx = coord_counts[coord]
|
||||
coord_counts[coord] += 1
|
||||
if jitter_idx == 0:
|
||||
unique_pts.append(points[i].copy())
|
||||
else:
|
||||
# Golden-angle spacing distributes any number of jittered
|
||||
# points evenly around (and outward from) the original coord.
|
||||
count = len(coord_postcodes[coord])
|
||||
if count > 1:
|
||||
# Coincident cluster: equally-spaced regular polygon -> equal
|
||||
# Voronoi wedges, so every postcode here keeps a fair share.
|
||||
jitter_idx = coord_counts[coord]
|
||||
coord_counts[coord] += 1
|
||||
angle = 2.0 * np.pi * jitter_idx / count
|
||||
jittered = points[i].copy()
|
||||
angle = jitter_idx * GOLDEN_ANGLE
|
||||
jittered[0] += 0.01 * np.cos(angle)
|
||||
jittered[1] += 0.01 * np.sin(angle)
|
||||
unique_pts.append(jittered)
|
||||
else:
|
||||
unique_pts.append(points[i].copy())
|
||||
unique_pcs.append(postcodes[i])
|
||||
|
||||
if len(unique_pts) == 1:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue