This commit is contained in:
Andras Schmelczer 2026-06-02 20:14:32 +01:00
parent fbfebc651c
commit aab85fe32e
33 changed files with 2016 additions and 283 deletions

View file

@@ -20,33 +20,48 @@ def compute_voronoi_regions(
# Convert to float64 so sub-metre jitter isn't truncated.
points = points.astype(np.float64)
# Deduplicate points, keeping one per (location, postcode) pair.
# Multiple postcodes at the same coordinate each get their own point,
# jittered by a tiny offset (0.01m) so Voronoi can distinguish them.
# Coords are rounded to mm precision for stable hashing — UPRN inputs are
# already integer metres, but the float64 cast can introduce ULP noise.
GOLDEN_ANGLE = np.pi * (3.0 - np.sqrt(5.0))
# Deduplicate points, keeping one per (location, postcode) pair. Coords are
# rounded to mm precision for stable hashing — UPRN inputs are already integer
# metres, but the float64 cast can introduce ULP noise.
#
# Where several DISTINCT postcodes share one coordinate, jitter ALL of them
# onto a small regular polygon (equal 0.01m radius, equally spaced by angle)
# so their Voronoi cells become equal wedges and NONE is crushed. Leaving any
# seed at the centre — or innermost on a spiral — squeezes its cell below
# MIN_GEOM_AREA, which _clean_polygonal then drops downstream, silently losing
# an active postcode. Seeds at a UNIQUE coordinate are left exactly on their
# UPRN (no perturbation of normal Voronoi output). Coords are rounded to mm
# for stable hashing (the float64 cast can add ULP noise).
rounded_coords = [
(round(float(points[i, 0]), 3), round(float(points[i, 1]), 3))
for i in range(len(points))
]
coord_postcodes: dict[tuple[float, float], set[str]] = defaultdict(set)
for coord, pc in zip(rounded_coords, postcodes):
coord_postcodes[coord].add(pc)
seen: dict[tuple[float, float, str], bool] = {}
unique_pts = []
unique_pcs = []
coord_counts: dict[tuple[float, float], int] = defaultdict(int)
for i in range(len(points)):
coord = (round(float(points[i, 0]), 3), round(float(points[i, 1]), 3))
coord = rounded_coords[i]
key = (coord[0], coord[1], postcodes[i])
if key not in seen:
seen[key] = True
jitter_idx = coord_counts[coord]
coord_counts[coord] += 1
if jitter_idx == 0:
unique_pts.append(points[i].copy())
else:
# Golden-angle spacing distributes any number of jittered
# points evenly around (and outward from) the original coord.
count = len(coord_postcodes[coord])
if count > 1:
# Coincident cluster: equally-spaced regular polygon -> equal
# Voronoi wedges, so every postcode here keeps a fair share.
jitter_idx = coord_counts[coord]
coord_counts[coord] += 1
angle = 2.0 * np.pi * jitter_idx / count
jittered = points[i].copy()
angle = jitter_idx * GOLDEN_ANGLE
jittered[0] += 0.01 * np.cos(angle)
jittered[1] += 0.01 * np.sin(angle)
unique_pts.append(jittered)
else:
unique_pts.append(points[i].copy())
unique_pcs.append(postcodes[i])
if len(unique_pts) == 1: