This commit is contained in:
Andras Schmelczer 2026-06-02 13:46:18 +01:00
parent a04ac2d857
commit d43da9708c
47 changed files with 4120 additions and 573 deletions

View file

@ -85,19 +85,42 @@ def _claim_inspire_parcels(
uprn_pts = shp_points(points)
pt_idx, cand_idx = cand_tree.query(uprn_pts, predicate="within")
# First priority: parcels that physically contain UPRNs. Majority vote
# resolves blocks of flats or overlapping parcel data.
# First priority: parcels that physically contain UPRNs. A parcel holding
# UPRNs from a single postcode goes wholly to that postcode. A parcel shared
# by several postcodes (a block of flats spanning postcodes, or overlapping
# parcel data) is split between them via a sub-Voronoi over their own UPRNs
# clipped to the parcel — so EVERY contained postcode keeps part of the
# parcel. A bare majority vote would hand the whole parcel to one winner and
# leave the losers' UPRNs trapped inside claimed land, dropping them from
# both this claim and the `remaining` polygon handed to Voronoi downstream.
cand_postcodes: dict[int, list[str]] = defaultdict(list)
cand_point_idx: dict[int, list[int]] = defaultdict(list)
for pi, ci in zip(pt_idx, cand_idx):
cand_postcodes[ci].append(postcodes[pi])
cand_point_idx[ci].append(pi)
points_f64 = points.astype(np.float64, copy=False)
contained_parts: dict[str, list] = defaultdict(list)
contained_scores: Counter[str] = Counter()
for ci, pc_list in cand_postcodes.items():
pc_counts = Counter(pc_list)
winner, votes = pc_counts.most_common(1)[0]
contained_parts[winner].append(parcels[ci])
contained_scores[winner] += votes
if len(pc_counts) == 1:
winner = next(iter(pc_counts))
contained_parts[winner].append(parcels[ci])
contained_scores[winner] += pc_counts[winner]
continue
# Shared parcel: sub-Voronoi over the contained UPRNs so each postcode
# present keeps a fragment instead of being absorbed by the winner.
sub_idx = cand_point_idx[ci]
sub_points = points_f64[sub_idx]
sub_postcodes = [postcodes[pi] for pi in sub_idx]
for pc, geom in compute_voronoi_regions(
sub_points, sub_postcodes, parcels[ci]
).items():
cleaned = _clean_polygonal(geom)
if cleaned is not None:
contained_parts[pc].append(cleaned)
contained_scores[pc] += pc_counts[pc]
contained_claimed = _merge_parts_by_postcode(contained_parts)
contained_claims = sorted(
@ -109,7 +132,6 @@ def _claim_inspire_parcels(
# each to the nearest UPRN/postcode so parcel boundaries carry more of the
# visible postcode shape; Voronoi is then limited to roads, parks, water, and
# any other non-parcel gaps.
points_f64 = points.astype(np.float64, copy=False)
contained_union = _union_claims(contained_claims)
nearest_tree = cKDTree(points_f64)
nearest_parts: dict[str, list] = defaultdict(list)
@ -235,11 +257,11 @@ def _extract_polygonal(geom) -> Polygon | MultiPolygon | None:
return None
if len(polys) == 1:
return polys[0]
return MultiPolygon(
[
p
for g in polys
for p in (g.geoms if g.geom_type == "MultiPolygon" else [g])
]
)
# Union (not bare MultiPolygon construction): make_valid can emit
# overlapping polygonal parts, and a MultiPolygon of overlapping parts is
# invalid — it double-counts area and makes the next `.difference()` raise
# a TopologyException that aborts the OA (and, in parallel mode, the
# worker). unary_union merges them into a valid geometry.
merged = unary_union(polys)
return merged if not merged.is_empty else None
return None