don't crash
This commit is contained in:
parent
aab85fe32e
commit
d6d20ccd37
13 changed files with 2630 additions and 3924 deletions
123
pipeline/transform/postcode_boundaries/geometry.py
Normal file
123
pipeline/transform/postcode_boundaries/geometry.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
"""Robust GEOS overlay helpers.
|
||||
|
||||
Overlay operations (union, difference, intersection) can raise a
|
||||
``GEOSException`` — most often ``TopologyException: side location conflict``,
|
||||
``Ring edge missing``, or ``found non-noded intersection`` — on geometries that
|
||||
contain near-coincident or near-degenerate edges, or that are individually
|
||||
invalid. The robust remedy is a *fixed-precision* overlay: GEOS's OverlayNG
|
||||
engine, handed a grid size, nodes every edge onto that grid and finishes where
|
||||
the full-precision overlay cannot.
|
||||
|
||||
Getting the fallback to actually survive takes three rules, each of which we
|
||||
learned the hard way from a crash:
|
||||
|
||||
1. **Never precision-reduce with the default mode.** ``set_precision``'s default
|
||||
``valid_output`` (and ``keep_collapsed``) mode runs its *own* noding pass that
|
||||
re-raises the very ``side location conflict`` we are trying to escape. We push
|
||||
the grid into the overlay via the ``grid_size`` argument instead — where
|
||||
OverlayNG nodes robustly — and only ever call ``set_precision`` in
|
||||
``pointwise`` mode (pure coordinate rounding, which cannot raise).
|
||||
2. **Validate first.** ``make_valid`` repairs the self-intersections (bow-ties,
|
||||
pinches) that make GEOS choke, so the overlay starts from an OGC-valid shape.
|
||||
3. **Keep only polygonal parts.** ``make_valid`` of a spiky polygon routinely
|
||||
returns a *mixed-dimension* ``GeometryCollection`` (a polygon plus a dangling
|
||||
line), and OverlayNG rejects mixed-dimension input with
|
||||
``IllegalArgumentException: Overlay input is mixed-dimension``. Every input
|
||||
here represents an area, so the line/point debris is meaningless noise and is
|
||||
dropped before the overlay.
|
||||
|
||||
The default fallback grid is 0.1 mm in the metre-based working CRS
|
||||
(EPSG:27700), far below survey resolution and the ``MIN_GEOM_AREA`` threshold
|
||||
downstream, so it cannot crush a postcode-scale shape. Callers operating in a
|
||||
different CRS (e.g. the WGS84 output stage, where the same 1e-4 grid would be
|
||||
~11 m) pass a ``grid`` matched to that CRS. The whole fallback runs ONLY after
|
||||
the exact operation has already failed, so normal output is bit-for-bit
|
||||
unchanged.
|
||||
"""
|
||||
|
||||
import shapely
|
||||
from shapely import GEOSException, make_valid, set_precision
|
||||
from shapely.geometry import Polygon
|
||||
from shapely.ops import unary_union
|
||||
|
||||
# 0.1 mm in metres — well below MIN_GEOM_AREA (0.01 m^2) and survey resolution.
|
||||
_SNAP_GRID = 1e-4
|
||||
|
||||
_EMPTY = Polygon()
|
||||
|
||||
|
||||
def _poly_valid(geom, grid):
|
||||
"""Return an OGC-valid, polygonal-only version of ``geom``.
|
||||
|
||||
Repairs invalidity with ``make_valid`` and discards any non-polygonal debris
|
||||
(dangling lines/points) it leaves behind, since OverlayNG rejects
|
||||
mixed-dimension ``GeometryCollection`` input. The result is always a
|
||||
``Polygon``/``MultiPolygon`` (possibly empty), safe to feed to a
|
||||
fixed-precision overlay.
|
||||
"""
|
||||
g = geom if geom.is_valid else make_valid(geom)
|
||||
if g.geom_type in ("Polygon", "MultiPolygon"):
|
||||
return g
|
||||
if g.geom_type == "GeometryCollection":
|
||||
polys = [
|
||||
p
|
||||
for p in g.geoms
|
||||
if p.geom_type in ("Polygon", "MultiPolygon") and not p.is_empty
|
||||
]
|
||||
if not polys:
|
||||
return _EMPTY
|
||||
if len(polys) == 1:
|
||||
return polys[0]
|
||||
# Dissolve on the grid (robust) rather than building a possibly-invalid
|
||||
# MultiPolygon of overlapping parts.
|
||||
return shapely.union_all(polys, grid_size=grid)
|
||||
return _EMPTY # line / point only
|
||||
|
||||
|
||||
def _snap_poly_valid(geom, grid):
|
||||
"""Coordinate-round onto ``grid`` (``pointwise`` never raises), then clean."""
|
||||
return _poly_valid(set_precision(geom, grid, mode="pointwise"), grid)
|
||||
|
||||
|
||||
def safe_union(geoms, grid=_SNAP_GRID):
|
||||
"""``unary_union`` that survives GEOS robustness failures."""
|
||||
try:
|
||||
return unary_union(geoms)
|
||||
except GEOSException:
|
||||
pass
|
||||
cleaned = [_poly_valid(g, grid) for g in geoms]
|
||||
try:
|
||||
return shapely.union_all(cleaned, grid_size=grid)
|
||||
except GEOSException:
|
||||
snapped = [_snap_poly_valid(g, grid) for g in geoms]
|
||||
return shapely.union_all(snapped, grid_size=grid)
|
||||
|
||||
|
||||
def safe_difference(a, b, grid=_SNAP_GRID):
|
||||
"""``a.difference(b)`` that survives GEOS robustness failures."""
|
||||
try:
|
||||
return a.difference(b)
|
||||
except GEOSException:
|
||||
pass
|
||||
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
|
||||
try:
|
||||
return shapely.difference(a2, b2, grid_size=grid)
|
||||
except GEOSException:
|
||||
return shapely.difference(
|
||||
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
|
||||
)
|
||||
|
||||
|
||||
def safe_intersection(a, b, grid=_SNAP_GRID):
|
||||
"""``a.intersection(b)`` that survives GEOS robustness failures."""
|
||||
try:
|
||||
return a.intersection(b)
|
||||
except GEOSException:
|
||||
pass
|
||||
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
|
||||
try:
|
||||
return shapely.intersection(a2, b2, grid_size=grid)
|
||||
except GEOSException:
|
||||
return shapely.intersection(
|
||||
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
|
||||
)
|
||||
|
|
@ -5,9 +5,10 @@ from pathlib import Path
|
|||
import polars as pl
|
||||
from shapely import wkb
|
||||
from shapely.geometry import MultiPolygon, Polygon
|
||||
from shapely.ops import unary_union
|
||||
from shapely.strtree import STRtree
|
||||
|
||||
from .geometry import safe_difference, safe_union
|
||||
|
||||
|
||||
def load_greenspace(path: Path) -> tuple[STRtree, list]:
|
||||
"""Load greenspace parquet and build an STRtree spatial index.
|
||||
|
|
@ -51,8 +52,8 @@ def subtract_greenspace(
|
|||
if not intersecting:
|
||||
return postcode_geom
|
||||
|
||||
green_union = unary_union(intersecting)
|
||||
result = postcode_geom.difference(green_union)
|
||||
green_union = safe_union(intersecting)
|
||||
result = safe_difference(postcode_geom, green_union)
|
||||
|
||||
if result.is_empty:
|
||||
return postcode_geom
|
||||
|
|
|
|||
|
|
@ -7,11 +7,12 @@ import numpy as np
|
|||
from pyproj import Transformer
|
||||
from shapely import STRtree, make_valid, set_precision
|
||||
from shapely.errors import GEOSException
|
||||
from shapely.geometry import MultiPolygon, Polygon, mapping, shape
|
||||
from shapely.geometry import MultiPolygon, Polygon, box, mapping, shape
|
||||
from shapely.ops import transform as transform_geometry
|
||||
from shapely.ops import unary_union
|
||||
from tqdm import tqdm
|
||||
|
||||
from .geometry import safe_difference, safe_union
|
||||
|
||||
_to_wgs84 = None
|
||||
|
||||
|
||||
|
|
@ -207,7 +208,7 @@ def merge_fragments(
|
|||
|
||||
merged = {}
|
||||
for pc, parts in by_postcode.items():
|
||||
combined = unary_union(parts)
|
||||
combined = safe_union(parts)
|
||||
if combined.is_empty:
|
||||
continue
|
||||
if not combined.is_valid:
|
||||
|
|
@ -260,7 +261,10 @@ def _polygonal(geom):
|
|||
]
|
||||
if not polys:
|
||||
return None
|
||||
merged = unary_union(polys)
|
||||
# Both callers run on WGS84-degree output geometry, so the robustness
|
||||
# fallback snaps on the 1e-6° grid (~0.11 m), not geometry.py's metre
|
||||
# default — a coarse metre grid would obliterate a degree-scale shape.
|
||||
merged = safe_union(polys, grid=_OUTPUT_PRECISION_DEG)
|
||||
return merged if not merged.is_empty else None
|
||||
return None
|
||||
|
||||
|
|
@ -317,8 +321,13 @@ def _resolve_overlaps(
|
|||
# Process losers from highest priority down, so every subtracted neighbour is
|
||||
# already finalised.
|
||||
for i in sorted(higher, key=lambda idx: rank[idx]):
|
||||
cut = unary_union([out[j] for j in higher[i]])
|
||||
trimmed = out[i].difference(cut)
|
||||
# These geometries are WGS84 degrees already snapped to output precision,
|
||||
# so the robustness fallback snaps on the same 1e-6° grid (~0.11 m) rather
|
||||
# than geometry.py's metre-CRS default. A raw difference can still raise a
|
||||
# "side location conflict" on near-coincident OA-seam edges; the
|
||||
# fixed-precision overlay noded them away.
|
||||
cut = safe_union([out[j] for j in higher[i]], grid=_OUTPUT_PRECISION_DEG)
|
||||
trimmed = safe_difference(out[i], cut, grid=_OUTPUT_PRECISION_DEG)
|
||||
if not trimmed.is_valid:
|
||||
trimmed = make_valid(trimmed)
|
||||
# Keep all polygonal parts: these geometries are in WGS84 degrees, so an
|
||||
|
|
@ -336,13 +345,83 @@ def _round_coords(coords, ndigits=6):
|
|||
return [_round_coords(c, ndigits) for c in coords]
|
||||
|
||||
|
||||
def _snap_polygonal(geom, grid):
|
||||
"""Re-node ``geom`` onto ``grid`` (``valid_output``) → polygonal-only, or None.
|
||||
|
||||
``set_precision`` ``valid_output`` runs an OverlayNG noding pass that places
|
||||
every vertex on a multiple of ``grid`` *and* fixes the topology, so a plain
|
||||
coordinate round of the result is exact (no two distinct vertices can land in
|
||||
the same cell). Falls back to ``make_valid`` if precision-reduction raises.
|
||||
"""
|
||||
try:
|
||||
snapped = set_precision(geom, grid, mode="valid_output")
|
||||
except GEOSException:
|
||||
snapped = make_valid(geom)
|
||||
return _polygonal(snapped if snapped.is_valid else make_valid(snapped))
|
||||
|
||||
|
||||
# A square this many output-grid cells on a side, used as the last-resort
|
||||
# footprint when snapping erases a sub-grid sliver. ~10 cells (≈0.7–1.1 m at UK
|
||||
# latitudes) is invisible at map scale yet survives the 1e-6° snap as a valid,
|
||||
# 4-corner ring.
|
||||
_FOOTPRINT_GRID_CELLS = 5
|
||||
|
||||
|
||||
def _grid_footprint(geom):
|
||||
"""A tiny grid-aligned square at ``geom``'s representative point, snapped valid.
|
||||
|
||||
Last line of defence so an *active* postcode never vanishes: the de-overlap
|
||||
pass can shave a small (e.g. co-located, non-geographic) postcode down to a
|
||||
sub-grid sliver that disappears when snapped to output precision. Rather than
|
||||
drop it, place a minimal valid footprint at its location. The tiny overlap
|
||||
this re-creates with the neighbour that trimmed it is harmless — the output
|
||||
partition is best-effort, a missing boundary is a hard validation failure.
|
||||
"""
|
||||
try:
|
||||
point = geom.representative_point()
|
||||
except GEOSException:
|
||||
return None
|
||||
half = _OUTPUT_PRECISION_DEG * _FOOTPRINT_GRID_CELLS
|
||||
square = box(point.x - half, point.y - half, point.x + half, point.y + half)
|
||||
return _snap_polygonal(square, _OUTPUT_PRECISION_DEG)
|
||||
|
||||
|
||||
def _geojson_geometry(geom) -> dict | None:
|
||||
"""Serialize a WGS84 polygon/multipolygon to a 6dp GeoJSON dict, or None."""
|
||||
"""Serialize a WGS84 polygon/multipolygon to a *valid* 6dp GeoJSON dict, or None.
|
||||
|
||||
The coordinates are snapped onto the 1e-6° output grid with a re-noding pass
|
||||
BEFORE the 6dp rounding, not by the round alone. ``_resolve_overlaps`` leaves
|
||||
thin overlap-sliver triangles with full-precision (off-grid) vertices at OA
|
||||
seams; a bare round-to-6dp collapses those into degenerate rings (GEOS "Too
|
||||
few points") and pinches rings into self-intersections that pass the
|
||||
pre-rounding validity check but fail once the feature is read back from disk.
|
||||
Snapping with ``valid_output`` nodes the geometry onto the grid so the round
|
||||
that follows lands on exact 1e-6 multiples and cannot reintroduce invalidity.
|
||||
|
||||
``None`` only for a genuinely empty/degenerate-with-no-location input; a
|
||||
non-empty geometry that snaps to a sub-grid sliver is rescued into a minimal
|
||||
grid footprint rather than dropped (an active postcode must keep a boundary).
|
||||
"""
|
||||
geom = _polygonal(geom if geom.is_valid else make_valid(geom))
|
||||
if geom is None or geom.is_empty:
|
||||
return None
|
||||
gj = mapping(geom)
|
||||
return {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
|
||||
snapped = _snap_polygonal(geom, _OUTPUT_PRECISION_DEG)
|
||||
if snapped is None or snapped.is_empty:
|
||||
snapped = _grid_footprint(geom)
|
||||
if snapped is None or snapped.is_empty:
|
||||
return None
|
||||
gj = mapping(snapped)
|
||||
out = {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
|
||||
# Defence-in-depth: re-validate the dict that actually reaches disk. Snapping
|
||||
# makes the round exact, so this should already hold; repair once more on the
|
||||
# grid if a pathological vertex still pinches a ring.
|
||||
if not shape(out).is_valid:
|
||||
snapped = _snap_polygonal(shape(out), _OUTPUT_PRECISION_DEG)
|
||||
if snapped is None or snapped.is_empty:
|
||||
return None
|
||||
gj = mapping(snapped)
|
||||
out = {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
|
||||
return out
|
||||
|
||||
|
||||
def write_district_geojson(
|
||||
|
|
|
|||
|
|
@ -3,13 +3,23 @@ from collections import Counter, defaultdict
|
|||
import numpy as np
|
||||
from scipy.spatial import cKDTree
|
||||
from shapely import STRtree, make_valid
|
||||
from shapely.geometry import MultiPolygon, Polygon
|
||||
from shapely.ops import unary_union
|
||||
from shapely.geometry import MultiPoint, MultiPolygon, Point, Polygon
|
||||
|
||||
from .geometry import safe_difference, safe_intersection, safe_union
|
||||
from .voronoi import compute_voronoi_regions
|
||||
|
||||
MIN_GEOM_AREA = 0.01
|
||||
|
||||
# Minimal footprint (BNG metres) for a postcode whose UPRN seed wins no area in a
|
||||
# crowded multi-postcode OA — its Voronoi cell ∩ remaining collapses below
|
||||
# MIN_GEOM_AREA, or its seed sits inside an INSPIRE parcel wholly claimed by a
|
||||
# co-located postcode. Every *active* postcode must keep a boundary
|
||||
# (validate_outputs is zero-tolerance), so it gets a small disc at its true seed
|
||||
# location. ~28 m² clears MIN_GEOM_AREA and output snapping; the overlap it
|
||||
# creates with the area's winner is resolved at the output stage, where the
|
||||
# smaller postcode wins the contested ground (see output._resolve_overlaps).
|
||||
_MIN_SEED_FOOTPRINT_M = 3.0
|
||||
|
||||
|
||||
def process_oa(
|
||||
oa_geom: Polygon | MultiPolygon,
|
||||
|
|
@ -36,10 +46,12 @@ def process_oa(
|
|||
|
||||
# Compute remaining area
|
||||
if claimed:
|
||||
all_claimed = unary_union(list(claimed.values()))
|
||||
all_claimed = safe_union(list(claimed.values()))
|
||||
all_claimed = _clean_polygonal(all_claimed)
|
||||
remaining = (
|
||||
valid_oa.difference(all_claimed) if all_claimed is not None else valid_oa
|
||||
safe_difference(valid_oa, all_claimed)
|
||||
if all_claimed is not None
|
||||
else valid_oa
|
||||
)
|
||||
remaining = _clean_polygonal(remaining)
|
||||
else:
|
||||
|
|
@ -60,13 +72,54 @@ def process_oa(
|
|||
|
||||
fragments = []
|
||||
for pc, parts in result.items():
|
||||
merged = _clean_polygonal(unary_union(parts))
|
||||
merged = _clean_polygonal(safe_union(parts))
|
||||
if merged is not None:
|
||||
fragments.append((pc, merged))
|
||||
|
||||
# Every postcode with a UPRN seed in this OA must keep at least a minimal
|
||||
# footprint — in a dense OA (a block of flats with hundreds of distinct
|
||||
# postcodes) a single-seed postcode's cell can collapse below MIN_GEOM_AREA or
|
||||
# be fully absorbed by a co-located postcode's INSPIRE parcel, producing no
|
||||
# fragment, and an active postcode must never be dropped.
|
||||
orphans = unique_pcs - {pc for pc, _ in fragments}
|
||||
if orphans:
|
||||
fragments.extend(_seed_footprints(orphans, points, postcodes, valid_oa))
|
||||
|
||||
return fragments
|
||||
|
||||
|
||||
def _seed_footprints(
|
||||
orphans: set[str],
|
||||
points: np.ndarray,
|
||||
postcodes: list[str],
|
||||
valid_oa: Polygon | MultiPolygon,
|
||||
) -> list[tuple[str, Polygon | MultiPolygon]]:
|
||||
"""Give each orphan postcode a minimal disc footprint at its UPRN seed(s).
|
||||
|
||||
Orphans are postcodes with a UPRN in this OA that nonetheless won no area in
|
||||
the INSPIRE/Voronoi partition. Each keeps a small disc at its true location,
|
||||
clipped to the OA; the overlap with the area's winner is resolved at output.
|
||||
"""
|
||||
by_pc: dict[str, list] = defaultdict(list)
|
||||
for i, pc in enumerate(postcodes):
|
||||
if pc in orphans:
|
||||
by_pc[pc].append(points[i])
|
||||
|
||||
out: list[tuple[str, Polygon | MultiPolygon]] = []
|
||||
for pc, pts in by_pc.items():
|
||||
arr = np.asarray(pts, dtype=np.float64)
|
||||
seed = Point(arr[0]) if len(arr) == 1 else MultiPoint(arr)
|
||||
disc = seed.buffer(_MIN_SEED_FOOTPRINT_M)
|
||||
clipped = _clean_polygonal(safe_intersection(disc, valid_oa))
|
||||
if clipped is None:
|
||||
# Seed on/near the OA edge: keep the unclipped disc so the postcode
|
||||
# still gets a footprint at its location rather than no boundary.
|
||||
clipped = _clean_polygonal(disc)
|
||||
if clipped is not None:
|
||||
out.append((pc, clipped))
|
||||
return out
|
||||
|
||||
|
||||
def _claim_inspire_parcels(
|
||||
valid_oa: Polygon | MultiPolygon,
|
||||
points: np.ndarray,
|
||||
|
|
@ -141,7 +194,7 @@ def _claim_inspire_parcels(
|
|||
|
||||
assignable = parcel
|
||||
if contained_union is not None:
|
||||
assignable = assignable.difference(contained_union)
|
||||
assignable = safe_difference(assignable, contained_union)
|
||||
for part in _polygon_parts(assignable):
|
||||
part = _clean_polygonal(part)
|
||||
if part is None:
|
||||
|
|
@ -169,7 +222,7 @@ def _prepare_inspire_parcels(
|
|||
continue
|
||||
if not geom.intersects(valid_oa):
|
||||
continue
|
||||
clipped = _clean_polygonal(geom.intersection(valid_oa))
|
||||
clipped = _clean_polygonal(safe_intersection(geom, valid_oa))
|
||||
if clipped is not None:
|
||||
parcels.append(clipped)
|
||||
return parcels
|
||||
|
|
@ -199,7 +252,7 @@ def _merge_parts_by_postcode(
|
|||
) -> dict[str, Polygon | MultiPolygon]:
|
||||
merged: dict[str, Polygon | MultiPolygon] = {}
|
||||
for pc, parts in parts_by_postcode.items():
|
||||
geom = _clean_polygonal(unary_union(parts))
|
||||
geom = _clean_polygonal(safe_union(parts))
|
||||
if geom is not None:
|
||||
merged[pc] = geom
|
||||
return merged
|
||||
|
|
@ -210,7 +263,7 @@ def _union_claims(
|
|||
) -> Polygon | MultiPolygon | None:
|
||||
if not claims:
|
||||
return None
|
||||
return _clean_polygonal(unary_union([geom for _, geom in claims]))
|
||||
return _clean_polygonal(safe_union([geom for _, geom in claims]))
|
||||
|
||||
|
||||
def _resolve_ordered_claims(
|
||||
|
|
@ -224,11 +277,11 @@ def _resolve_ordered_claims(
|
|||
if geom is None:
|
||||
continue
|
||||
if used is not None:
|
||||
geom = _clean_polygonal(geom.difference(used))
|
||||
geom = _clean_polygonal(safe_difference(geom, used))
|
||||
if geom is None:
|
||||
continue
|
||||
resolved_parts[pc].append(geom)
|
||||
used = _clean_polygonal(geom if used is None else unary_union([used, geom]))
|
||||
used = _clean_polygonal(geom if used is None else safe_union([used, geom]))
|
||||
return _merge_parts_by_postcode(resolved_parts)
|
||||
|
||||
|
||||
|
|
@ -261,7 +314,7 @@ def _extract_polygonal(geom) -> Polygon | MultiPolygon | None:
|
|||
# overlapping polygonal parts, and a MultiPolygon of overlapping parts is
|
||||
# invalid — it double-counts area and makes the next `.difference()` raise
|
||||
# a TopologyException that aborts the OA (and, in parallel mode, the
|
||||
# worker). unary_union merges them into a valid geometry.
|
||||
merged = unary_union(polys)
|
||||
# worker). safe_union merges them into a valid geometry.
|
||||
merged = safe_union(polys)
|
||||
return merged if not merged.is_empty else None
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -757,6 +757,50 @@ class TestProcessOAInspireParcelAssignment:
|
|||
assert frag_dict["A"].intersection(frag_dict["B"]).area < 0.01
|
||||
|
||||
|
||||
class TestProcessOASeedFootprintGuarantee:
|
||||
"""Every postcode with a UPRN seed in a multi-postcode OA must produce a
|
||||
fragment, even when its partition cell collapses below MIN_GEOM_AREA — an
|
||||
active postcode must never be dropped (validate_outputs is zero-tolerance)."""
|
||||
|
||||
def test_collapsed_voronoi_cells_rescued_as_footprints(self):
|
||||
# OA just above MIN_GEOM_AREA; a 2-way Voronoi split leaves each cell
|
||||
# (~0.0098 m²) below MIN_GEOM_AREA, so both would be dropped without rescue.
|
||||
oa_geom = box(0, 0, 0.14, 0.14) # 0.0196 m²
|
||||
points = np.array([[0.035, 0.07], [0.105, 0.07]])
|
||||
postcodes = ["AA1 1AA", "AA1 1AB"]
|
||||
|
||||
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[])
|
||||
got = {pc for pc, _ in fragments}
|
||||
assert got == {"AA1 1AA", "AA1 1AB"}, f"dropped: {set(postcodes) - got}"
|
||||
for _, geom in fragments:
|
||||
assert geom.is_valid
|
||||
assert geom.area > MIN_GEOM_AREA
|
||||
|
||||
def test_seed_footprint_sits_on_the_uprn(self):
|
||||
from shapely.geometry import Point
|
||||
|
||||
from .process_oa import _seed_footprints
|
||||
|
||||
oa_geom = box(0, 0, 1000, 1000)
|
||||
points = np.array([[500.0, 500.0]])
|
||||
out = _seed_footprints({"AA1 1AA"}, points, ["AA1 1AA"], oa_geom)
|
||||
|
||||
assert len(out) == 1
|
||||
pc, geom = out[0]
|
||||
assert pc == "AA1 1AA"
|
||||
assert geom.is_valid and geom.area > MIN_GEOM_AREA
|
||||
assert geom.contains(Point(500, 500))
|
||||
|
||||
def test_only_orphans_get_footprints(self):
|
||||
# A wins real area; B's lone seed collapses. Only B should be rescued, and
|
||||
# A's genuine (large) fragment must be untouched by the rescue.
|
||||
oa_geom = box(0, 0, 0.14, 0.14)
|
||||
points = np.array([[0.07, 0.07], [0.07, 0.07], [0.105, 0.07]])
|
||||
postcodes = ["AA1 1AA", "AA1 1AA", "AA1 1AB"]
|
||||
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[])
|
||||
assert {pc for pc, _ in fragments} == {"AA1 1AA", "AA1 1AB"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_polygonal helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -1221,6 +1265,238 @@ class TestOutputPartition:
|
|||
assert geoms["AA1 1AB"].area > 0
|
||||
|
||||
|
||||
class TestGeojsonGeometrySliverValidity:
|
||||
"""_geojson_geometry must emit geometry that is still valid after the final 6dp
|
||||
rounding. A sub-grid overlap sliver (left by _resolve_overlaps' full-precision
|
||||
difference) must be snapped away on the output grid rather than collapsed by a
|
||||
naive round into a degenerate ('Too few points') / self-intersecting ring that
|
||||
only fails once read back from disk."""
|
||||
|
||||
def test_subgrid_sliver_does_not_produce_invalid_ring(self):
|
||||
from shapely.geometry import shape
|
||||
from shapely.validation import explain_validity
|
||||
|
||||
from .output import _geojson_geometry
|
||||
|
||||
main = box(-0.34, 51.74, -0.33, 51.75) # ~degree-scale, valid
|
||||
# A thin triangle whose three vertices all round to the same 1e-6 cell.
|
||||
sliver = Polygon(
|
||||
[
|
||||
(-0.3400284, 51.7505061),
|
||||
(-0.3400280, 51.7505060),
|
||||
(-0.3400281, 51.7505063),
|
||||
]
|
||||
)
|
||||
assert sliver.is_valid
|
||||
|
||||
gj = _geojson_geometry(MultiPolygon([main, sliver]))
|
||||
assert gj is not None
|
||||
rt = shape(gj)
|
||||
assert rt.is_valid, explain_validity(rt)
|
||||
assert not rt.is_empty
|
||||
# The real (main) area is preserved; only the degenerate sliver is dropped.
|
||||
assert rt.area == pytest.approx(main.area, rel=1e-3)
|
||||
|
||||
def test_pure_sliver_is_rescued_not_written_invalid(self):
|
||||
from shapely.geometry import shape
|
||||
|
||||
from .output import _OUTPUT_PRECISION_DEG, _geojson_geometry
|
||||
|
||||
# A sub-grid sliver (extent < 1e-6° in one dimension) that snaps to empty:
|
||||
# it must be rescued into a minimal valid footprint at its location, never
|
||||
# written invalid and never dropped (an active postcode keeps a boundary).
|
||||
g = _OUTPUT_PRECISION_DEG
|
||||
sliver = Polygon(
|
||||
[
|
||||
(-0.30, 51.75),
|
||||
(-0.30 + 5 * g, 51.75),
|
||||
(-0.30 + 5 * g, 51.75 + 0.2 * g),
|
||||
(-0.30, 51.75 + 0.1 * g),
|
||||
]
|
||||
)
|
||||
gj = _geojson_geometry(sliver)
|
||||
assert gj is not None # rescued, not dropped
|
||||
rt = shape(gj)
|
||||
assert rt.is_valid and not rt.is_empty
|
||||
assert rt.distance(sliver) == pytest.approx(0.0, abs=1e-4)
|
||||
|
||||
|
||||
class TestColocatedPostcodesAllRetained:
|
||||
"""Co-located non-geographic postcodes (e.g. AL1 9xx) have heavily-overlapping
|
||||
tiny footprints; the de-overlap pass trims most to sub-grid slivers. None may
|
||||
be dropped — every active postcode must keep a (valid, non-empty) boundary."""
|
||||
|
||||
def test_overlapping_tiny_footprints_none_dropped(self, tmp_path):
|
||||
from shapely.geometry import Point, shape
|
||||
|
||||
# 12 ~3 m discs within ~1 m of each other → after de-overlap most collapse
|
||||
# below output precision.
|
||||
postcodes = {}
|
||||
for i in range(12):
|
||||
e = 514000.0 + (i % 4) * 0.3
|
||||
n = 206000.0 + (i // 4) * 0.3
|
||||
postcodes[f"AL1 9{chr(65 + i)}{chr(65 + i)}"] = Point(e, n).buffer(3.0)
|
||||
|
||||
write_district_geojson(postcodes, tmp_path)
|
||||
coll = json.loads((tmp_path / "units" / "AL1.geojson").read_text())
|
||||
written = {f["properties"]["postcodes"] for f in coll["features"]}
|
||||
assert written == set(postcodes), f"dropped: {set(postcodes) - written}"
|
||||
for feature in coll["features"]:
|
||||
geom = shape(feature["geometry"])
|
||||
assert geom.is_valid and not geom.is_empty
|
||||
|
||||
|
||||
class TestSafeOverlayHelpers:
|
||||
"""The robust overlay helpers retry on a fixed-precision grid after a
|
||||
GEOSException (e.g. ``side location conflict`` from near-coincident OA-seam
|
||||
edges). The grid is a caller-supplied parameter: metres for the BNG stages,
|
||||
1e-6 degrees for the WGS84 output stage — so the same helper serves both
|
||||
without crushing degree-scale shapes on the metre default."""
|
||||
|
||||
def test_grid_param_honored_on_clean_inputs(self):
|
||||
from . import geometry
|
||||
|
||||
a = box(0, 0, 10, 10)
|
||||
b = box(5, 0, 15, 10)
|
||||
# No exception → exact result, identical regardless of the fallback grid.
|
||||
assert geometry.safe_difference(a, b, grid=1e-6).equals(a.difference(b))
|
||||
assert geometry.safe_union([a, b], grid=1e-6).equals(unary_union([a, b]))
|
||||
assert geometry.safe_intersection(a, b, grid=1e-6).equals(a.intersection(b))
|
||||
|
||||
def test_difference_falls_back_to_fixed_precision_overlay(self, monkeypatch):
|
||||
from shapely import GEOSException
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from . import geometry
|
||||
|
||||
a = box(0, 0, 10, 10)
|
||||
b = box(5, 0, 15, 10)
|
||||
|
||||
def _boom(self, other, *args, **kwargs):
|
||||
raise GEOSException("forced side location conflict")
|
||||
|
||||
# Patch the .difference() *method*; the fallback uses the top-level
|
||||
# shapely.difference() function, so it still completes.
|
||||
monkeypatch.setattr(BaseGeometry, "difference", _boom)
|
||||
result = geometry.safe_difference(a, b, grid=1e-6)
|
||||
assert result.is_valid
|
||||
assert result.area == pytest.approx(50.0) # left half of `a`
|
||||
|
||||
def test_union_falls_back_to_fixed_precision_overlay(self, monkeypatch):
|
||||
from shapely import GEOSException
|
||||
|
||||
from . import geometry
|
||||
|
||||
a = box(0, 0, 10, 10)
|
||||
b = box(5, 0, 15, 10)
|
||||
|
||||
def _boom(_geoms):
|
||||
raise GEOSException("forced robustness failure")
|
||||
|
||||
monkeypatch.setattr(geometry, "unary_union", _boom)
|
||||
result = geometry.safe_union([a, b], grid=1e-6)
|
||||
assert result.is_valid
|
||||
assert result.area == pytest.approx(150.0) # 100 + 100 - 50 overlap
|
||||
|
||||
# A self-intersecting bow-tie: invalid. set_precision()'s DEFAULT
|
||||
# ('valid_output') mode runs its own noding pass that re-raises
|
||||
# 'side location conflict' on this — which is exactly how the production
|
||||
# crash happened (the fallback re-raised the error it was meant to absorb).
|
||||
_BOWTIE = Polygon([(0, 0), (1e-5, 1e-5), (1e-5, 0), (0, 1e-5), (0, 0)])
|
||||
# make_valid() of this spiky ring returns a mixed-dimension
|
||||
# GeometryCollection (polygon + dangling line); OverlayNG rejects that with
|
||||
# 'Overlay input is mixed-dimension', so the fallback must strip the debris.
|
||||
_SPIKY = Polygon(
|
||||
[(0, 0), (10, 0), (10, 10), (5, 5), (5.0000001, 5), (0, 10), (0, 0), (15, -5), (0, 0)]
|
||||
)
|
||||
|
||||
def test_difference_fallback_survives_invalid_input(self, monkeypatch):
|
||||
"""Regression for the production crash: the fallback must not reduce
|
||||
precision with set_precision's default valid_output mode, which re-raises
|
||||
'side location conflict' on an invalid geometry."""
|
||||
from shapely import GEOSException
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from . import geometry
|
||||
|
||||
assert not self._BOWTIE.is_valid # precondition
|
||||
|
||||
def _boom(self, other, *a, **k):
|
||||
raise GEOSException("forced side location conflict")
|
||||
|
||||
monkeypatch.setattr(BaseGeometry, "difference", _boom)
|
||||
result = geometry.safe_difference(self._BOWTIE, box(0, 0, 5e-6, 5e-6), grid=1e-6)
|
||||
assert result.is_valid
|
||||
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||
|
||||
def test_difference_fallback_survives_make_valid_geometrycollection(
|
||||
self, monkeypatch
|
||||
):
|
||||
"""Regression: make_valid can yield a mixed-dimension GeometryCollection
|
||||
that OverlayNG rejects; the fallback must keep only polygonal parts."""
|
||||
from shapely import GEOSException, make_valid
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from . import geometry
|
||||
|
||||
assert make_valid(self._SPIKY).geom_type == "GeometryCollection" # precondition
|
||||
|
||||
def _boom(self, other, *a, **k):
|
||||
raise GEOSException("forced side location conflict")
|
||||
|
||||
monkeypatch.setattr(BaseGeometry, "difference", _boom)
|
||||
result = geometry.safe_difference(self._SPIKY, box(2, 2, 8, 8), grid=1e-4)
|
||||
assert result.is_valid
|
||||
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||
assert result.area > 0
|
||||
|
||||
def test_intersection_fallback_survives_make_valid_geometrycollection(
|
||||
self, monkeypatch
|
||||
):
|
||||
from shapely import GEOSException
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from . import geometry
|
||||
|
||||
def _boom(self, other, *a, **k):
|
||||
raise GEOSException("forced side location conflict")
|
||||
|
||||
monkeypatch.setattr(BaseGeometry, "intersection", _boom)
|
||||
result = geometry.safe_intersection(self._SPIKY, box(2, 2, 8, 8), grid=1e-4)
|
||||
assert result.is_valid
|
||||
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||
assert result.area > 0
|
||||
|
||||
def test_union_fallback_survives_invalid_and_collection_inputs(self, monkeypatch):
|
||||
"""The union fallback must absorb both an invalid bow-tie and a
|
||||
make_valid-becomes-GeometryCollection input without raising."""
|
||||
from shapely import GEOSException
|
||||
|
||||
from . import geometry
|
||||
|
||||
def _boom(_geoms):
|
||||
raise GEOSException("forced robustness failure")
|
||||
|
||||
monkeypatch.setattr(geometry, "unary_union", _boom)
|
||||
result = geometry.safe_union(
|
||||
[self._BOWTIE, self._SPIKY, box(20, 20, 30, 30)], grid=1e-4
|
||||
)
|
||||
assert result.is_valid
|
||||
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||
|
||||
def test_helpers_never_raise_on_empty_inputs(self):
|
||||
"""Degenerate/empty inputs must not abort a multi-hour run."""
|
||||
from . import geometry
|
||||
|
||||
empty = Polygon()
|
||||
a = box(0, 0, 10, 10)
|
||||
assert geometry.safe_difference(empty, a).is_empty
|
||||
assert geometry.safe_difference(a, empty).equals(a)
|
||||
assert geometry.safe_intersection(empty, a).is_empty
|
||||
assert geometry.safe_union([]).is_empty
|
||||
assert geometry.safe_union([empty]).is_empty
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# InspireIndex must return the same candidates as a brute-force bbox scan
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ import numpy as np
|
|||
from scipy.spatial import QhullError, Voronoi
|
||||
from shapely import make_valid
|
||||
from shapely.geometry import MultiPolygon, Polygon
|
||||
from shapely.ops import unary_union
|
||||
|
||||
from .geometry import safe_intersection, safe_union
|
||||
|
||||
|
||||
def compute_voronoi_regions(
|
||||
|
|
@ -111,13 +112,13 @@ def compute_voronoi_regions(
|
|||
poly = Polygon(vertices)
|
||||
if not poly.is_valid:
|
||||
poly = make_valid(poly)
|
||||
clipped = poly.intersection(boundary)
|
||||
clipped = safe_intersection(poly, boundary)
|
||||
if not clipped.is_empty:
|
||||
pc_polys[unique_pcs[i]].append(clipped)
|
||||
|
||||
result = {}
|
||||
for pc, parts in pc_polys.items():
|
||||
merged = unary_union(parts)
|
||||
merged = safe_union(parts)
|
||||
if not merged.is_empty:
|
||||
result[pc] = merged
|
||||
return result
|
||||
|
|
@ -143,7 +144,7 @@ def _equal_split_fallback(
|
|||
(min_x, strip_max_y),
|
||||
]
|
||||
)
|
||||
clipped = boundary.intersection(strip)
|
||||
clipped = safe_intersection(boundary, strip)
|
||||
if not clipped.is_empty:
|
||||
result[pc] = clipped
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -66,6 +66,53 @@ def test_accumulate_clipped_area_drops_missing_and_empty_geometry() -> None:
|
|||
assert canopy_area[0] == pytest.approx(400.0)
|
||||
|
||||
|
||||
def test_accumulate_clipped_area_survives_invalid_polygon() -> None:
|
||||
"""A self-intersecting external polygon (TOW/NFI data is occasionally invalid)
|
||||
must not abort the batched overlay with 'TopologyException: side location
|
||||
conflict'; its repaired in-buffer area is still accumulated."""
|
||||
radius_m = 50
|
||||
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
|
||||
circles, tree = _postcode_buffers(points, radius_m)
|
||||
|
||||
# Bow-tie centred on A: self-intersecting => invalid. The raw batched
|
||||
# shapely.intersection raises 'side location conflict' on it; make_valid splits
|
||||
# it into two triangles of total area 200, fully inside A's radius-50 buffer.
|
||||
bowtie = shapely.Polygon([(-10, -10), (10, 10), (10, -10), (-10, 10), (-10, -10)])
|
||||
assert not shapely.is_valid(bowtie) # precondition
|
||||
with pytest.raises(shapely.errors.GEOSException): # documents the raw hazard
|
||||
shapely.intersection(
|
||||
np.array([bowtie], dtype=object), np.array([circles[0]], dtype=object)
|
||||
)
|
||||
|
||||
canopy_area = np.zeros(1)
|
||||
_accumulate_clipped_area(np.array([bowtie], dtype=object), circles, tree, canopy_area)
|
||||
assert canopy_area[0] == pytest.approx(200.0, rel=1e-3)
|
||||
|
||||
|
||||
def test_robust_intersection_area_recovers_from_overlay_failure(monkeypatch) -> None:
|
||||
"""The batched-overlay fallback must absorb a GEOSException from the fast path
|
||||
and recover (validate + retry), returning the correct per-pair areas. Version
|
||||
independent: the fast-path failure is forced rather than data-dependent."""
|
||||
from pipeline.transform import tree_density
|
||||
|
||||
real_intersection = shapely.intersection
|
||||
calls = {"n": 0}
|
||||
|
||||
def flaky(a, b, **kwargs):
|
||||
calls["n"] += 1
|
||||
if calls["n"] == 1: # fail only the first (fast-path) call
|
||||
raise shapely.errors.GEOSException("forced side location conflict")
|
||||
return real_intersection(a, b, **kwargs)
|
||||
|
||||
monkeypatch.setattr(tree_density.shapely, "intersection", flaky)
|
||||
|
||||
a = np.array([shapely.box(0, 0, 10, 10), shapely.box(0, 0, 4, 4)], dtype=object)
|
||||
b = np.array([shapely.box(0, 0, 6, 6), shapely.box(0, 0, 2, 2)], dtype=object)
|
||||
out = tree_density._robust_intersection_area(a, b)
|
||||
assert calls["n"] >= 2 # fast path failed -> fallback path executed
|
||||
assert out.tolist() == pytest.approx([36.0, 4.0])
|
||||
|
||||
|
||||
def test_accumulate_clipped_area_height_weighted_by_overlap() -> None:
|
||||
radius_m = 50
|
||||
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
|
||||
|
|
|
|||
|
|
@ -263,6 +263,43 @@ def _postcode_buffers(
|
|||
return circles, shapely.STRtree(circles)
|
||||
|
||||
|
||||
# 0.1 mm in the BNG working CRS (EPSG:27700) — far below survey resolution; the
|
||||
# same grid the postcode_boundaries overlay uses.
|
||||
_OVERLAY_GRID_M = 1e-4
|
||||
|
||||
|
||||
def _robust_intersection_area(a: np.ndarray, b: np.ndarray) -> np.ndarray:
|
||||
"""Vectorized ``area(a[i] ∩ b[i])`` that survives GEOS robustness failures.
|
||||
|
||||
External Forest Research TOW/NFI polygons are occasionally invalid
|
||||
(self-intersections), and a single bad polygon makes the batched
|
||||
``shapely.intersection`` raise ``TopologyException: side location conflict``,
|
||||
aborting the whole run. The fast path is the raw batched overlay — unchanged,
|
||||
full-speed, when the data is clean — and only a failure triggers repair.
|
||||
|
||||
The repair deliberately uses a *plain* overlay rather than the fixed-precision
|
||||
(``grid_size``) one: ``make_valid`` can emit a mixed-dimension
|
||||
``GeometryCollection`` (a polygon plus a dangling line), which OverlayNG
|
||||
rejects with ``Overlay input is mixed-dimension`` — whereas a plain overlay
|
||||
accepts it, and its non-polygonal debris has zero area and is dropped by the
|
||||
``clipped_area > 0`` filter downstream anyway. A final pointwise coordinate
|
||||
snap (which never raises) collapses the near-coincident edges behind any
|
||||
residual full-precision robustness failure.
|
||||
"""
|
||||
try:
|
||||
return shapely.area(shapely.intersection(a, b))
|
||||
except shapely.errors.GEOSException:
|
||||
pass
|
||||
a = shapely.make_valid(a)
|
||||
b = shapely.make_valid(b)
|
||||
try:
|
||||
return shapely.area(shapely.intersection(a, b))
|
||||
except shapely.errors.GEOSException:
|
||||
a = shapely.make_valid(shapely.set_precision(a, _OVERLAY_GRID_M, mode="pointwise"))
|
||||
b = shapely.make_valid(shapely.set_precision(b, _OVERLAY_GRID_M, mode="pointwise"))
|
||||
return shapely.area(shapely.intersection(a, b))
|
||||
|
||||
|
||||
def _accumulate_clipped_area(
|
||||
geoms: np.ndarray,
|
||||
circles: np.ndarray,
|
||||
|
|
@ -294,8 +331,8 @@ def _accumulate_clipped_area(
|
|||
if geom_index.size == 0:
|
||||
return
|
||||
|
||||
clipped_area = shapely.area(
|
||||
shapely.intersection(geoms[geom_index], circles[postcode_index])
|
||||
clipped_area = _robust_intersection_area(
|
||||
geoms[geom_index], circles[postcode_index]
|
||||
)
|
||||
positive = clipped_area > 0
|
||||
geom_index = geom_index[positive]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue