don't crash

This commit is contained in:
Andras Schmelczer 2026-06-04 20:40:42 +01:00
parent aab85fe32e
commit d6d20ccd37
13 changed files with 2630 additions and 3924 deletions

View file

@ -0,0 +1,123 @@
"""Robust GEOS overlay helpers.
Overlay operations (union, difference, intersection) can raise a
``GEOSException`` most often ``TopologyException: side location conflict``,
``Ring edge missing``, or ``found non-noded intersection`` on geometries that
contain near-coincident or near-degenerate edges, or that are individually
invalid. The robust remedy is a *fixed-precision* overlay: GEOS's OverlayNG
engine, handed a grid size, nodes every edge onto that grid and finishes where
the full-precision overlay cannot.
Getting the fallback to actually survive takes three rules, each of which we
learned the hard way from a crash:
1. **Never precision-reduce with the default mode.** ``set_precision``'s default
``valid_output`` (and ``keep_collapsed``) mode runs its *own* noding pass that
re-raises the very ``side location conflict`` we are trying to escape. We push
the grid into the overlay via the ``grid_size`` argument instead where
OverlayNG nodes robustly and only ever call ``set_precision`` in
``pointwise`` mode (pure coordinate rounding, which cannot raise).
2. **Validate first.** ``make_valid`` repairs the self-intersections (bow-ties,
pinches) that make GEOS choke, so the overlay starts from an OGC-valid shape.
3. **Keep only polygonal parts.** ``make_valid`` of a spiky polygon routinely
returns a *mixed-dimension* ``GeometryCollection`` (a polygon plus a dangling
line), and OverlayNG rejects mixed-dimension input with
``IllegalArgumentException: Overlay input is mixed-dimension``. Every input
here represents an area, so the line/point debris is meaningless noise and is
dropped before the overlay.
The default fallback grid is 0.1 mm in the metre-based working CRS
(EPSG:27700), far below survey resolution and the ``MIN_GEOM_AREA`` threshold
downstream, so it cannot crush a postcode-scale shape. Callers operating in a
different CRS (e.g. the WGS84 output stage, where the same 1e-4 grid would be
~11 m) pass a ``grid`` matched to that CRS. The whole fallback runs ONLY after
the exact operation has already failed, so normal output is bit-for-bit
unchanged.
"""
import shapely
from shapely import GEOSException, make_valid, set_precision
from shapely.geometry import Polygon
from shapely.ops import unary_union
# 0.1 mm in metres — well below MIN_GEOM_AREA (0.01 m^2) and survey resolution.
_SNAP_GRID = 1e-4
_EMPTY = Polygon()
def _poly_valid(geom, grid):
"""Return an OGC-valid, polygonal-only version of ``geom``.
Repairs invalidity with ``make_valid`` and discards any non-polygonal debris
(dangling lines/points) it leaves behind, since OverlayNG rejects
mixed-dimension ``GeometryCollection`` input. The result is always a
``Polygon``/``MultiPolygon`` (possibly empty), safe to feed to a
fixed-precision overlay.
"""
g = geom if geom.is_valid else make_valid(geom)
if g.geom_type in ("Polygon", "MultiPolygon"):
return g
if g.geom_type == "GeometryCollection":
polys = [
p
for p in g.geoms
if p.geom_type in ("Polygon", "MultiPolygon") and not p.is_empty
]
if not polys:
return _EMPTY
if len(polys) == 1:
return polys[0]
# Dissolve on the grid (robust) rather than building a possibly-invalid
# MultiPolygon of overlapping parts.
return shapely.union_all(polys, grid_size=grid)
return _EMPTY # line / point only
def _snap_poly_valid(geom, grid):
"""Coordinate-round onto ``grid`` (``pointwise`` never raises), then clean."""
return _poly_valid(set_precision(geom, grid, mode="pointwise"), grid)
def safe_union(geoms, grid=_SNAP_GRID):
"""``unary_union`` that survives GEOS robustness failures."""
try:
return unary_union(geoms)
except GEOSException:
pass
cleaned = [_poly_valid(g, grid) for g in geoms]
try:
return shapely.union_all(cleaned, grid_size=grid)
except GEOSException:
snapped = [_snap_poly_valid(g, grid) for g in geoms]
return shapely.union_all(snapped, grid_size=grid)
def safe_difference(a, b, grid=_SNAP_GRID):
"""``a.difference(b)`` that survives GEOS robustness failures."""
try:
return a.difference(b)
except GEOSException:
pass
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
try:
return shapely.difference(a2, b2, grid_size=grid)
except GEOSException:
return shapely.difference(
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
)
def safe_intersection(a, b, grid=_SNAP_GRID):
"""``a.intersection(b)`` that survives GEOS robustness failures."""
try:
return a.intersection(b)
except GEOSException:
pass
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
try:
return shapely.intersection(a2, b2, grid_size=grid)
except GEOSException:
return shapely.intersection(
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
)

View file

@ -5,9 +5,10 @@ from pathlib import Path
import polars as pl
from shapely import wkb
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from .geometry import safe_difference, safe_union
def load_greenspace(path: Path) -> tuple[STRtree, list]:
"""Load greenspace parquet and build an STRtree spatial index.
@ -51,8 +52,8 @@ def subtract_greenspace(
if not intersecting:
return postcode_geom
green_union = unary_union(intersecting)
result = postcode_geom.difference(green_union)
green_union = safe_union(intersecting)
result = safe_difference(postcode_geom, green_union)
if result.is_empty:
return postcode_geom

View file

@ -7,11 +7,12 @@ import numpy as np
from pyproj import Transformer
from shapely import STRtree, make_valid, set_precision
from shapely.errors import GEOSException
from shapely.geometry import MultiPolygon, Polygon, mapping, shape
from shapely.geometry import MultiPolygon, Polygon, box, mapping, shape
from shapely.ops import transform as transform_geometry
from shapely.ops import unary_union
from tqdm import tqdm
from .geometry import safe_difference, safe_union
_to_wgs84 = None
@ -207,7 +208,7 @@ def merge_fragments(
merged = {}
for pc, parts in by_postcode.items():
combined = unary_union(parts)
combined = safe_union(parts)
if combined.is_empty:
continue
if not combined.is_valid:
@ -260,7 +261,10 @@ def _polygonal(geom):
]
if not polys:
return None
merged = unary_union(polys)
# Both callers run on WGS84-degree output geometry, so the robustness
# fallback snaps on the 1e-6° grid (~0.11 m), not geometry.py's metre
# default — a coarse metre grid would obliterate a degree-scale shape.
merged = safe_union(polys, grid=_OUTPUT_PRECISION_DEG)
return merged if not merged.is_empty else None
return None
@ -317,8 +321,13 @@ def _resolve_overlaps(
# Process losers from highest priority down, so every subtracted neighbour is
# already finalised.
for i in sorted(higher, key=lambda idx: rank[idx]):
cut = unary_union([out[j] for j in higher[i]])
trimmed = out[i].difference(cut)
# These geometries are WGS84 degrees already snapped to output precision,
# so the robustness fallback snaps on the same 1e-6° grid (~0.11 m) rather
# than geometry.py's metre-CRS default. A raw difference can still raise a
# "side location conflict" on near-coincident OA-seam edges; the
# fixed-precision overlay noded them away.
cut = safe_union([out[j] for j in higher[i]], grid=_OUTPUT_PRECISION_DEG)
trimmed = safe_difference(out[i], cut, grid=_OUTPUT_PRECISION_DEG)
if not trimmed.is_valid:
trimmed = make_valid(trimmed)
# Keep all polygonal parts: these geometries are in WGS84 degrees, so an
@ -336,13 +345,83 @@ def _round_coords(coords, ndigits=6):
return [_round_coords(c, ndigits) for c in coords]
def _snap_polygonal(geom, grid):
"""Re-node ``geom`` onto ``grid`` (``valid_output``) → polygonal-only, or None.
``set_precision`` ``valid_output`` runs an OverlayNG noding pass that places
every vertex on a multiple of ``grid`` *and* fixes the topology, so a plain
coordinate round of the result is exact (no two distinct vertices can land in
the same cell). Falls back to ``make_valid`` if precision-reduction raises.
"""
try:
snapped = set_precision(geom, grid, mode="valid_output")
except GEOSException:
snapped = make_valid(geom)
return _polygonal(snapped if snapped.is_valid else make_valid(snapped))
# A square this many output-grid cells on a side, used as the last-resort
# footprint when snapping erases a sub-grid sliver. ~10 cells (≈0.71.1 m at UK
# latitudes) is invisible at map scale yet survives the 1e-6° snap as a valid,
# 4-corner ring.
_FOOTPRINT_GRID_CELLS = 5
def _grid_footprint(geom):
"""A tiny grid-aligned square at ``geom``'s representative point, snapped valid.
Last line of defence so an *active* postcode never vanishes: the de-overlap
pass can shave a small (e.g. co-located, non-geographic) postcode down to a
sub-grid sliver that disappears when snapped to output precision. Rather than
drop it, place a minimal valid footprint at its location. The tiny overlap
this re-creates with the neighbour that trimmed it is harmless the output
partition is best-effort, a missing boundary is a hard validation failure.
"""
try:
point = geom.representative_point()
except GEOSException:
return None
half = _OUTPUT_PRECISION_DEG * _FOOTPRINT_GRID_CELLS
square = box(point.x - half, point.y - half, point.x + half, point.y + half)
return _snap_polygonal(square, _OUTPUT_PRECISION_DEG)
def _geojson_geometry(geom) -> dict | None:
"""Serialize a WGS84 polygon/multipolygon to a 6dp GeoJSON dict, or None."""
"""Serialize a WGS84 polygon/multipolygon to a *valid* 6dp GeoJSON dict, or None.
The coordinates are snapped onto the 1e-6° output grid with a re-noding pass
BEFORE the 6dp rounding, not by the round alone. ``_resolve_overlaps`` leaves
thin overlap-sliver triangles with full-precision (off-grid) vertices at OA
seams; a bare round-to-6dp collapses those into degenerate rings (GEOS "Too
few points") and pinches rings into self-intersections that pass the
pre-rounding validity check but fail once the feature is read back from disk.
Snapping with ``valid_output`` nodes the geometry onto the grid so the round
that follows lands on exact 1e-6 multiples and cannot reintroduce invalidity.
``None`` only for a genuinely empty/degenerate-with-no-location input; a
non-empty geometry that snaps to a sub-grid sliver is rescued into a minimal
grid footprint rather than dropped (an active postcode must keep a boundary).
"""
geom = _polygonal(geom if geom.is_valid else make_valid(geom))
if geom is None or geom.is_empty:
return None
gj = mapping(geom)
return {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
snapped = _snap_polygonal(geom, _OUTPUT_PRECISION_DEG)
if snapped is None or snapped.is_empty:
snapped = _grid_footprint(geom)
if snapped is None or snapped.is_empty:
return None
gj = mapping(snapped)
out = {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
# Defence-in-depth: re-validate the dict that actually reaches disk. Snapping
# makes the round exact, so this should already hold; repair once more on the
# grid if a pathological vertex still pinches a ring.
if not shape(out).is_valid:
snapped = _snap_polygonal(shape(out), _OUTPUT_PRECISION_DEG)
if snapped is None or snapped.is_empty:
return None
gj = mapping(snapped)
out = {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
return out
def write_district_geojson(

View file

@ -3,13 +3,23 @@ from collections import Counter, defaultdict
import numpy as np
from scipy.spatial import cKDTree
from shapely import STRtree, make_valid
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union
from shapely.geometry import MultiPoint, MultiPolygon, Point, Polygon
from .geometry import safe_difference, safe_intersection, safe_union
from .voronoi import compute_voronoi_regions
MIN_GEOM_AREA = 0.01
# Minimal footprint (BNG metres) for a postcode whose UPRN seed wins no area in a
# crowded multi-postcode OA — its Voronoi cell ∩ remaining collapses below
# MIN_GEOM_AREA, or its seed sits inside an INSPIRE parcel wholly claimed by a
# co-located postcode. Every *active* postcode must keep a boundary
# (validate_outputs is zero-tolerance), so it gets a small disc at its true seed
# location. ~28 m² clears MIN_GEOM_AREA and output snapping; the overlap it
# creates with the area's winner is resolved at the output stage, where the
# smaller postcode wins the contested ground (see output._resolve_overlaps).
_MIN_SEED_FOOTPRINT_M = 3.0
def process_oa(
oa_geom: Polygon | MultiPolygon,
@ -36,10 +46,12 @@ def process_oa(
# Compute remaining area
if claimed:
all_claimed = unary_union(list(claimed.values()))
all_claimed = safe_union(list(claimed.values()))
all_claimed = _clean_polygonal(all_claimed)
remaining = (
valid_oa.difference(all_claimed) if all_claimed is not None else valid_oa
safe_difference(valid_oa, all_claimed)
if all_claimed is not None
else valid_oa
)
remaining = _clean_polygonal(remaining)
else:
@ -60,13 +72,54 @@ def process_oa(
fragments = []
for pc, parts in result.items():
merged = _clean_polygonal(unary_union(parts))
merged = _clean_polygonal(safe_union(parts))
if merged is not None:
fragments.append((pc, merged))
# Every postcode with a UPRN seed in this OA must keep at least a minimal
# footprint — in a dense OA (a block of flats with hundreds of distinct
# postcodes) a single-seed postcode's cell can collapse below MIN_GEOM_AREA or
# be fully absorbed by a co-located postcode's INSPIRE parcel, producing no
# fragment, and an active postcode must never be dropped.
orphans = unique_pcs - {pc for pc, _ in fragments}
if orphans:
fragments.extend(_seed_footprints(orphans, points, postcodes, valid_oa))
return fragments
def _seed_footprints(
orphans: set[str],
points: np.ndarray,
postcodes: list[str],
valid_oa: Polygon | MultiPolygon,
) -> list[tuple[str, Polygon | MultiPolygon]]:
"""Give each orphan postcode a minimal disc footprint at its UPRN seed(s).
Orphans are postcodes with a UPRN in this OA that nonetheless won no area in
the INSPIRE/Voronoi partition. Each keeps a small disc at its true location,
clipped to the OA; the overlap with the area's winner is resolved at output.
"""
by_pc: dict[str, list] = defaultdict(list)
for i, pc in enumerate(postcodes):
if pc in orphans:
by_pc[pc].append(points[i])
out: list[tuple[str, Polygon | MultiPolygon]] = []
for pc, pts in by_pc.items():
arr = np.asarray(pts, dtype=np.float64)
seed = Point(arr[0]) if len(arr) == 1 else MultiPoint(arr)
disc = seed.buffer(_MIN_SEED_FOOTPRINT_M)
clipped = _clean_polygonal(safe_intersection(disc, valid_oa))
if clipped is None:
# Seed on/near the OA edge: keep the unclipped disc so the postcode
# still gets a footprint at its location rather than no boundary.
clipped = _clean_polygonal(disc)
if clipped is not None:
out.append((pc, clipped))
return out
def _claim_inspire_parcels(
valid_oa: Polygon | MultiPolygon,
points: np.ndarray,
@ -141,7 +194,7 @@ def _claim_inspire_parcels(
assignable = parcel
if contained_union is not None:
assignable = assignable.difference(contained_union)
assignable = safe_difference(assignable, contained_union)
for part in _polygon_parts(assignable):
part = _clean_polygonal(part)
if part is None:
@ -169,7 +222,7 @@ def _prepare_inspire_parcels(
continue
if not geom.intersects(valid_oa):
continue
clipped = _clean_polygonal(geom.intersection(valid_oa))
clipped = _clean_polygonal(safe_intersection(geom, valid_oa))
if clipped is not None:
parcels.append(clipped)
return parcels
@ -199,7 +252,7 @@ def _merge_parts_by_postcode(
) -> dict[str, Polygon | MultiPolygon]:
merged: dict[str, Polygon | MultiPolygon] = {}
for pc, parts in parts_by_postcode.items():
geom = _clean_polygonal(unary_union(parts))
geom = _clean_polygonal(safe_union(parts))
if geom is not None:
merged[pc] = geom
return merged
@ -210,7 +263,7 @@ def _union_claims(
) -> Polygon | MultiPolygon | None:
if not claims:
return None
return _clean_polygonal(unary_union([geom for _, geom in claims]))
return _clean_polygonal(safe_union([geom for _, geom in claims]))
def _resolve_ordered_claims(
@ -224,11 +277,11 @@ def _resolve_ordered_claims(
if geom is None:
continue
if used is not None:
geom = _clean_polygonal(geom.difference(used))
geom = _clean_polygonal(safe_difference(geom, used))
if geom is None:
continue
resolved_parts[pc].append(geom)
used = _clean_polygonal(geom if used is None else unary_union([used, geom]))
used = _clean_polygonal(geom if used is None else safe_union([used, geom]))
return _merge_parts_by_postcode(resolved_parts)
@ -261,7 +314,7 @@ def _extract_polygonal(geom) -> Polygon | MultiPolygon | None:
# overlapping polygonal parts, and a MultiPolygon of overlapping parts is
# invalid — it double-counts area and makes the next `.difference()` raise
# a TopologyException that aborts the OA (and, in parallel mode, the
# worker). unary_union merges them into a valid geometry.
merged = unary_union(polys)
# worker). safe_union merges them into a valid geometry.
merged = safe_union(polys)
return merged if not merged.is_empty else None
return None

View file

@ -757,6 +757,50 @@ class TestProcessOAInspireParcelAssignment:
assert frag_dict["A"].intersection(frag_dict["B"]).area < 0.01
class TestProcessOASeedFootprintGuarantee:
"""Every postcode with a UPRN seed in a multi-postcode OA must produce a
fragment, even when its partition cell collapses below MIN_GEOM_AREA an
active postcode must never be dropped (validate_outputs is zero-tolerance)."""
def test_collapsed_voronoi_cells_rescued_as_footprints(self):
# OA just above MIN_GEOM_AREA; a 2-way Voronoi split leaves each cell
# (~0.0098 m²) below MIN_GEOM_AREA, so both would be dropped without rescue.
oa_geom = box(0, 0, 0.14, 0.14) # 0.0196 m²
points = np.array([[0.035, 0.07], [0.105, 0.07]])
postcodes = ["AA1 1AA", "AA1 1AB"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[])
got = {pc for pc, _ in fragments}
assert got == {"AA1 1AA", "AA1 1AB"}, f"dropped: {set(postcodes) - got}"
for _, geom in fragments:
assert geom.is_valid
assert geom.area > MIN_GEOM_AREA
def test_seed_footprint_sits_on_the_uprn(self):
from shapely.geometry import Point
from .process_oa import _seed_footprints
oa_geom = box(0, 0, 1000, 1000)
points = np.array([[500.0, 500.0]])
out = _seed_footprints({"AA1 1AA"}, points, ["AA1 1AA"], oa_geom)
assert len(out) == 1
pc, geom = out[0]
assert pc == "AA1 1AA"
assert geom.is_valid and geom.area > MIN_GEOM_AREA
assert geom.contains(Point(500, 500))
def test_only_orphans_get_footprints(self):
# A wins real area; B's lone seed collapses. Only B should be rescued, and
# A's genuine (large) fragment must be untouched by the rescue.
oa_geom = box(0, 0, 0.14, 0.14)
points = np.array([[0.07, 0.07], [0.07, 0.07], [0.105, 0.07]])
postcodes = ["AA1 1AA", "AA1 1AA", "AA1 1AB"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[])
assert {pc for pc, _ in fragments} == {"AA1 1AA", "AA1 1AB"}
# ---------------------------------------------------------------------------
# _extract_polygonal helper
# ---------------------------------------------------------------------------
@ -1221,6 +1265,238 @@ class TestOutputPartition:
assert geoms["AA1 1AB"].area > 0
class TestGeojsonGeometrySliverValidity:
"""_geojson_geometry must emit geometry that is still valid after the final 6dp
rounding. A sub-grid overlap sliver (left by _resolve_overlaps' full-precision
difference) must be snapped away on the output grid rather than collapsed by a
naive round into a degenerate ('Too few points') / self-intersecting ring that
only fails once read back from disk."""
def test_subgrid_sliver_does_not_produce_invalid_ring(self):
from shapely.geometry import shape
from shapely.validation import explain_validity
from .output import _geojson_geometry
main = box(-0.34, 51.74, -0.33, 51.75) # ~degree-scale, valid
# A thin triangle whose three vertices all round to the same 1e-6 cell.
sliver = Polygon(
[
(-0.3400284, 51.7505061),
(-0.3400280, 51.7505060),
(-0.3400281, 51.7505063),
]
)
assert sliver.is_valid
gj = _geojson_geometry(MultiPolygon([main, sliver]))
assert gj is not None
rt = shape(gj)
assert rt.is_valid, explain_validity(rt)
assert not rt.is_empty
# The real (main) area is preserved; only the degenerate sliver is dropped.
assert rt.area == pytest.approx(main.area, rel=1e-3)
def test_pure_sliver_is_rescued_not_written_invalid(self):
from shapely.geometry import shape
from .output import _OUTPUT_PRECISION_DEG, _geojson_geometry
# A sub-grid sliver (extent < 1e-6° in one dimension) that snaps to empty:
# it must be rescued into a minimal valid footprint at its location, never
# written invalid and never dropped (an active postcode keeps a boundary).
g = _OUTPUT_PRECISION_DEG
sliver = Polygon(
[
(-0.30, 51.75),
(-0.30 + 5 * g, 51.75),
(-0.30 + 5 * g, 51.75 + 0.2 * g),
(-0.30, 51.75 + 0.1 * g),
]
)
gj = _geojson_geometry(sliver)
assert gj is not None # rescued, not dropped
rt = shape(gj)
assert rt.is_valid and not rt.is_empty
assert rt.distance(sliver) == pytest.approx(0.0, abs=1e-4)
class TestColocatedPostcodesAllRetained:
"""Co-located non-geographic postcodes (e.g. AL1 9xx) have heavily-overlapping
tiny footprints; the de-overlap pass trims most to sub-grid slivers. None may
be dropped every active postcode must keep a (valid, non-empty) boundary."""
def test_overlapping_tiny_footprints_none_dropped(self, tmp_path):
from shapely.geometry import Point, shape
# 12 ~3 m discs within ~1 m of each other → after de-overlap most collapse
# below output precision.
postcodes = {}
for i in range(12):
e = 514000.0 + (i % 4) * 0.3
n = 206000.0 + (i // 4) * 0.3
postcodes[f"AL1 9{chr(65 + i)}{chr(65 + i)}"] = Point(e, n).buffer(3.0)
write_district_geojson(postcodes, tmp_path)
coll = json.loads((tmp_path / "units" / "AL1.geojson").read_text())
written = {f["properties"]["postcodes"] for f in coll["features"]}
assert written == set(postcodes), f"dropped: {set(postcodes) - written}"
for feature in coll["features"]:
geom = shape(feature["geometry"])
assert geom.is_valid and not geom.is_empty
class TestSafeOverlayHelpers:
"""The robust overlay helpers retry on a fixed-precision grid after a
GEOSException (e.g. ``side location conflict`` from near-coincident OA-seam
edges). The grid is a caller-supplied parameter: metres for the BNG stages,
1e-6 degrees for the WGS84 output stage so the same helper serves both
without crushing degree-scale shapes on the metre default."""
def test_grid_param_honored_on_clean_inputs(self):
from . import geometry
a = box(0, 0, 10, 10)
b = box(5, 0, 15, 10)
# No exception → exact result, identical regardless of the fallback grid.
assert geometry.safe_difference(a, b, grid=1e-6).equals(a.difference(b))
assert geometry.safe_union([a, b], grid=1e-6).equals(unary_union([a, b]))
assert geometry.safe_intersection(a, b, grid=1e-6).equals(a.intersection(b))
def test_difference_falls_back_to_fixed_precision_overlay(self, monkeypatch):
from shapely import GEOSException
from shapely.geometry.base import BaseGeometry
from . import geometry
a = box(0, 0, 10, 10)
b = box(5, 0, 15, 10)
def _boom(self, other, *args, **kwargs):
raise GEOSException("forced side location conflict")
# Patch the .difference() *method*; the fallback uses the top-level
# shapely.difference() function, so it still completes.
monkeypatch.setattr(BaseGeometry, "difference", _boom)
result = geometry.safe_difference(a, b, grid=1e-6)
assert result.is_valid
assert result.area == pytest.approx(50.0) # left half of `a`
def test_union_falls_back_to_fixed_precision_overlay(self, monkeypatch):
from shapely import GEOSException
from . import geometry
a = box(0, 0, 10, 10)
b = box(5, 0, 15, 10)
def _boom(_geoms):
raise GEOSException("forced robustness failure")
monkeypatch.setattr(geometry, "unary_union", _boom)
result = geometry.safe_union([a, b], grid=1e-6)
assert result.is_valid
assert result.area == pytest.approx(150.0) # 100 + 100 - 50 overlap
# A self-intersecting bow-tie: invalid. set_precision()'s DEFAULT
# ('valid_output') mode runs its own noding pass that re-raises
# 'side location conflict' on this — which is exactly how the production
# crash happened (the fallback re-raised the error it was meant to absorb).
_BOWTIE = Polygon([(0, 0), (1e-5, 1e-5), (1e-5, 0), (0, 1e-5), (0, 0)])
# make_valid() of this spiky ring returns a mixed-dimension
# GeometryCollection (polygon + dangling line); OverlayNG rejects that with
# 'Overlay input is mixed-dimension', so the fallback must strip the debris.
_SPIKY = Polygon(
[(0, 0), (10, 0), (10, 10), (5, 5), (5.0000001, 5), (0, 10), (0, 0), (15, -5), (0, 0)]
)
def test_difference_fallback_survives_invalid_input(self, monkeypatch):
"""Regression for the production crash: the fallback must not reduce
precision with set_precision's default valid_output mode, which re-raises
'side location conflict' on an invalid geometry."""
from shapely import GEOSException
from shapely.geometry.base import BaseGeometry
from . import geometry
assert not self._BOWTIE.is_valid # precondition
def _boom(self, other, *a, **k):
raise GEOSException("forced side location conflict")
monkeypatch.setattr(BaseGeometry, "difference", _boom)
result = geometry.safe_difference(self._BOWTIE, box(0, 0, 5e-6, 5e-6), grid=1e-6)
assert result.is_valid
assert result.geom_type in ("Polygon", "MultiPolygon")
def test_difference_fallback_survives_make_valid_geometrycollection(
self, monkeypatch
):
"""Regression: make_valid can yield a mixed-dimension GeometryCollection
that OverlayNG rejects; the fallback must keep only polygonal parts."""
from shapely import GEOSException, make_valid
from shapely.geometry.base import BaseGeometry
from . import geometry
assert make_valid(self._SPIKY).geom_type == "GeometryCollection" # precondition
def _boom(self, other, *a, **k):
raise GEOSException("forced side location conflict")
monkeypatch.setattr(BaseGeometry, "difference", _boom)
result = geometry.safe_difference(self._SPIKY, box(2, 2, 8, 8), grid=1e-4)
assert result.is_valid
assert result.geom_type in ("Polygon", "MultiPolygon")
assert result.area > 0
def test_intersection_fallback_survives_make_valid_geometrycollection(
self, monkeypatch
):
from shapely import GEOSException
from shapely.geometry.base import BaseGeometry
from . import geometry
def _boom(self, other, *a, **k):
raise GEOSException("forced side location conflict")
monkeypatch.setattr(BaseGeometry, "intersection", _boom)
result = geometry.safe_intersection(self._SPIKY, box(2, 2, 8, 8), grid=1e-4)
assert result.is_valid
assert result.geom_type in ("Polygon", "MultiPolygon")
assert result.area > 0
def test_union_fallback_survives_invalid_and_collection_inputs(self, monkeypatch):
"""The union fallback must absorb both an invalid bow-tie and a
make_valid-becomes-GeometryCollection input without raising."""
from shapely import GEOSException
from . import geometry
def _boom(_geoms):
raise GEOSException("forced robustness failure")
monkeypatch.setattr(geometry, "unary_union", _boom)
result = geometry.safe_union(
[self._BOWTIE, self._SPIKY, box(20, 20, 30, 30)], grid=1e-4
)
assert result.is_valid
assert result.geom_type in ("Polygon", "MultiPolygon")
def test_helpers_never_raise_on_empty_inputs(self):
"""Degenerate/empty inputs must not abort a multi-hour run."""
from . import geometry
empty = Polygon()
a = box(0, 0, 10, 10)
assert geometry.safe_difference(empty, a).is_empty
assert geometry.safe_difference(a, empty).equals(a)
assert geometry.safe_intersection(empty, a).is_empty
assert geometry.safe_union([]).is_empty
assert geometry.safe_union([empty]).is_empty
# ---------------------------------------------------------------------------
# InspireIndex must return the same candidates as a brute-force bbox scan
# ---------------------------------------------------------------------------

View file

@ -4,7 +4,8 @@ import numpy as np
from scipy.spatial import QhullError, Voronoi
from shapely import make_valid
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union
from .geometry import safe_intersection, safe_union
def compute_voronoi_regions(
@ -111,13 +112,13 @@ def compute_voronoi_regions(
poly = Polygon(vertices)
if not poly.is_valid:
poly = make_valid(poly)
clipped = poly.intersection(boundary)
clipped = safe_intersection(poly, boundary)
if not clipped.is_empty:
pc_polys[unique_pcs[i]].append(clipped)
result = {}
for pc, parts in pc_polys.items():
merged = unary_union(parts)
merged = safe_union(parts)
if not merged.is_empty:
result[pc] = merged
return result
@ -143,7 +144,7 @@ def _equal_split_fallback(
(min_x, strip_max_y),
]
)
clipped = boundary.intersection(strip)
clipped = safe_intersection(boundary, strip)
if not clipped.is_empty:
result[pc] = clipped
return result

View file

@ -66,6 +66,53 @@ def test_accumulate_clipped_area_drops_missing_and_empty_geometry() -> None:
assert canopy_area[0] == pytest.approx(400.0)
def test_accumulate_clipped_area_survives_invalid_polygon() -> None:
"""A self-intersecting external polygon (TOW/NFI data is occasionally invalid)
must not abort the batched overlay with 'TopologyException: side location
conflict'; its repaired in-buffer area is still accumulated."""
radius_m = 50
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
circles, tree = _postcode_buffers(points, radius_m)
# Bow-tie centred on A: self-intersecting => invalid. The raw batched
# shapely.intersection raises 'side location conflict' on it; make_valid splits
# it into two triangles of total area 200, fully inside A's radius-50 buffer.
bowtie = shapely.Polygon([(-10, -10), (10, 10), (10, -10), (-10, 10), (-10, -10)])
assert not shapely.is_valid(bowtie) # precondition
with pytest.raises(shapely.errors.GEOSException): # documents the raw hazard
shapely.intersection(
np.array([bowtie], dtype=object), np.array([circles[0]], dtype=object)
)
canopy_area = np.zeros(1)
_accumulate_clipped_area(np.array([bowtie], dtype=object), circles, tree, canopy_area)
assert canopy_area[0] == pytest.approx(200.0, rel=1e-3)
def test_robust_intersection_area_recovers_from_overlay_failure(monkeypatch) -> None:
"""The batched-overlay fallback must absorb a GEOSException from the fast path
and recover (validate + retry), returning the correct per-pair areas. Version
independent: the fast-path failure is forced rather than data-dependent."""
from pipeline.transform import tree_density
real_intersection = shapely.intersection
calls = {"n": 0}
def flaky(a, b, **kwargs):
calls["n"] += 1
if calls["n"] == 1: # fail only the first (fast-path) call
raise shapely.errors.GEOSException("forced side location conflict")
return real_intersection(a, b, **kwargs)
monkeypatch.setattr(tree_density.shapely, "intersection", flaky)
a = np.array([shapely.box(0, 0, 10, 10), shapely.box(0, 0, 4, 4)], dtype=object)
b = np.array([shapely.box(0, 0, 6, 6), shapely.box(0, 0, 2, 2)], dtype=object)
out = tree_density._robust_intersection_area(a, b)
assert calls["n"] >= 2 # fast path failed -> fallback path executed
assert out.tolist() == pytest.approx([36.0, 4.0])
def test_accumulate_clipped_area_height_weighted_by_overlap() -> None:
radius_m = 50
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})

View file

@ -263,6 +263,43 @@ def _postcode_buffers(
return circles, shapely.STRtree(circles)
# 0.1 mm in the BNG working CRS (EPSG:27700) — far below survey resolution; the
# same grid the postcode_boundaries overlay uses.
_OVERLAY_GRID_M = 1e-4
def _robust_intersection_area(a: np.ndarray, b: np.ndarray) -> np.ndarray:
"""Vectorized ``area(a[i] ∩ b[i])`` that survives GEOS robustness failures.
External Forest Research TOW/NFI polygons are occasionally invalid
(self-intersections), and a single bad polygon makes the batched
``shapely.intersection`` raise ``TopologyException: side location conflict``,
aborting the whole run. The fast path is the raw batched overlay unchanged,
full-speed, when the data is clean and only a failure triggers repair.
The repair deliberately uses a *plain* overlay rather than the fixed-precision
(``grid_size``) one: ``make_valid`` can emit a mixed-dimension
``GeometryCollection`` (a polygon plus a dangling line), which OverlayNG
rejects with ``Overlay input is mixed-dimension`` whereas a plain overlay
accepts it, and its non-polygonal debris has zero area and is dropped by the
``clipped_area > 0`` filter downstream anyway. A final pointwise coordinate
snap (which never raises) collapses the near-coincident edges behind any
residual full-precision robustness failure.
"""
try:
return shapely.area(shapely.intersection(a, b))
except shapely.errors.GEOSException:
pass
a = shapely.make_valid(a)
b = shapely.make_valid(b)
try:
return shapely.area(shapely.intersection(a, b))
except shapely.errors.GEOSException:
a = shapely.make_valid(shapely.set_precision(a, _OVERLAY_GRID_M, mode="pointwise"))
b = shapely.make_valid(shapely.set_precision(b, _OVERLAY_GRID_M, mode="pointwise"))
return shapely.area(shapely.intersection(a, b))
def _accumulate_clipped_area(
geoms: np.ndarray,
circles: np.ndarray,
@ -294,8 +331,8 @@ def _accumulate_clipped_area(
if geom_index.size == 0:
return
clipped_area = shapely.area(
shapely.intersection(geoms[geom_index], circles[postcode_index])
clipped_area = _robust_intersection_area(
geoms[geom_index], circles[postcode_index]
)
positive = clipped_area > 0
geom_index = geom_index[positive]