don't crash
This commit is contained in:
parent
aab85fe32e
commit
d6d20ccd37
13 changed files with 2630 additions and 3924 deletions
123
pipeline/transform/postcode_boundaries/geometry.py
Normal file
123
pipeline/transform/postcode_boundaries/geometry.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
"""Robust GEOS overlay helpers.
|
||||
|
||||
Overlay operations (union, difference, intersection) can raise a
|
||||
``GEOSException`` — most often ``TopologyException: side location conflict``,
|
||||
``Ring edge missing``, or ``found non-noded intersection`` — on geometries that
|
||||
contain near-coincident or near-degenerate edges, or that are individually
|
||||
invalid. The robust remedy is a *fixed-precision* overlay: GEOS's OverlayNG
|
||||
engine, handed a grid size, nodes every edge onto that grid and finishes where
|
||||
the full-precision overlay cannot.
|
||||
|
||||
Getting the fallback to actually survive takes three rules, each of which we
|
||||
learned the hard way from a crash:
|
||||
|
||||
1. **Never precision-reduce with the default mode.** ``set_precision``'s default
|
||||
``valid_output`` (and ``keep_collapsed``) mode runs its *own* noding pass that
|
||||
re-raises the very ``side location conflict`` we are trying to escape. We push
|
||||
the grid into the overlay via the ``grid_size`` argument instead — where
|
||||
OverlayNG nodes robustly — and only ever call ``set_precision`` in
|
||||
``pointwise`` mode (pure coordinate rounding, which cannot raise).
|
||||
2. **Validate first.** ``make_valid`` repairs the self-intersections (bow-ties,
|
||||
pinches) that make GEOS choke, so the overlay starts from an OGC-valid shape.
|
||||
3. **Keep only polygonal parts.** ``make_valid`` of a spiky polygon routinely
|
||||
returns a *mixed-dimension* ``GeometryCollection`` (a polygon plus a dangling
|
||||
line), and OverlayNG rejects mixed-dimension input with
|
||||
``IllegalArgumentException: Overlay input is mixed-dimension``. Every input
|
||||
here represents an area, so the line/point debris is meaningless noise and is
|
||||
dropped before the overlay.
|
||||
|
||||
The default fallback grid is 0.1 mm in the metre-based working CRS
|
||||
(EPSG:27700), far below survey resolution and the ``MIN_GEOM_AREA`` threshold
|
||||
downstream, so it cannot crush a postcode-scale shape. Callers operating in a
|
||||
different CRS (e.g. the WGS84 output stage, where the same 1e-4 grid would be
|
||||
~11 m) pass a ``grid`` matched to that CRS. The whole fallback runs ONLY after
|
||||
the exact operation has already failed, so normal output is bit-for-bit
|
||||
unchanged.
|
||||
"""
|
||||
|
||||
import shapely
|
||||
from shapely import GEOSException, make_valid, set_precision
|
||||
from shapely.geometry import Polygon
|
||||
from shapely.ops import unary_union
|
||||
|
||||
# 0.1 mm in metres — well below MIN_GEOM_AREA (0.01 m^2) and survey resolution.
|
||||
_SNAP_GRID = 1e-4
|
||||
|
||||
_EMPTY = Polygon()
|
||||
|
||||
|
||||
def _poly_valid(geom, grid):
|
||||
"""Return an OGC-valid, polygonal-only version of ``geom``.
|
||||
|
||||
Repairs invalidity with ``make_valid`` and discards any non-polygonal debris
|
||||
(dangling lines/points) it leaves behind, since OverlayNG rejects
|
||||
mixed-dimension ``GeometryCollection`` input. The result is always a
|
||||
``Polygon``/``MultiPolygon`` (possibly empty), safe to feed to a
|
||||
fixed-precision overlay.
|
||||
"""
|
||||
g = geom if geom.is_valid else make_valid(geom)
|
||||
if g.geom_type in ("Polygon", "MultiPolygon"):
|
||||
return g
|
||||
if g.geom_type == "GeometryCollection":
|
||||
polys = [
|
||||
p
|
||||
for p in g.geoms
|
||||
if p.geom_type in ("Polygon", "MultiPolygon") and not p.is_empty
|
||||
]
|
||||
if not polys:
|
||||
return _EMPTY
|
||||
if len(polys) == 1:
|
||||
return polys[0]
|
||||
# Dissolve on the grid (robust) rather than building a possibly-invalid
|
||||
# MultiPolygon of overlapping parts.
|
||||
return shapely.union_all(polys, grid_size=grid)
|
||||
return _EMPTY # line / point only
|
||||
|
||||
|
||||
def _snap_poly_valid(geom, grid):
|
||||
"""Coordinate-round onto ``grid`` (``pointwise`` never raises), then clean."""
|
||||
return _poly_valid(set_precision(geom, grid, mode="pointwise"), grid)
|
||||
|
||||
|
||||
def safe_union(geoms, grid=_SNAP_GRID):
|
||||
"""``unary_union`` that survives GEOS robustness failures."""
|
||||
try:
|
||||
return unary_union(geoms)
|
||||
except GEOSException:
|
||||
pass
|
||||
cleaned = [_poly_valid(g, grid) for g in geoms]
|
||||
try:
|
||||
return shapely.union_all(cleaned, grid_size=grid)
|
||||
except GEOSException:
|
||||
snapped = [_snap_poly_valid(g, grid) for g in geoms]
|
||||
return shapely.union_all(snapped, grid_size=grid)
|
||||
|
||||
|
||||
def safe_difference(a, b, grid=_SNAP_GRID):
|
||||
"""``a.difference(b)`` that survives GEOS robustness failures."""
|
||||
try:
|
||||
return a.difference(b)
|
||||
except GEOSException:
|
||||
pass
|
||||
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
|
||||
try:
|
||||
return shapely.difference(a2, b2, grid_size=grid)
|
||||
except GEOSException:
|
||||
return shapely.difference(
|
||||
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
|
||||
)
|
||||
|
||||
|
||||
def safe_intersection(a, b, grid=_SNAP_GRID):
|
||||
"""``a.intersection(b)`` that survives GEOS robustness failures."""
|
||||
try:
|
||||
return a.intersection(b)
|
||||
except GEOSException:
|
||||
pass
|
||||
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
|
||||
try:
|
||||
return shapely.intersection(a2, b2, grid_size=grid)
|
||||
except GEOSException:
|
||||
return shapely.intersection(
|
||||
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue