don't crash

This commit is contained in:
Andras Schmelczer 2026-06-04 20:40:42 +01:00
parent aab85fe32e
commit d6d20ccd37
13 changed files with 2630 additions and 3924 deletions

View file

@ -7,11 +7,12 @@ import numpy as np
from pyproj import Transformer
from shapely import STRtree, make_valid, set_precision
from shapely.errors import GEOSException
from shapely.geometry import MultiPolygon, Polygon, mapping, shape
from shapely.geometry import MultiPolygon, Polygon, box, mapping, shape
from shapely.ops import transform as transform_geometry
from shapely.ops import unary_union
from tqdm import tqdm
from .geometry import safe_difference, safe_union
_to_wgs84 = None
@ -207,7 +208,7 @@ def merge_fragments(
merged = {}
for pc, parts in by_postcode.items():
combined = unary_union(parts)
combined = safe_union(parts)
if combined.is_empty:
continue
if not combined.is_valid:
@ -260,7 +261,10 @@ def _polygonal(geom):
]
if not polys:
return None
merged = unary_union(polys)
# Both callers run on WGS84-degree output geometry, so the robustness
# fallback snaps on the 1e-6° grid (~0.11 m), not geometry.py's metre
# default — a coarse metre grid would obliterate a degree-scale shape.
merged = safe_union(polys, grid=_OUTPUT_PRECISION_DEG)
return merged if not merged.is_empty else None
return None
@ -317,8 +321,13 @@ def _resolve_overlaps(
# Process losers from highest priority down, so every subtracted neighbour is
# already finalised.
for i in sorted(higher, key=lambda idx: rank[idx]):
cut = unary_union([out[j] for j in higher[i]])
trimmed = out[i].difference(cut)
# These geometries are WGS84 degrees already snapped to output precision,
# so the robustness fallback snaps on the same 1e-6° grid (~0.11 m) rather
# than geometry.py's metre-CRS default. A raw difference can still raise a
# "side location conflict" on near-coincident OA-seam edges; the
# fixed-precision overlay noded them away.
cut = safe_union([out[j] for j in higher[i]], grid=_OUTPUT_PRECISION_DEG)
trimmed = safe_difference(out[i], cut, grid=_OUTPUT_PRECISION_DEG)
if not trimmed.is_valid:
trimmed = make_valid(trimmed)
# Keep all polygonal parts: these geometries are in WGS84 degrees, so an
@ -336,13 +345,83 @@ def _round_coords(coords, ndigits=6):
return [_round_coords(c, ndigits) for c in coords]
def _snap_polygonal(geom, grid):
    """Snap ``geom`` onto ``grid`` and return only its polygonal part, or None.

    The snap uses ``set_precision`` in ``valid_output`` mode, which re-nodes the
    geometry so every vertex sits on a multiple of ``grid`` and the topology is
    repaired at the same time; a plain coordinate round of the result is then
    exact. If precision reduction raises a ``GEOSException`` the geometry is
    repaired with ``make_valid`` instead (without snapping).

    Whatever comes out is passed through ``make_valid`` once more if it is still
    invalid, then reduced to its polygonal content by ``_polygonal``.
    """
    try:
        candidate = set_precision(geom, grid, mode="valid_output")
    except GEOSException:
        # Precision reduction failed outright; fall back to a plain repair.
        candidate = make_valid(geom)
    if not candidate.is_valid:
        candidate = make_valid(candidate)
    return _polygonal(candidate)
# Half-width, in output-grid cells, of the square used as the last-resort
# footprint when snapping erases a sub-grid sliver. The square is therefore
# ~10 cells on a side (≈0.7–1.1 m at UK latitudes): invisible at map scale, yet
# it survives the 1e-6° snap as a valid, 4-corner ring.
_FOOTPRINT_GRID_CELLS = 5
def _grid_footprint(geom):
    """Return a tiny grid-aligned square at ``geom``'s representative point, or None.

    Last line of defence so an *active* postcode never vanishes: the de-overlap
    pass can shave a small (e.g. co-located, non-geographic) postcode down to a
    sub-grid sliver that disappears when snapped to output precision. Rather than
    drop it, place a minimal valid footprint at its location. The tiny overlap
    this re-creates with the neighbour that trimmed it is harmless: the output
    partition is best-effort, whereas a missing boundary is a hard validation
    failure.

    The square extends ``_FOOTPRINT_GRID_CELLS`` output-grid cells either side of
    the representative point (so it is twice that many cells on a side) and is
    snapped onto the output grid before being returned.

    Returns None when no representative point can be computed (GEOS raises) or
    when the point is empty, i.e. the input has no location to anchor to.
    """
    try:
        anchor = geom.representative_point()
    except GEOSException:
        return None
    # An empty geometry yields an empty point with no usable coordinates; there
    # is nowhere to place a footprint, so report that instead of building a box
    # from undefined values.
    if anchor.is_empty:
        return None
    half = _OUTPUT_PRECISION_DEG * _FOOTPRINT_GRID_CELLS
    square = box(anchor.x - half, anchor.y - half, anchor.x + half, anchor.y + half)
    return _snap_polygonal(square, _OUTPUT_PRECISION_DEG)
def _geojson_geometry(geom) -> dict | None:
    """Serialize a WGS84 polygon/multipolygon to a *valid* 6dp GeoJSON dict, or None.

    The coordinates are snapped onto the 1e-6° output grid with a re-noding pass
    BEFORE the 6dp rounding, not by the round alone. ``_resolve_overlaps`` leaves
    thin overlap-sliver triangles with full-precision (off-grid) vertices at OA
    seams; a bare round-to-6dp collapses those into degenerate rings (GEOS "Too
    few points") and pinches rings into self-intersections that pass the
    pre-rounding validity check but fail once the feature is read back from disk.
    Snapping with ``valid_output`` nodes the geometry onto the grid so the round
    that follows lands on exact 1e-6 multiples and cannot reintroduce invalidity.

    ``None`` only for a genuinely empty/degenerate-with-no-location input; a
    non-empty geometry that snaps to a sub-grid sliver is rescued into a minimal
    grid footprint rather than dropped (an active postcode must keep a boundary).

    Returns:
        ``{"type": ..., "coordinates": ...}`` with coordinates rounded by
        ``_round_coords``, or ``None`` if nothing polygonal survives.
    """

    def _as_geojson(g) -> dict:
        # Single place where a geometry becomes the dict that reaches disk.
        gj = mapping(g)
        return {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}

    geom = _polygonal(geom if geom.is_valid else make_valid(geom))
    if geom is None or geom.is_empty:
        return None

    snapped = _snap_polygonal(geom, _OUTPUT_PRECISION_DEG)
    if snapped is None or snapped.is_empty:
        # Snapping erased a sub-grid sliver: fall back to a minimal footprint.
        snapped = _grid_footprint(geom)
    if snapped is None or snapped.is_empty:
        return None

    out = _as_geojson(snapped)
    # Defence-in-depth: re-validate the dict that actually reaches disk. Snapping
    # makes the round exact, so this should already hold; repair once more on the
    # grid if a pathological vertex still pinches a ring.
    if not shape(out).is_valid:
        snapped = _snap_polygonal(shape(out), _OUTPUT_PRECISION_DEG)
        if snapped is None or snapped.is_empty:
            return None
        out = _as_geojson(snapped)
    return out
def write_district_geojson(