don't crash

This commit is contained in:
Andras Schmelczer 2026-06-04 20:40:42 +01:00
parent aab85fe32e
commit d6d20ccd37
13 changed files with 2630 additions and 3924 deletions

View file

@@ -757,6 +757,50 @@ class TestProcessOAInspireParcelAssignment:
assert frag_dict["A"].intersection(frag_dict["B"]).area < 0.01
class TestProcessOASeedFootprintGuarantee:
    """Seeded postcodes in a multi-postcode OA always come back with a fragment.

    Even when a postcode's partition cell collapses below MIN_GEOM_AREA, the
    postcode must still be emitted — an active postcode must never be dropped
    (validate_outputs is zero-tolerance).
    """

    def test_collapsed_voronoi_cells_rescued_as_footprints(self):
        """Both postcodes survive when every Voronoi cell is below the floor."""
        # The OA (0.0196 m²) sits just above MIN_GEOM_AREA, so a 2-way Voronoi
        # split leaves each cell (~0.0098 m²) below it; without the rescue both
        # postcodes would be dropped.
        postcodes = ["AA1 1AA", "AA1 1AB"]
        seeds = np.array([[0.035, 0.07], [0.105, 0.07]])
        tiny_oa = box(0, 0, 0.14, 0.14)

        fragments = process_oa(tiny_oa, seeds, postcodes, inspire_candidates=[])

        got = {pc for pc, _ in fragments}
        assert got == {"AA1 1AA", "AA1 1AB"}, f"dropped: {set(postcodes) - got}"
        for _, fragment_geom in fragments:
            assert fragment_geom.is_valid
            assert fragment_geom.area > MIN_GEOM_AREA

    def test_seed_footprint_sits_on_the_uprn(self):
        """A single seed yields one valid footprint that contains the seed."""
        from shapely.geometry import Point

        from .process_oa import _seed_footprints

        seed_xy = np.array([[500.0, 500.0]])
        big_oa = box(0, 0, 1000, 1000)

        # NOTE(review): the first argument looks like the set of postcodes that
        # still need a footprint — confirm against _seed_footprints.
        footprints = _seed_footprints({"AA1 1AA"}, seed_xy, ["AA1 1AA"], big_oa)

        assert len(footprints) == 1
        postcode, footprint = footprints[0]
        assert postcode == "AA1 1AA"
        assert footprint.is_valid
        assert footprint.area > MIN_GEOM_AREA
        assert footprint.contains(Point(500, 500))

    def test_only_orphans_get_footprints(self):
        """A postcode holding real area and a collapsed one are both emitted."""
        # A wins real area; B's lone seed collapses. Only B should be rescued,
        # and A's genuine (large) fragment must be untouched by the rescue.
        # NOTE(review): the assertion below only checks that both postcodes are
        # present; it does not itself verify that A's fragment is unchanged.
        postcodes = ["AA1 1AA", "AA1 1AA", "AA1 1AB"]
        seeds = np.array([[0.07, 0.07], [0.07, 0.07], [0.105, 0.07]])
        tiny_oa = box(0, 0, 0.14, 0.14)

        fragments = process_oa(tiny_oa, seeds, postcodes, inspire_candidates=[])

        emitted = {pc for pc, _ in fragments}
        assert emitted == {"AA1 1AA", "AA1 1AB"}
# ---------------------------------------------------------------------------
# _extract_polygonal helper
# ---------------------------------------------------------------------------
@@ -1221,6 +1265,238 @@ class TestOutputPartition:
assert geoms["AA1 1AB"].area > 0
class TestGeojsonGeometrySliverValidity:
    """_geojson_geometry output must stay valid after the final 6dp rounding.

    A sub-grid overlap sliver (left by _resolve_overlaps' full-precision
    difference) has to be snapped away on the output grid. A naive round would
    instead collapse it into a degenerate ('Too few points') or
    self-intersecting ring that only fails once read back from disk.
    """

    def test_subgrid_sliver_does_not_produce_invalid_ring(self):
        """A sliver part is dropped while the main polygon's area is kept."""
        from shapely.geometry import shape
        from shapely.validation import explain_validity

        from .output import _geojson_geometry

        # Degree-scale, valid polygon that carries the real area.
        main = box(-0.34, 51.74, -0.33, 51.75)
        # Thin triangle whose three vertices all round to the same 1e-6 cell.
        sliver_ring = [
            (-0.3400284, 51.7505061),
            (-0.3400280, 51.7505060),
            (-0.3400281, 51.7505063),
        ]
        sliver = Polygon(sliver_ring)
        assert sliver.is_valid

        gj = _geojson_geometry(MultiPolygon([main, sliver]))
        assert gj is not None

        round_tripped = shape(gj)
        assert round_tripped.is_valid, explain_validity(round_tripped)
        assert not round_tripped.is_empty
        # The real (main) area is preserved; only the degenerate sliver goes.
        assert round_tripped.area == pytest.approx(main.area, rel=1e-3)

    def test_pure_sliver_is_rescued_not_written_invalid(self):
        """A geometry that is nothing but a sliver still yields valid output."""
        from shapely.geometry import shape

        from .output import _OUTPUT_PRECISION_DEG, _geojson_geometry

        # A sub-grid sliver (extent < 1e-6° in one dimension) that snaps to
        # empty: it must be rescued into a minimal valid footprint at its
        # location, never written invalid and never dropped (an active postcode
        # keeps a boundary).
        g = _OUTPUT_PRECISION_DEG
        west = -0.30
        east = -0.30 + 5 * g
        sliver = Polygon(
            [
                (west, 51.75),
                (east, 51.75),
                (east, 51.75 + 0.2 * g),
                (west, 51.75 + 0.1 * g),
            ]
        )

        gj = _geojson_geometry(sliver)
        assert gj is not None  # rescued, not dropped

        round_tripped = shape(gj)
        assert round_tripped.is_valid
        assert not round_tripped.is_empty
        assert round_tripped.distance(sliver) == pytest.approx(0.0, abs=1e-4)
class TestColocatedPostcodesAllRetained:
    """Co-located postcodes must all survive the write, however much they overlap.

    Co-located non-geographic postcodes (e.g. AL1 9xx) have heavily-overlapping
    tiny footprints; the de-overlap pass trims most to sub-grid slivers. None
    may be dropped — every active postcode must keep a (valid, non-empty)
    boundary.
    """

    def test_overlapping_tiny_footprints_none_dropped(self, tmp_path):
        """Twelve near-coincident discs all appear, valid, in the written file."""
        from shapely.geometry import Point, shape

        # 12 discs of radius 3.0 whose centres sit on a 4-wide grid with 0.3
        # spacing, so they overlap almost entirely → after de-overlap most
        # collapse below output precision.
        postcodes = {}
        for i in range(12):
            row, col = divmod(i, 4)
            letter = chr(65 + i)
            centre = Point(514000.0 + col * 0.3, 206000.0 + row * 0.3)
            postcodes[f"AL1 9{letter}{letter}"] = centre.buffer(3.0)

        write_district_geojson(postcodes, tmp_path)

        district_file = tmp_path / "units" / "AL1.geojson"
        coll = json.loads(district_file.read_text())
        features = coll["features"]

        written = {f["properties"]["postcodes"] for f in features}
        assert written == set(postcodes), f"dropped: {set(postcodes) - written}"
        for feature in features:
            written_geom = shape(feature["geometry"])
            assert written_geom.is_valid
            assert not written_geom.is_empty
class TestSafeOverlayHelpers:
    """Robust overlay helpers retry on a fixed-precision grid after a failure.

    The retry is triggered by a GEOSException (e.g. ``side location conflict``
    from near-coincident OA-seam edges). The grid is a caller-supplied
    parameter: metres for the BNG stages, 1e-6 degrees for the WGS84 output
    stage — so the same helper serves both without crushing degree-scale
    shapes on the metre default.
    """

    def test_grid_param_honored_on_clean_inputs(self):
        """With no exception the helpers return the exact overlay result."""
        from . import geometry

        left = box(0, 0, 10, 10)
        right = box(5, 0, 15, 10)

        # No exception → exact result, identical regardless of the fallback grid.
        diff = geometry.safe_difference(left, right, grid=1e-6)
        assert diff.equals(left.difference(right))
        merged = geometry.safe_union([left, right], grid=1e-6)
        assert merged.equals(unary_union([left, right]))
        common = geometry.safe_intersection(left, right, grid=1e-6)
        assert common.equals(left.intersection(right))

    def test_difference_falls_back_to_fixed_precision_overlay(self, monkeypatch):
        """safe_difference still answers when ``.difference()`` raises."""
        from shapely import GEOSException
        from shapely.geometry.base import BaseGeometry

        from . import geometry

        def _raise_geos(_self, other, *_args, **_kwargs):
            raise GEOSException("forced side location conflict")

        # Patch the .difference() *method*; the fallback uses the top-level
        # shapely.difference() function, so it still completes.
        monkeypatch.setattr(BaseGeometry, "difference", _raise_geos)

        left = box(0, 0, 10, 10)
        right = box(5, 0, 15, 10)
        result = geometry.safe_difference(left, right, grid=1e-6)

        assert result.is_valid
        assert result.area == pytest.approx(50.0)  # left half of `left`

    def test_union_falls_back_to_fixed_precision_overlay(self, monkeypatch):
        """safe_union still answers when the module's unary_union raises."""
        from shapely import GEOSException

        from . import geometry

        def _raise_geos(_geoms):
            raise GEOSException("forced robustness failure")

        monkeypatch.setattr(geometry, "unary_union", _raise_geos)

        left = box(0, 0, 10, 10)
        right = box(5, 0, 15, 10)
        result = geometry.safe_union([left, right], grid=1e-6)

        assert result.is_valid
        assert result.area == pytest.approx(150.0)  # 100 + 100 - 50 overlap

    # A self-intersecting bow-tie: invalid. set_precision()'s DEFAULT
    # ('valid_output') mode runs its own noding pass that re-raises
    # 'side location conflict' on this — which is exactly how the production
    # crash happened (the fallback re-raised the error it was meant to absorb).
    _BOWTIE = Polygon([(0, 0), (1e-5, 1e-5), (1e-5, 0), (0, 1e-5), (0, 0)])

    # make_valid() of this spiky ring returns a mixed-dimension
    # GeometryCollection (polygon + dangling line); OverlayNG rejects that with
    # 'Overlay input is mixed-dimension', so the fallback must strip the debris.
    _SPIKY = Polygon(
        [
            (0, 0),
            (10, 0),
            (10, 10),
            (5, 5),
            (5.0000001, 5),
            (0, 10),
            (0, 0),
            (15, -5),
            (0, 0),
        ]
    )

    def test_difference_fallback_survives_invalid_input(self, monkeypatch):
        """Regression for the production crash on an invalid operand.

        The fallback must not reduce precision with set_precision's default
        valid_output mode, which re-raises 'side location conflict' on an
        invalid geometry.
        """
        from shapely import GEOSException
        from shapely.geometry.base import BaseGeometry

        from . import geometry

        assert not self._BOWTIE.is_valid  # precondition

        def _raise_geos(_self, other, *_args, **_kwargs):
            raise GEOSException("forced side location conflict")

        monkeypatch.setattr(BaseGeometry, "difference", _raise_geos)

        cutter = box(0, 0, 5e-6, 5e-6)
        result = geometry.safe_difference(self._BOWTIE, cutter, grid=1e-6)

        assert result.is_valid
        assert result.geom_type in ("Polygon", "MultiPolygon")

    def test_difference_fallback_survives_make_valid_geometrycollection(
        self, monkeypatch
    ):
        """Regression: only polygonal parts may reach the fallback overlay.

        make_valid can yield a mixed-dimension GeometryCollection that
        OverlayNG rejects; the fallback must keep only polygonal parts.
        """
        from shapely import GEOSException, make_valid
        from shapely.geometry.base import BaseGeometry

        from . import geometry

        repaired = make_valid(self._SPIKY)
        assert repaired.geom_type == "GeometryCollection"  # precondition

        def _raise_geos(_self, other, *_args, **_kwargs):
            raise GEOSException("forced side location conflict")

        monkeypatch.setattr(BaseGeometry, "difference", _raise_geos)

        cutter = box(2, 2, 8, 8)
        result = geometry.safe_difference(self._SPIKY, cutter, grid=1e-4)

        assert result.is_valid
        assert result.geom_type in ("Polygon", "MultiPolygon")
        assert result.area > 0

    def test_intersection_fallback_survives_make_valid_geometrycollection(
        self, monkeypatch
    ):
        """safe_intersection yields a valid polygonal result for the spiky ring."""
        from shapely import GEOSException
        from shapely.geometry.base import BaseGeometry

        from . import geometry

        def _raise_geos(_self, other, *_args, **_kwargs):
            raise GEOSException("forced side location conflict")

        monkeypatch.setattr(BaseGeometry, "intersection", _raise_geos)

        window = box(2, 2, 8, 8)
        result = geometry.safe_intersection(self._SPIKY, window, grid=1e-4)

        assert result.is_valid
        assert result.geom_type in ("Polygon", "MultiPolygon")
        assert result.area > 0

    def test_union_fallback_survives_invalid_and_collection_inputs(self, monkeypatch):
        """The union fallback absorbs invalid and collection-producing inputs.

        It must handle both an invalid bow-tie and a
        make_valid-becomes-GeometryCollection input without raising.
        """
        from shapely import GEOSException

        from . import geometry

        def _raise_geos(_geoms):
            raise GEOSException("forced robustness failure")

        monkeypatch.setattr(geometry, "unary_union", _raise_geos)

        members = [self._BOWTIE, self._SPIKY, box(20, 20, 30, 30)]
        result = geometry.safe_union(members, grid=1e-4)

        assert result.is_valid
        assert result.geom_type in ("Polygon", "MultiPolygon")

    def test_helpers_never_raise_on_empty_inputs(self):
        """Degenerate/empty inputs must not abort a multi-hour run."""
        from . import geometry

        nothing = Polygon()
        square = box(0, 0, 10, 10)

        assert geometry.safe_difference(nothing, square).is_empty
        assert geometry.safe_difference(square, nothing).equals(square)
        assert geometry.safe_intersection(nothing, square).is_empty
        assert geometry.safe_union([]).is_empty
        assert geometry.safe_union([nothing]).is_empty
# ---------------------------------------------------------------------------
# InspireIndex must return the same candidates as a brute-force bbox scan
# ---------------------------------------------------------------------------