don't crash
This commit is contained in:
parent
aab85fe32e
commit
d6d20ccd37
13 changed files with 2630 additions and 3924 deletions
|
|
@ -115,7 +115,7 @@ MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_
|
||||||
transform-school-proximity transform-tree-density \
|
transform-school-proximity transform-tree-density \
|
||||||
generate-postcode-boundaries generate-travel-times enrich-actual-listings
|
generate-postcode-boundaries generate-travel-times enrich-actual-listings
|
||||||
|
|
||||||
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles property-border-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
|
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles property-border-tiles tree-overlay-tiles crime-hotspot-tiles property-border-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
|
||||||
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX) --postcode-boundary-match "$(POSTCODES_PQ)::$(PC_BOUNDARIES)" --postcode-features $(POSTCODES_PQ) --postcode-universe "$(ARCGIS)::$(POSTCODES_PQ)" --properties-subset "$(PROPERTIES_PQ)::$(POSTCODES_PQ)" --price-index $(PRICE_INDEX)
|
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX) --postcode-boundary-match "$(POSTCODES_PQ)::$(PC_BOUNDARIES)" --postcode-features $(POSTCODES_PQ) --postcode-universe "$(ARCGIS)::$(POSTCODES_PQ)" --properties-subset "$(PROPERTIES_PQ)::$(POSTCODES_PQ)" --price-index $(PRICE_INDEX)
|
||||||
merge: $(MERGE_STAMP) | $(POSTCODES_PQ) $(PROPERTIES_PQ)
|
merge: $(MERGE_STAMP) | $(POSTCODES_PQ) $(PROPERTIES_PQ)
|
||||||
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --postcode-features $(POSTCODES_PQ) --postcode-universe "$(ARCGIS)::$(POSTCODES_PQ)" --properties-subset "$(PROPERTIES_PQ)::$(POSTCODES_PQ)"
|
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --postcode-features $(POSTCODES_PQ) --postcode-universe "$(ARCGIS)::$(POSTCODES_PQ)" --properties-subset "$(PROPERTIES_PQ)::$(POSTCODES_PQ)"
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -72,6 +72,18 @@ assert len(EXPECTED_BAND_NAMES) == len(AGE_BANDS), (
|
||||||
"EXPECTED_BAND_NAMES and AGE_BANDS must stay aligned 1:1"
|
"EXPECTED_BAND_NAMES and AGE_BANDS must stay aligned 1:1"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# NOMIS sometimes labels a band with a wording variant that denotes the SAME
|
||||||
|
# age range (e.g. "Aged 4 years and under" for ages 0-4, "Aged 90 years and
|
||||||
|
# over" wording for the top band). Map such known-equivalent labels back to the
|
||||||
|
# canonical name BEFORE validation so a real band change still fails loudly,
|
||||||
|
# but a cosmetic relabel of an identical range does not block the build.
|
||||||
|
BAND_NAME_ALIASES = {
|
||||||
|
"Aged 4 years and under": "Aged 0 to 4 years",
|
||||||
|
}
|
||||||
|
assert set(BAND_NAME_ALIASES.values()) <= set(EXPECTED_BAND_NAMES), (
|
||||||
|
"BAND_NAME_ALIASES must map to canonical EXPECTED_BAND_NAMES"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def compute_median_age(counts: list[int]) -> float:
|
def compute_median_age(counts: list[int]) -> float:
|
||||||
"""Compute median age from five-year band counts using linear interpolation."""
|
"""Compute median age from five-year band counts using linear interpolation."""
|
||||||
|
|
@ -98,6 +110,15 @@ def _bands_to_median_table(pivoted: pl.DataFrame) -> pl.DataFrame:
|
||||||
missing/extra/relabelled band would otherwise silently mis-align counts
|
missing/extra/relabelled band would otherwise silently mis-align counts
|
||||||
against the wrong AGE_BANDS lower bound, so we fail loudly instead.
|
against the wrong AGE_BANDS lower bound, so we fail loudly instead.
|
||||||
"""
|
"""
|
||||||
|
# Normalise known-equivalent NOMIS label variants to their canonical name
|
||||||
|
# before validating (renaming onto an already-present canonical column would
|
||||||
|
# collide, so polars raises loudly in that genuinely ambiguous case).
|
||||||
|
rename_map = {
|
||||||
|
c: BAND_NAME_ALIASES[c] for c in pivoted.columns if c in BAND_NAME_ALIASES
|
||||||
|
}
|
||||||
|
if rename_map:
|
||||||
|
pivoted = pivoted.rename(rename_map)
|
||||||
|
|
||||||
# Validate the pivoted age-band columns against the canonical NOMIS set
|
# Validate the pivoted age-band columns against the canonical NOMIS set
|
||||||
# BEFORE computing anything.
|
# BEFORE computing anything.
|
||||||
band_cols = [c for c in pivoted.columns if c != "GEOGRAPHY_CODE"]
|
band_cols = [c for c in pivoted.columns if c != "GEOGRAPHY_CODE"]
|
||||||
|
|
|
||||||
|
|
@ -266,8 +266,12 @@ def _download_tile(
|
||||||
except (
|
except (
|
||||||
NoGeoTiffError,
|
NoGeoTiffError,
|
||||||
httpx.HTTPStatusError,
|
httpx.HTTPStatusError,
|
||||||
httpx.TimeoutException,
|
# TransportError is the superset of TimeoutException, ConnectError,
|
||||||
httpx.ConnectError,
|
# ReadError and ProtocolError — including RemoteProtocolError, raised
|
||||||
|
# when the WCS server closes the connection mid-stream ("incomplete
|
||||||
|
# chunked read"). All are transient; retry/split rather than letting
|
||||||
|
# one flaky tile crash the whole raster download.
|
||||||
|
httpx.TransportError,
|
||||||
) as e:
|
) as e:
|
||||||
last_error = e
|
last_error = e
|
||||||
if attempt < MAX_RETRIES:
|
if attempt < MAX_RETRIES:
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,21 @@ def test_null_band_count_is_treated_as_zero_not_crash():
|
||||||
assert table["median_age"][0] == pytest.approx(32.5)
|
assert table["median_age"][0] == pytest.approx(32.5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_equivalent_band_label_alias_is_accepted():
|
||||||
|
# NOMIS relabelled the first band "Aged 4 years and under" (same as ages
|
||||||
|
# 0-4). It must be normalised to the canonical name and used as band 0, not
|
||||||
|
# rejected. All 100 people in that band -> median in the 0-4 range: 2.5.
|
||||||
|
counts_by_band = {name: [0] for name in EXPECTED_BAND_NAMES}
|
||||||
|
counts_by_band["Aged 4 years and under"] = counts_by_band.pop("Aged 0 to 4 years")
|
||||||
|
counts_by_band["Aged 4 years and under"] = [100]
|
||||||
|
pivoted = _pivoted(counts_by_band)
|
||||||
|
|
||||||
|
table = median_age._bands_to_median_table(pivoted)
|
||||||
|
|
||||||
|
assert table.height == 1
|
||||||
|
assert table["median_age"][0] == pytest.approx(2.5)
|
||||||
|
|
||||||
|
|
||||||
def test_missing_band_raises_clear_error():
|
def test_missing_band_raises_clear_error():
|
||||||
counts_by_band = {name: [10] for name in EXPECTED_BAND_NAMES}
|
counts_by_band = {name: [10] for name in EXPECTED_BAND_NAMES}
|
||||||
del counts_by_band["Aged 85 years and over"]
|
del counts_by_band["Aged 85 years and over"]
|
||||||
|
|
|
||||||
123
pipeline/transform/postcode_boundaries/geometry.py
Normal file
123
pipeline/transform/postcode_boundaries/geometry.py
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
"""Robust GEOS overlay helpers.
|
||||||
|
|
||||||
|
Overlay operations (union, difference, intersection) can raise a
|
||||||
|
``GEOSException`` — most often ``TopologyException: side location conflict``,
|
||||||
|
``Ring edge missing``, or ``found non-noded intersection`` — on geometries that
|
||||||
|
contain near-coincident or near-degenerate edges, or that are individually
|
||||||
|
invalid. The robust remedy is a *fixed-precision* overlay: GEOS's OverlayNG
|
||||||
|
engine, handed a grid size, nodes every edge onto that grid and finishes where
|
||||||
|
the full-precision overlay cannot.
|
||||||
|
|
||||||
|
Getting the fallback to actually survive takes three rules, each of which we
|
||||||
|
learned the hard way from a crash:
|
||||||
|
|
||||||
|
1. **Never precision-reduce with the default mode.** ``set_precision``'s default
|
||||||
|
``valid_output`` (and ``keep_collapsed``) mode runs its *own* noding pass that
|
||||||
|
re-raises the very ``side location conflict`` we are trying to escape. We push
|
||||||
|
the grid into the overlay via the ``grid_size`` argument instead — where
|
||||||
|
OverlayNG nodes robustly — and only ever call ``set_precision`` in
|
||||||
|
``pointwise`` mode (pure coordinate rounding, which cannot raise).
|
||||||
|
2. **Validate first.** ``make_valid`` repairs the self-intersections (bow-ties,
|
||||||
|
pinches) that make GEOS choke, so the overlay starts from an OGC-valid shape.
|
||||||
|
3. **Keep only polygonal parts.** ``make_valid`` of a spiky polygon routinely
|
||||||
|
returns a *mixed-dimension* ``GeometryCollection`` (a polygon plus a dangling
|
||||||
|
line), and OverlayNG rejects mixed-dimension input with
|
||||||
|
``IllegalArgumentException: Overlay input is mixed-dimension``. Every input
|
||||||
|
here represents an area, so the line/point debris is meaningless noise and is
|
||||||
|
dropped before the overlay.
|
||||||
|
|
||||||
|
The default fallback grid is 0.1 mm in the metre-based working CRS
|
||||||
|
(EPSG:27700), far below survey resolution and the ``MIN_GEOM_AREA`` threshold
|
||||||
|
downstream, so it cannot crush a postcode-scale shape. Callers operating in a
|
||||||
|
different CRS (e.g. the WGS84 output stage, where the same 1e-4 grid would be
|
||||||
|
~11 m) pass a ``grid`` matched to that CRS. The whole fallback runs ONLY after
|
||||||
|
the exact operation has already failed, so normal output is bit-for-bit
|
||||||
|
unchanged.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import shapely
|
||||||
|
from shapely import GEOSException, make_valid, set_precision
|
||||||
|
from shapely.geometry import Polygon
|
||||||
|
from shapely.ops import unary_union
|
||||||
|
|
||||||
|
# 0.1 mm in metres — well below MIN_GEOM_AREA (0.01 m^2) and survey resolution.
|
||||||
|
_SNAP_GRID = 1e-4
|
||||||
|
|
||||||
|
_EMPTY = Polygon()
|
||||||
|
|
||||||
|
|
||||||
|
def _poly_valid(geom, grid):
|
||||||
|
"""Return an OGC-valid, polygonal-only version of ``geom``.
|
||||||
|
|
||||||
|
Repairs invalidity with ``make_valid`` and discards any non-polygonal debris
|
||||||
|
(dangling lines/points) it leaves behind, since OverlayNG rejects
|
||||||
|
mixed-dimension ``GeometryCollection`` input. The result is always a
|
||||||
|
``Polygon``/``MultiPolygon`` (possibly empty), safe to feed to a
|
||||||
|
fixed-precision overlay.
|
||||||
|
"""
|
||||||
|
g = geom if geom.is_valid else make_valid(geom)
|
||||||
|
if g.geom_type in ("Polygon", "MultiPolygon"):
|
||||||
|
return g
|
||||||
|
if g.geom_type == "GeometryCollection":
|
||||||
|
polys = [
|
||||||
|
p
|
||||||
|
for p in g.geoms
|
||||||
|
if p.geom_type in ("Polygon", "MultiPolygon") and not p.is_empty
|
||||||
|
]
|
||||||
|
if not polys:
|
||||||
|
return _EMPTY
|
||||||
|
if len(polys) == 1:
|
||||||
|
return polys[0]
|
||||||
|
# Dissolve on the grid (robust) rather than building a possibly-invalid
|
||||||
|
# MultiPolygon of overlapping parts.
|
||||||
|
return shapely.union_all(polys, grid_size=grid)
|
||||||
|
return _EMPTY # line / point only
|
||||||
|
|
||||||
|
|
||||||
|
def _snap_poly_valid(geom, grid):
|
||||||
|
"""Coordinate-round onto ``grid`` (``pointwise`` never raises), then clean."""
|
||||||
|
return _poly_valid(set_precision(geom, grid, mode="pointwise"), grid)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_union(geoms, grid=_SNAP_GRID):
|
||||||
|
"""``unary_union`` that survives GEOS robustness failures."""
|
||||||
|
try:
|
||||||
|
return unary_union(geoms)
|
||||||
|
except GEOSException:
|
||||||
|
pass
|
||||||
|
cleaned = [_poly_valid(g, grid) for g in geoms]
|
||||||
|
try:
|
||||||
|
return shapely.union_all(cleaned, grid_size=grid)
|
||||||
|
except GEOSException:
|
||||||
|
snapped = [_snap_poly_valid(g, grid) for g in geoms]
|
||||||
|
return shapely.union_all(snapped, grid_size=grid)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_difference(a, b, grid=_SNAP_GRID):
|
||||||
|
"""``a.difference(b)`` that survives GEOS robustness failures."""
|
||||||
|
try:
|
||||||
|
return a.difference(b)
|
||||||
|
except GEOSException:
|
||||||
|
pass
|
||||||
|
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
|
||||||
|
try:
|
||||||
|
return shapely.difference(a2, b2, grid_size=grid)
|
||||||
|
except GEOSException:
|
||||||
|
return shapely.difference(
|
||||||
|
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_intersection(a, b, grid=_SNAP_GRID):
|
||||||
|
"""``a.intersection(b)`` that survives GEOS robustness failures."""
|
||||||
|
try:
|
||||||
|
return a.intersection(b)
|
||||||
|
except GEOSException:
|
||||||
|
pass
|
||||||
|
a2, b2 = _poly_valid(a, grid), _poly_valid(b, grid)
|
||||||
|
try:
|
||||||
|
return shapely.intersection(a2, b2, grid_size=grid)
|
||||||
|
except GEOSException:
|
||||||
|
return shapely.intersection(
|
||||||
|
_snap_poly_valid(a2, grid), _snap_poly_valid(b2, grid), grid_size=grid
|
||||||
|
)
|
||||||
|
|
@ -5,9 +5,10 @@ from pathlib import Path
|
||||||
import polars as pl
|
import polars as pl
|
||||||
from shapely import wkb
|
from shapely import wkb
|
||||||
from shapely.geometry import MultiPolygon, Polygon
|
from shapely.geometry import MultiPolygon, Polygon
|
||||||
from shapely.ops import unary_union
|
|
||||||
from shapely.strtree import STRtree
|
from shapely.strtree import STRtree
|
||||||
|
|
||||||
|
from .geometry import safe_difference, safe_union
|
||||||
|
|
||||||
|
|
||||||
def load_greenspace(path: Path) -> tuple[STRtree, list]:
|
def load_greenspace(path: Path) -> tuple[STRtree, list]:
|
||||||
"""Load greenspace parquet and build an STRtree spatial index.
|
"""Load greenspace parquet and build an STRtree spatial index.
|
||||||
|
|
@ -51,8 +52,8 @@ def subtract_greenspace(
|
||||||
if not intersecting:
|
if not intersecting:
|
||||||
return postcode_geom
|
return postcode_geom
|
||||||
|
|
||||||
green_union = unary_union(intersecting)
|
green_union = safe_union(intersecting)
|
||||||
result = postcode_geom.difference(green_union)
|
result = safe_difference(postcode_geom, green_union)
|
||||||
|
|
||||||
if result.is_empty:
|
if result.is_empty:
|
||||||
return postcode_geom
|
return postcode_geom
|
||||||
|
|
|
||||||
|
|
@ -7,11 +7,12 @@ import numpy as np
|
||||||
from pyproj import Transformer
|
from pyproj import Transformer
|
||||||
from shapely import STRtree, make_valid, set_precision
|
from shapely import STRtree, make_valid, set_precision
|
||||||
from shapely.errors import GEOSException
|
from shapely.errors import GEOSException
|
||||||
from shapely.geometry import MultiPolygon, Polygon, mapping, shape
|
from shapely.geometry import MultiPolygon, Polygon, box, mapping, shape
|
||||||
from shapely.ops import transform as transform_geometry
|
from shapely.ops import transform as transform_geometry
|
||||||
from shapely.ops import unary_union
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from .geometry import safe_difference, safe_union
|
||||||
|
|
||||||
_to_wgs84 = None
|
_to_wgs84 = None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -207,7 +208,7 @@ def merge_fragments(
|
||||||
|
|
||||||
merged = {}
|
merged = {}
|
||||||
for pc, parts in by_postcode.items():
|
for pc, parts in by_postcode.items():
|
||||||
combined = unary_union(parts)
|
combined = safe_union(parts)
|
||||||
if combined.is_empty:
|
if combined.is_empty:
|
||||||
continue
|
continue
|
||||||
if not combined.is_valid:
|
if not combined.is_valid:
|
||||||
|
|
@ -260,7 +261,10 @@ def _polygonal(geom):
|
||||||
]
|
]
|
||||||
if not polys:
|
if not polys:
|
||||||
return None
|
return None
|
||||||
merged = unary_union(polys)
|
# Both callers run on WGS84-degree output geometry, so the robustness
|
||||||
|
# fallback snaps on the 1e-6° grid (~0.11 m), not geometry.py's metre
|
||||||
|
# default — a coarse metre grid would obliterate a degree-scale shape.
|
||||||
|
merged = safe_union(polys, grid=_OUTPUT_PRECISION_DEG)
|
||||||
return merged if not merged.is_empty else None
|
return merged if not merged.is_empty else None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -317,8 +321,13 @@ def _resolve_overlaps(
|
||||||
# Process losers from highest priority down, so every subtracted neighbour is
|
# Process losers from highest priority down, so every subtracted neighbour is
|
||||||
# already finalised.
|
# already finalised.
|
||||||
for i in sorted(higher, key=lambda idx: rank[idx]):
|
for i in sorted(higher, key=lambda idx: rank[idx]):
|
||||||
cut = unary_union([out[j] for j in higher[i]])
|
# These geometries are WGS84 degrees already snapped to output precision,
|
||||||
trimmed = out[i].difference(cut)
|
# so the robustness fallback snaps on the same 1e-6° grid (~0.11 m) rather
|
||||||
|
# than geometry.py's metre-CRS default. A raw difference can still raise a
|
||||||
|
# "side location conflict" on near-coincident OA-seam edges; the
|
||||||
|
# fixed-precision overlay noded them away.
|
||||||
|
cut = safe_union([out[j] for j in higher[i]], grid=_OUTPUT_PRECISION_DEG)
|
||||||
|
trimmed = safe_difference(out[i], cut, grid=_OUTPUT_PRECISION_DEG)
|
||||||
if not trimmed.is_valid:
|
if not trimmed.is_valid:
|
||||||
trimmed = make_valid(trimmed)
|
trimmed = make_valid(trimmed)
|
||||||
# Keep all polygonal parts: these geometries are in WGS84 degrees, so an
|
# Keep all polygonal parts: these geometries are in WGS84 degrees, so an
|
||||||
|
|
@ -336,13 +345,83 @@ def _round_coords(coords, ndigits=6):
|
||||||
return [_round_coords(c, ndigits) for c in coords]
|
return [_round_coords(c, ndigits) for c in coords]
|
||||||
|
|
||||||
|
|
||||||
|
def _snap_polygonal(geom, grid):
|
||||||
|
"""Re-node ``geom`` onto ``grid`` (``valid_output``) → polygonal-only, or None.
|
||||||
|
|
||||||
|
``set_precision`` ``valid_output`` runs an OverlayNG noding pass that places
|
||||||
|
every vertex on a multiple of ``grid`` *and* fixes the topology, so a plain
|
||||||
|
coordinate round of the result is exact (no two distinct vertices can land in
|
||||||
|
the same cell). Falls back to ``make_valid`` if precision-reduction raises.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
snapped = set_precision(geom, grid, mode="valid_output")
|
||||||
|
except GEOSException:
|
||||||
|
snapped = make_valid(geom)
|
||||||
|
return _polygonal(snapped if snapped.is_valid else make_valid(snapped))
|
||||||
|
|
||||||
|
|
||||||
|
# A square this many output-grid cells on a side, used as the last-resort
|
||||||
|
# footprint when snapping erases a sub-grid sliver. ~10 cells (≈0.7–1.1 m at UK
|
||||||
|
# latitudes) is invisible at map scale yet survives the 1e-6° snap as a valid,
|
||||||
|
# 4-corner ring.
|
||||||
|
_FOOTPRINT_GRID_CELLS = 5
|
||||||
|
|
||||||
|
|
||||||
|
def _grid_footprint(geom):
|
||||||
|
"""A tiny grid-aligned square at ``geom``'s representative point, snapped valid.
|
||||||
|
|
||||||
|
Last line of defence so an *active* postcode never vanishes: the de-overlap
|
||||||
|
pass can shave a small (e.g. co-located, non-geographic) postcode down to a
|
||||||
|
sub-grid sliver that disappears when snapped to output precision. Rather than
|
||||||
|
drop it, place a minimal valid footprint at its location. The tiny overlap
|
||||||
|
this re-creates with the neighbour that trimmed it is harmless — the output
|
||||||
|
partition is best-effort, a missing boundary is a hard validation failure.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
point = geom.representative_point()
|
||||||
|
except GEOSException:
|
||||||
|
return None
|
||||||
|
half = _OUTPUT_PRECISION_DEG * _FOOTPRINT_GRID_CELLS
|
||||||
|
square = box(point.x - half, point.y - half, point.x + half, point.y + half)
|
||||||
|
return _snap_polygonal(square, _OUTPUT_PRECISION_DEG)
|
||||||
|
|
||||||
|
|
||||||
def _geojson_geometry(geom) -> dict | None:
|
def _geojson_geometry(geom) -> dict | None:
|
||||||
"""Serialize a WGS84 polygon/multipolygon to a 6dp GeoJSON dict, or None."""
|
"""Serialize a WGS84 polygon/multipolygon to a *valid* 6dp GeoJSON dict, or None.
|
||||||
|
|
||||||
|
The coordinates are snapped onto the 1e-6° output grid with a re-noding pass
|
||||||
|
BEFORE the 6dp rounding, not by the round alone. ``_resolve_overlaps`` leaves
|
||||||
|
thin overlap-sliver triangles with full-precision (off-grid) vertices at OA
|
||||||
|
seams; a bare round-to-6dp collapses those into degenerate rings (GEOS "Too
|
||||||
|
few points") and pinches rings into self-intersections that pass the
|
||||||
|
pre-rounding validity check but fail once the feature is read back from disk.
|
||||||
|
Snapping with ``valid_output`` nodes the geometry onto the grid so the round
|
||||||
|
that follows lands on exact 1e-6 multiples and cannot reintroduce invalidity.
|
||||||
|
|
||||||
|
``None`` only for a genuinely empty/degenerate-with-no-location input; a
|
||||||
|
non-empty geometry that snaps to a sub-grid sliver is rescued into a minimal
|
||||||
|
grid footprint rather than dropped (an active postcode must keep a boundary).
|
||||||
|
"""
|
||||||
geom = _polygonal(geom if geom.is_valid else make_valid(geom))
|
geom = _polygonal(geom if geom.is_valid else make_valid(geom))
|
||||||
if geom is None or geom.is_empty:
|
if geom is None or geom.is_empty:
|
||||||
return None
|
return None
|
||||||
gj = mapping(geom)
|
snapped = _snap_polygonal(geom, _OUTPUT_PRECISION_DEG)
|
||||||
return {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
|
if snapped is None or snapped.is_empty:
|
||||||
|
snapped = _grid_footprint(geom)
|
||||||
|
if snapped is None or snapped.is_empty:
|
||||||
|
return None
|
||||||
|
gj = mapping(snapped)
|
||||||
|
out = {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
|
||||||
|
# Defence-in-depth: re-validate the dict that actually reaches disk. Snapping
|
||||||
|
# makes the round exact, so this should already hold; repair once more on the
|
||||||
|
# grid if a pathological vertex still pinches a ring.
|
||||||
|
if not shape(out).is_valid:
|
||||||
|
snapped = _snap_polygonal(shape(out), _OUTPUT_PRECISION_DEG)
|
||||||
|
if snapped is None or snapped.is_empty:
|
||||||
|
return None
|
||||||
|
gj = mapping(snapped)
|
||||||
|
out = {"type": gj["type"], "coordinates": _round_coords(gj["coordinates"])}
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def write_district_geojson(
|
def write_district_geojson(
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,23 @@ from collections import Counter, defaultdict
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.spatial import cKDTree
|
from scipy.spatial import cKDTree
|
||||||
from shapely import STRtree, make_valid
|
from shapely import STRtree, make_valid
|
||||||
from shapely.geometry import MultiPolygon, Polygon
|
from shapely.geometry import MultiPoint, MultiPolygon, Point, Polygon
|
||||||
from shapely.ops import unary_union
|
|
||||||
|
|
||||||
|
from .geometry import safe_difference, safe_intersection, safe_union
|
||||||
from .voronoi import compute_voronoi_regions
|
from .voronoi import compute_voronoi_regions
|
||||||
|
|
||||||
MIN_GEOM_AREA = 0.01
|
MIN_GEOM_AREA = 0.01
|
||||||
|
|
||||||
|
# Minimal footprint (BNG metres) for a postcode whose UPRN seed wins no area in a
|
||||||
|
# crowded multi-postcode OA — its Voronoi cell ∩ remaining collapses below
|
||||||
|
# MIN_GEOM_AREA, or its seed sits inside an INSPIRE parcel wholly claimed by a
|
||||||
|
# co-located postcode. Every *active* postcode must keep a boundary
|
||||||
|
# (validate_outputs is zero-tolerance), so it gets a small disc at its true seed
|
||||||
|
# location. ~28 m² clears MIN_GEOM_AREA and output snapping; the overlap it
|
||||||
|
# creates with the area's winner is resolved at the output stage, where the
|
||||||
|
# smaller postcode wins the contested ground (see output._resolve_overlaps).
|
||||||
|
_MIN_SEED_FOOTPRINT_M = 3.0
|
||||||
|
|
||||||
|
|
||||||
def process_oa(
|
def process_oa(
|
||||||
oa_geom: Polygon | MultiPolygon,
|
oa_geom: Polygon | MultiPolygon,
|
||||||
|
|
@ -36,10 +46,12 @@ def process_oa(
|
||||||
|
|
||||||
# Compute remaining area
|
# Compute remaining area
|
||||||
if claimed:
|
if claimed:
|
||||||
all_claimed = unary_union(list(claimed.values()))
|
all_claimed = safe_union(list(claimed.values()))
|
||||||
all_claimed = _clean_polygonal(all_claimed)
|
all_claimed = _clean_polygonal(all_claimed)
|
||||||
remaining = (
|
remaining = (
|
||||||
valid_oa.difference(all_claimed) if all_claimed is not None else valid_oa
|
safe_difference(valid_oa, all_claimed)
|
||||||
|
if all_claimed is not None
|
||||||
|
else valid_oa
|
||||||
)
|
)
|
||||||
remaining = _clean_polygonal(remaining)
|
remaining = _clean_polygonal(remaining)
|
||||||
else:
|
else:
|
||||||
|
|
@ -60,13 +72,54 @@ def process_oa(
|
||||||
|
|
||||||
fragments = []
|
fragments = []
|
||||||
for pc, parts in result.items():
|
for pc, parts in result.items():
|
||||||
merged = _clean_polygonal(unary_union(parts))
|
merged = _clean_polygonal(safe_union(parts))
|
||||||
if merged is not None:
|
if merged is not None:
|
||||||
fragments.append((pc, merged))
|
fragments.append((pc, merged))
|
||||||
|
|
||||||
|
# Every postcode with a UPRN seed in this OA must keep at least a minimal
|
||||||
|
# footprint — in a dense OA (a block of flats with hundreds of distinct
|
||||||
|
# postcodes) a single-seed postcode's cell can collapse below MIN_GEOM_AREA or
|
||||||
|
# be fully absorbed by a co-located postcode's INSPIRE parcel, producing no
|
||||||
|
# fragment, and an active postcode must never be dropped.
|
||||||
|
orphans = unique_pcs - {pc for pc, _ in fragments}
|
||||||
|
if orphans:
|
||||||
|
fragments.extend(_seed_footprints(orphans, points, postcodes, valid_oa))
|
||||||
|
|
||||||
return fragments
|
return fragments
|
||||||
|
|
||||||
|
|
||||||
|
def _seed_footprints(
|
||||||
|
orphans: set[str],
|
||||||
|
points: np.ndarray,
|
||||||
|
postcodes: list[str],
|
||||||
|
valid_oa: Polygon | MultiPolygon,
|
||||||
|
) -> list[tuple[str, Polygon | MultiPolygon]]:
|
||||||
|
"""Give each orphan postcode a minimal disc footprint at its UPRN seed(s).
|
||||||
|
|
||||||
|
Orphans are postcodes with a UPRN in this OA that nonetheless won no area in
|
||||||
|
the INSPIRE/Voronoi partition. Each keeps a small disc at its true location,
|
||||||
|
clipped to the OA; the overlap with the area's winner is resolved at output.
|
||||||
|
"""
|
||||||
|
by_pc: dict[str, list] = defaultdict(list)
|
||||||
|
for i, pc in enumerate(postcodes):
|
||||||
|
if pc in orphans:
|
||||||
|
by_pc[pc].append(points[i])
|
||||||
|
|
||||||
|
out: list[tuple[str, Polygon | MultiPolygon]] = []
|
||||||
|
for pc, pts in by_pc.items():
|
||||||
|
arr = np.asarray(pts, dtype=np.float64)
|
||||||
|
seed = Point(arr[0]) if len(arr) == 1 else MultiPoint(arr)
|
||||||
|
disc = seed.buffer(_MIN_SEED_FOOTPRINT_M)
|
||||||
|
clipped = _clean_polygonal(safe_intersection(disc, valid_oa))
|
||||||
|
if clipped is None:
|
||||||
|
# Seed on/near the OA edge: keep the unclipped disc so the postcode
|
||||||
|
# still gets a footprint at its location rather than no boundary.
|
||||||
|
clipped = _clean_polygonal(disc)
|
||||||
|
if clipped is not None:
|
||||||
|
out.append((pc, clipped))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _claim_inspire_parcels(
|
def _claim_inspire_parcels(
|
||||||
valid_oa: Polygon | MultiPolygon,
|
valid_oa: Polygon | MultiPolygon,
|
||||||
points: np.ndarray,
|
points: np.ndarray,
|
||||||
|
|
@ -141,7 +194,7 @@ def _claim_inspire_parcels(
|
||||||
|
|
||||||
assignable = parcel
|
assignable = parcel
|
||||||
if contained_union is not None:
|
if contained_union is not None:
|
||||||
assignable = assignable.difference(contained_union)
|
assignable = safe_difference(assignable, contained_union)
|
||||||
for part in _polygon_parts(assignable):
|
for part in _polygon_parts(assignable):
|
||||||
part = _clean_polygonal(part)
|
part = _clean_polygonal(part)
|
||||||
if part is None:
|
if part is None:
|
||||||
|
|
@ -169,7 +222,7 @@ def _prepare_inspire_parcels(
|
||||||
continue
|
continue
|
||||||
if not geom.intersects(valid_oa):
|
if not geom.intersects(valid_oa):
|
||||||
continue
|
continue
|
||||||
clipped = _clean_polygonal(geom.intersection(valid_oa))
|
clipped = _clean_polygonal(safe_intersection(geom, valid_oa))
|
||||||
if clipped is not None:
|
if clipped is not None:
|
||||||
parcels.append(clipped)
|
parcels.append(clipped)
|
||||||
return parcels
|
return parcels
|
||||||
|
|
@ -199,7 +252,7 @@ def _merge_parts_by_postcode(
|
||||||
) -> dict[str, Polygon | MultiPolygon]:
|
) -> dict[str, Polygon | MultiPolygon]:
|
||||||
merged: dict[str, Polygon | MultiPolygon] = {}
|
merged: dict[str, Polygon | MultiPolygon] = {}
|
||||||
for pc, parts in parts_by_postcode.items():
|
for pc, parts in parts_by_postcode.items():
|
||||||
geom = _clean_polygonal(unary_union(parts))
|
geom = _clean_polygonal(safe_union(parts))
|
||||||
if geom is not None:
|
if geom is not None:
|
||||||
merged[pc] = geom
|
merged[pc] = geom
|
||||||
return merged
|
return merged
|
||||||
|
|
@ -210,7 +263,7 @@ def _union_claims(
|
||||||
) -> Polygon | MultiPolygon | None:
|
) -> Polygon | MultiPolygon | None:
|
||||||
if not claims:
|
if not claims:
|
||||||
return None
|
return None
|
||||||
return _clean_polygonal(unary_union([geom for _, geom in claims]))
|
return _clean_polygonal(safe_union([geom for _, geom in claims]))
|
||||||
|
|
||||||
|
|
||||||
def _resolve_ordered_claims(
|
def _resolve_ordered_claims(
|
||||||
|
|
@ -224,11 +277,11 @@ def _resolve_ordered_claims(
|
||||||
if geom is None:
|
if geom is None:
|
||||||
continue
|
continue
|
||||||
if used is not None:
|
if used is not None:
|
||||||
geom = _clean_polygonal(geom.difference(used))
|
geom = _clean_polygonal(safe_difference(geom, used))
|
||||||
if geom is None:
|
if geom is None:
|
||||||
continue
|
continue
|
||||||
resolved_parts[pc].append(geom)
|
resolved_parts[pc].append(geom)
|
||||||
used = _clean_polygonal(geom if used is None else unary_union([used, geom]))
|
used = _clean_polygonal(geom if used is None else safe_union([used, geom]))
|
||||||
return _merge_parts_by_postcode(resolved_parts)
|
return _merge_parts_by_postcode(resolved_parts)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -261,7 +314,7 @@ def _extract_polygonal(geom) -> Polygon | MultiPolygon | None:
|
||||||
# overlapping polygonal parts, and a MultiPolygon of overlapping parts is
|
# overlapping polygonal parts, and a MultiPolygon of overlapping parts is
|
||||||
# invalid — it double-counts area and makes the next `.difference()` raise
|
# invalid — it double-counts area and makes the next `.difference()` raise
|
||||||
# a TopologyException that aborts the OA (and, in parallel mode, the
|
# a TopologyException that aborts the OA (and, in parallel mode, the
|
||||||
# worker). unary_union merges them into a valid geometry.
|
# worker). safe_union merges them into a valid geometry.
|
||||||
merged = unary_union(polys)
|
merged = safe_union(polys)
|
||||||
return merged if not merged.is_empty else None
|
return merged if not merged.is_empty else None
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -757,6 +757,50 @@ class TestProcessOAInspireParcelAssignment:
|
||||||
assert frag_dict["A"].intersection(frag_dict["B"]).area < 0.01
|
assert frag_dict["A"].intersection(frag_dict["B"]).area < 0.01
|
||||||
|
|
||||||
|
|
||||||
|
class TestProcessOASeedFootprintGuarantee:
|
||||||
|
"""Every postcode with a UPRN seed in a multi-postcode OA must produce a
|
||||||
|
fragment, even when its partition cell collapses below MIN_GEOM_AREA — an
|
||||||
|
active postcode must never be dropped (validate_outputs is zero-tolerance)."""
|
||||||
|
|
||||||
|
def test_collapsed_voronoi_cells_rescued_as_footprints(self):
|
||||||
|
# OA just above MIN_GEOM_AREA; a 2-way Voronoi split leaves each cell
|
||||||
|
# (~0.0098 m²) below MIN_GEOM_AREA, so both would be dropped without rescue.
|
||||||
|
oa_geom = box(0, 0, 0.14, 0.14) # 0.0196 m²
|
||||||
|
points = np.array([[0.035, 0.07], [0.105, 0.07]])
|
||||||
|
postcodes = ["AA1 1AA", "AA1 1AB"]
|
||||||
|
|
||||||
|
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[])
|
||||||
|
got = {pc for pc, _ in fragments}
|
||||||
|
assert got == {"AA1 1AA", "AA1 1AB"}, f"dropped: {set(postcodes) - got}"
|
||||||
|
for _, geom in fragments:
|
||||||
|
assert geom.is_valid
|
||||||
|
assert geom.area > MIN_GEOM_AREA
|
||||||
|
|
||||||
|
def test_seed_footprint_sits_on_the_uprn(self):
|
||||||
|
from shapely.geometry import Point
|
||||||
|
|
||||||
|
from .process_oa import _seed_footprints
|
||||||
|
|
||||||
|
oa_geom = box(0, 0, 1000, 1000)
|
||||||
|
points = np.array([[500.0, 500.0]])
|
||||||
|
out = _seed_footprints({"AA1 1AA"}, points, ["AA1 1AA"], oa_geom)
|
||||||
|
|
||||||
|
assert len(out) == 1
|
||||||
|
pc, geom = out[0]
|
||||||
|
assert pc == "AA1 1AA"
|
||||||
|
assert geom.is_valid and geom.area > MIN_GEOM_AREA
|
||||||
|
assert geom.contains(Point(500, 500))
|
||||||
|
|
||||||
|
def test_only_orphans_get_footprints(self):
|
||||||
|
# A wins real area; B's lone seed collapses. Only B should be rescued, and
|
||||||
|
# A's genuine (large) fragment must be untouched by the rescue.
|
||||||
|
oa_geom = box(0, 0, 0.14, 0.14)
|
||||||
|
points = np.array([[0.07, 0.07], [0.07, 0.07], [0.105, 0.07]])
|
||||||
|
postcodes = ["AA1 1AA", "AA1 1AA", "AA1 1AB"]
|
||||||
|
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[])
|
||||||
|
assert {pc for pc, _ in fragments} == {"AA1 1AA", "AA1 1AB"}
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# _extract_polygonal helper
|
# _extract_polygonal helper
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -1221,6 +1265,238 @@ class TestOutputPartition:
|
||||||
assert geoms["AA1 1AB"].area > 0
|
assert geoms["AA1 1AB"].area > 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestGeojsonGeometrySliverValidity:
|
||||||
|
"""_geojson_geometry must emit geometry that is still valid after the final 6dp
|
||||||
|
rounding. A sub-grid overlap sliver (left by _resolve_overlaps' full-precision
|
||||||
|
difference) must be snapped away on the output grid rather than collapsed by a
|
||||||
|
naive round into a degenerate ('Too few points') / self-intersecting ring that
|
||||||
|
only fails once read back from disk."""
|
||||||
|
|
||||||
|
def test_subgrid_sliver_does_not_produce_invalid_ring(self):
|
||||||
|
from shapely.geometry import shape
|
||||||
|
from shapely.validation import explain_validity
|
||||||
|
|
||||||
|
from .output import _geojson_geometry
|
||||||
|
|
||||||
|
main = box(-0.34, 51.74, -0.33, 51.75) # ~degree-scale, valid
|
||||||
|
# A thin triangle whose three vertices all round to the same 1e-6 cell.
|
||||||
|
sliver = Polygon(
|
||||||
|
[
|
||||||
|
(-0.3400284, 51.7505061),
|
||||||
|
(-0.3400280, 51.7505060),
|
||||||
|
(-0.3400281, 51.7505063),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert sliver.is_valid
|
||||||
|
|
||||||
|
gj = _geojson_geometry(MultiPolygon([main, sliver]))
|
||||||
|
assert gj is not None
|
||||||
|
rt = shape(gj)
|
||||||
|
assert rt.is_valid, explain_validity(rt)
|
||||||
|
assert not rt.is_empty
|
||||||
|
# The real (main) area is preserved; only the degenerate sliver is dropped.
|
||||||
|
assert rt.area == pytest.approx(main.area, rel=1e-3)
|
||||||
|
|
||||||
|
def test_pure_sliver_is_rescued_not_written_invalid(self):
|
||||||
|
from shapely.geometry import shape
|
||||||
|
|
||||||
|
from .output import _OUTPUT_PRECISION_DEG, _geojson_geometry
|
||||||
|
|
||||||
|
# A sub-grid sliver (extent < 1e-6° in one dimension) that snaps to empty:
|
||||||
|
# it must be rescued into a minimal valid footprint at its location, never
|
||||||
|
# written invalid and never dropped (an active postcode keeps a boundary).
|
||||||
|
g = _OUTPUT_PRECISION_DEG
|
||||||
|
sliver = Polygon(
|
||||||
|
[
|
||||||
|
(-0.30, 51.75),
|
||||||
|
(-0.30 + 5 * g, 51.75),
|
||||||
|
(-0.30 + 5 * g, 51.75 + 0.2 * g),
|
||||||
|
(-0.30, 51.75 + 0.1 * g),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
gj = _geojson_geometry(sliver)
|
||||||
|
assert gj is not None # rescued, not dropped
|
||||||
|
rt = shape(gj)
|
||||||
|
assert rt.is_valid and not rt.is_empty
|
||||||
|
assert rt.distance(sliver) == pytest.approx(0.0, abs=1e-4)
|
||||||
|
|
||||||
|
|
||||||
|
class TestColocatedPostcodesAllRetained:
|
||||||
|
"""Co-located non-geographic postcodes (e.g. AL1 9xx) have heavily-overlapping
|
||||||
|
tiny footprints; the de-overlap pass trims most to sub-grid slivers. None may
|
||||||
|
be dropped — every active postcode must keep a (valid, non-empty) boundary."""
|
||||||
|
|
||||||
|
def test_overlapping_tiny_footprints_none_dropped(self, tmp_path):
|
||||||
|
from shapely.geometry import Point, shape
|
||||||
|
|
||||||
|
# 12 ~3 m discs within ~1 m of each other → after de-overlap most collapse
|
||||||
|
# below output precision.
|
||||||
|
postcodes = {}
|
||||||
|
for i in range(12):
|
||||||
|
e = 514000.0 + (i % 4) * 0.3
|
||||||
|
n = 206000.0 + (i // 4) * 0.3
|
||||||
|
postcodes[f"AL1 9{chr(65 + i)}{chr(65 + i)}"] = Point(e, n).buffer(3.0)
|
||||||
|
|
||||||
|
write_district_geojson(postcodes, tmp_path)
|
||||||
|
coll = json.loads((tmp_path / "units" / "AL1.geojson").read_text())
|
||||||
|
written = {f["properties"]["postcodes"] for f in coll["features"]}
|
||||||
|
assert written == set(postcodes), f"dropped: {set(postcodes) - written}"
|
||||||
|
for feature in coll["features"]:
|
||||||
|
geom = shape(feature["geometry"])
|
||||||
|
assert geom.is_valid and not geom.is_empty
|
||||||
|
|
||||||
|
|
||||||
|
class TestSafeOverlayHelpers:
|
||||||
|
"""The robust overlay helpers retry on a fixed-precision grid after a
|
||||||
|
GEOSException (e.g. ``side location conflict`` from near-coincident OA-seam
|
||||||
|
edges). The grid is a caller-supplied parameter: metres for the BNG stages,
|
||||||
|
1e-6 degrees for the WGS84 output stage — so the same helper serves both
|
||||||
|
without crushing degree-scale shapes on the metre default."""
|
||||||
|
|
||||||
|
def test_grid_param_honored_on_clean_inputs(self):
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
a = box(0, 0, 10, 10)
|
||||||
|
b = box(5, 0, 15, 10)
|
||||||
|
# No exception → exact result, identical regardless of the fallback grid.
|
||||||
|
assert geometry.safe_difference(a, b, grid=1e-6).equals(a.difference(b))
|
||||||
|
assert geometry.safe_union([a, b], grid=1e-6).equals(unary_union([a, b]))
|
||||||
|
assert geometry.safe_intersection(a, b, grid=1e-6).equals(a.intersection(b))
|
||||||
|
|
||||||
|
def test_difference_falls_back_to_fixed_precision_overlay(self, monkeypatch):
|
||||||
|
from shapely import GEOSException
|
||||||
|
from shapely.geometry.base import BaseGeometry
|
||||||
|
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
a = box(0, 0, 10, 10)
|
||||||
|
b = box(5, 0, 15, 10)
|
||||||
|
|
||||||
|
def _boom(self, other, *args, **kwargs):
|
||||||
|
raise GEOSException("forced side location conflict")
|
||||||
|
|
||||||
|
# Patch the .difference() *method*; the fallback uses the top-level
|
||||||
|
# shapely.difference() function, so it still completes.
|
||||||
|
monkeypatch.setattr(BaseGeometry, "difference", _boom)
|
||||||
|
result = geometry.safe_difference(a, b, grid=1e-6)
|
||||||
|
assert result.is_valid
|
||||||
|
assert result.area == pytest.approx(50.0) # left half of `a`
|
||||||
|
|
||||||
|
def test_union_falls_back_to_fixed_precision_overlay(self, monkeypatch):
|
||||||
|
from shapely import GEOSException
|
||||||
|
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
a = box(0, 0, 10, 10)
|
||||||
|
b = box(5, 0, 15, 10)
|
||||||
|
|
||||||
|
def _boom(_geoms):
|
||||||
|
raise GEOSException("forced robustness failure")
|
||||||
|
|
||||||
|
monkeypatch.setattr(geometry, "unary_union", _boom)
|
||||||
|
result = geometry.safe_union([a, b], grid=1e-6)
|
||||||
|
assert result.is_valid
|
||||||
|
assert result.area == pytest.approx(150.0) # 100 + 100 - 50 overlap
|
||||||
|
|
||||||
|
# A self-intersecting bow-tie: invalid. set_precision()'s DEFAULT
|
||||||
|
# ('valid_output') mode runs its own noding pass that re-raises
|
||||||
|
# 'side location conflict' on this — which is exactly how the production
|
||||||
|
# crash happened (the fallback re-raised the error it was meant to absorb).
|
||||||
|
_BOWTIE = Polygon([(0, 0), (1e-5, 1e-5), (1e-5, 0), (0, 1e-5), (0, 0)])
|
||||||
|
# make_valid() of this spiky ring returns a mixed-dimension
|
||||||
|
# GeometryCollection (polygon + dangling line); OverlayNG rejects that with
|
||||||
|
# 'Overlay input is mixed-dimension', so the fallback must strip the debris.
|
||||||
|
_SPIKY = Polygon(
|
||||||
|
[(0, 0), (10, 0), (10, 10), (5, 5), (5.0000001, 5), (0, 10), (0, 0), (15, -5), (0, 0)]
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_difference_fallback_survives_invalid_input(self, monkeypatch):
|
||||||
|
"""Regression for the production crash: the fallback must not reduce
|
||||||
|
precision with set_precision's default valid_output mode, which re-raises
|
||||||
|
'side location conflict' on an invalid geometry."""
|
||||||
|
from shapely import GEOSException
|
||||||
|
from shapely.geometry.base import BaseGeometry
|
||||||
|
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
assert not self._BOWTIE.is_valid # precondition
|
||||||
|
|
||||||
|
def _boom(self, other, *a, **k):
|
||||||
|
raise GEOSException("forced side location conflict")
|
||||||
|
|
||||||
|
monkeypatch.setattr(BaseGeometry, "difference", _boom)
|
||||||
|
result = geometry.safe_difference(self._BOWTIE, box(0, 0, 5e-6, 5e-6), grid=1e-6)
|
||||||
|
assert result.is_valid
|
||||||
|
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||||
|
|
||||||
|
def test_difference_fallback_survives_make_valid_geometrycollection(
|
||||||
|
self, monkeypatch
|
||||||
|
):
|
||||||
|
"""Regression: make_valid can yield a mixed-dimension GeometryCollection
|
||||||
|
that OverlayNG rejects; the fallback must keep only polygonal parts."""
|
||||||
|
from shapely import GEOSException, make_valid
|
||||||
|
from shapely.geometry.base import BaseGeometry
|
||||||
|
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
assert make_valid(self._SPIKY).geom_type == "GeometryCollection" # precondition
|
||||||
|
|
||||||
|
def _boom(self, other, *a, **k):
|
||||||
|
raise GEOSException("forced side location conflict")
|
||||||
|
|
||||||
|
monkeypatch.setattr(BaseGeometry, "difference", _boom)
|
||||||
|
result = geometry.safe_difference(self._SPIKY, box(2, 2, 8, 8), grid=1e-4)
|
||||||
|
assert result.is_valid
|
||||||
|
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||||
|
assert result.area > 0
|
||||||
|
|
||||||
|
def test_intersection_fallback_survives_make_valid_geometrycollection(
|
||||||
|
self, monkeypatch
|
||||||
|
):
|
||||||
|
from shapely import GEOSException
|
||||||
|
from shapely.geometry.base import BaseGeometry
|
||||||
|
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
def _boom(self, other, *a, **k):
|
||||||
|
raise GEOSException("forced side location conflict")
|
||||||
|
|
||||||
|
monkeypatch.setattr(BaseGeometry, "intersection", _boom)
|
||||||
|
result = geometry.safe_intersection(self._SPIKY, box(2, 2, 8, 8), grid=1e-4)
|
||||||
|
assert result.is_valid
|
||||||
|
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||||
|
assert result.area > 0
|
||||||
|
|
||||||
|
def test_union_fallback_survives_invalid_and_collection_inputs(self, monkeypatch):
|
||||||
|
"""The union fallback must absorb both an invalid bow-tie and a
|
||||||
|
make_valid-becomes-GeometryCollection input without raising."""
|
||||||
|
from shapely import GEOSException
|
||||||
|
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
def _boom(_geoms):
|
||||||
|
raise GEOSException("forced robustness failure")
|
||||||
|
|
||||||
|
monkeypatch.setattr(geometry, "unary_union", _boom)
|
||||||
|
result = geometry.safe_union(
|
||||||
|
[self._BOWTIE, self._SPIKY, box(20, 20, 30, 30)], grid=1e-4
|
||||||
|
)
|
||||||
|
assert result.is_valid
|
||||||
|
assert result.geom_type in ("Polygon", "MultiPolygon")
|
||||||
|
|
||||||
|
def test_helpers_never_raise_on_empty_inputs(self):
|
||||||
|
"""Degenerate/empty inputs must not abort a multi-hour run."""
|
||||||
|
from . import geometry
|
||||||
|
|
||||||
|
empty = Polygon()
|
||||||
|
a = box(0, 0, 10, 10)
|
||||||
|
assert geometry.safe_difference(empty, a).is_empty
|
||||||
|
assert geometry.safe_difference(a, empty).equals(a)
|
||||||
|
assert geometry.safe_intersection(empty, a).is_empty
|
||||||
|
assert geometry.safe_union([]).is_empty
|
||||||
|
assert geometry.safe_union([empty]).is_empty
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# InspireIndex must return the same candidates as a brute-force bbox scan
|
# InspireIndex must return the same candidates as a brute-force bbox scan
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,8 @@ import numpy as np
|
||||||
from scipy.spatial import QhullError, Voronoi
|
from scipy.spatial import QhullError, Voronoi
|
||||||
from shapely import make_valid
|
from shapely import make_valid
|
||||||
from shapely.geometry import MultiPolygon, Polygon
|
from shapely.geometry import MultiPolygon, Polygon
|
||||||
from shapely.ops import unary_union
|
|
||||||
|
from .geometry import safe_intersection, safe_union
|
||||||
|
|
||||||
|
|
||||||
def compute_voronoi_regions(
|
def compute_voronoi_regions(
|
||||||
|
|
@ -111,13 +112,13 @@ def compute_voronoi_regions(
|
||||||
poly = Polygon(vertices)
|
poly = Polygon(vertices)
|
||||||
if not poly.is_valid:
|
if not poly.is_valid:
|
||||||
poly = make_valid(poly)
|
poly = make_valid(poly)
|
||||||
clipped = poly.intersection(boundary)
|
clipped = safe_intersection(poly, boundary)
|
||||||
if not clipped.is_empty:
|
if not clipped.is_empty:
|
||||||
pc_polys[unique_pcs[i]].append(clipped)
|
pc_polys[unique_pcs[i]].append(clipped)
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
for pc, parts in pc_polys.items():
|
for pc, parts in pc_polys.items():
|
||||||
merged = unary_union(parts)
|
merged = safe_union(parts)
|
||||||
if not merged.is_empty:
|
if not merged.is_empty:
|
||||||
result[pc] = merged
|
result[pc] = merged
|
||||||
return result
|
return result
|
||||||
|
|
@ -143,7 +144,7 @@ def _equal_split_fallback(
|
||||||
(min_x, strip_max_y),
|
(min_x, strip_max_y),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
clipped = boundary.intersection(strip)
|
clipped = safe_intersection(boundary, strip)
|
||||||
if not clipped.is_empty:
|
if not clipped.is_empty:
|
||||||
result[pc] = clipped
|
result[pc] = clipped
|
||||||
return result
|
return result
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,53 @@ def test_accumulate_clipped_area_drops_missing_and_empty_geometry() -> None:
|
||||||
assert canopy_area[0] == pytest.approx(400.0)
|
assert canopy_area[0] == pytest.approx(400.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_accumulate_clipped_area_survives_invalid_polygon() -> None:
|
||||||
|
"""A self-intersecting external polygon (TOW/NFI data is occasionally invalid)
|
||||||
|
must not abort the batched overlay with 'TopologyException: side location
|
||||||
|
conflict'; its repaired in-buffer area is still accumulated."""
|
||||||
|
radius_m = 50
|
||||||
|
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
|
||||||
|
circles, tree = _postcode_buffers(points, radius_m)
|
||||||
|
|
||||||
|
# Bow-tie centred on A: self-intersecting => invalid. The raw batched
|
||||||
|
# shapely.intersection raises 'side location conflict' on it; make_valid splits
|
||||||
|
# it into two triangles of total area 200, fully inside A's radius-50 buffer.
|
||||||
|
bowtie = shapely.Polygon([(-10, -10), (10, 10), (10, -10), (-10, 10), (-10, -10)])
|
||||||
|
assert not shapely.is_valid(bowtie) # precondition
|
||||||
|
with pytest.raises(shapely.errors.GEOSException): # documents the raw hazard
|
||||||
|
shapely.intersection(
|
||||||
|
np.array([bowtie], dtype=object), np.array([circles[0]], dtype=object)
|
||||||
|
)
|
||||||
|
|
||||||
|
canopy_area = np.zeros(1)
|
||||||
|
_accumulate_clipped_area(np.array([bowtie], dtype=object), circles, tree, canopy_area)
|
||||||
|
assert canopy_area[0] == pytest.approx(200.0, rel=1e-3)
|
||||||
|
|
||||||
|
|
||||||
|
def test_robust_intersection_area_recovers_from_overlay_failure(monkeypatch) -> None:
|
||||||
|
"""The batched-overlay fallback must absorb a GEOSException from the fast path
|
||||||
|
and recover (validate + retry), returning the correct per-pair areas. Version
|
||||||
|
independent: the fast-path failure is forced rather than data-dependent."""
|
||||||
|
from pipeline.transform import tree_density
|
||||||
|
|
||||||
|
real_intersection = shapely.intersection
|
||||||
|
calls = {"n": 0}
|
||||||
|
|
||||||
|
def flaky(a, b, **kwargs):
|
||||||
|
calls["n"] += 1
|
||||||
|
if calls["n"] == 1: # fail only the first (fast-path) call
|
||||||
|
raise shapely.errors.GEOSException("forced side location conflict")
|
||||||
|
return real_intersection(a, b, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(tree_density.shapely, "intersection", flaky)
|
||||||
|
|
||||||
|
a = np.array([shapely.box(0, 0, 10, 10), shapely.box(0, 0, 4, 4)], dtype=object)
|
||||||
|
b = np.array([shapely.box(0, 0, 6, 6), shapely.box(0, 0, 2, 2)], dtype=object)
|
||||||
|
out = tree_density._robust_intersection_area(a, b)
|
||||||
|
assert calls["n"] >= 2 # fast path failed -> fallback path executed
|
||||||
|
assert out.tolist() == pytest.approx([36.0, 4.0])
|
||||||
|
|
||||||
|
|
||||||
def test_accumulate_clipped_area_height_weighted_by_overlap() -> None:
|
def test_accumulate_clipped_area_height_weighted_by_overlap() -> None:
|
||||||
radius_m = 50
|
radius_m = 50
|
||||||
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
|
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
|
||||||
|
|
|
||||||
|
|
@ -263,6 +263,43 @@ def _postcode_buffers(
|
||||||
return circles, shapely.STRtree(circles)
|
return circles, shapely.STRtree(circles)
|
||||||
|
|
||||||
|
|
||||||
|
# 0.1 mm in the BNG working CRS (EPSG:27700) — far below survey resolution; the
|
||||||
|
# same grid the postcode_boundaries overlay uses.
|
||||||
|
_OVERLAY_GRID_M = 1e-4
|
||||||
|
|
||||||
|
|
||||||
|
def _robust_intersection_area(a: np.ndarray, b: np.ndarray) -> np.ndarray:
|
||||||
|
"""Vectorized ``area(a[i] ∩ b[i])`` that survives GEOS robustness failures.
|
||||||
|
|
||||||
|
External Forest Research TOW/NFI polygons are occasionally invalid
|
||||||
|
(self-intersections), and a single bad polygon makes the batched
|
||||||
|
``shapely.intersection`` raise ``TopologyException: side location conflict``,
|
||||||
|
aborting the whole run. The fast path is the raw batched overlay — unchanged,
|
||||||
|
full-speed, when the data is clean — and only a failure triggers repair.
|
||||||
|
|
||||||
|
The repair deliberately uses a *plain* overlay rather than the fixed-precision
|
||||||
|
(``grid_size``) one: ``make_valid`` can emit a mixed-dimension
|
||||||
|
``GeometryCollection`` (a polygon plus a dangling line), which OverlayNG
|
||||||
|
rejects with ``Overlay input is mixed-dimension`` — whereas a plain overlay
|
||||||
|
accepts it, and its non-polygonal debris has zero area and is dropped by the
|
||||||
|
``clipped_area > 0`` filter downstream anyway. A final pointwise coordinate
|
||||||
|
snap (which never raises) collapses the near-coincident edges behind any
|
||||||
|
residual full-precision robustness failure.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return shapely.area(shapely.intersection(a, b))
|
||||||
|
except shapely.errors.GEOSException:
|
||||||
|
pass
|
||||||
|
a = shapely.make_valid(a)
|
||||||
|
b = shapely.make_valid(b)
|
||||||
|
try:
|
||||||
|
return shapely.area(shapely.intersection(a, b))
|
||||||
|
except shapely.errors.GEOSException:
|
||||||
|
a = shapely.make_valid(shapely.set_precision(a, _OVERLAY_GRID_M, mode="pointwise"))
|
||||||
|
b = shapely.make_valid(shapely.set_precision(b, _OVERLAY_GRID_M, mode="pointwise"))
|
||||||
|
return shapely.area(shapely.intersection(a, b))
|
||||||
|
|
||||||
|
|
||||||
def _accumulate_clipped_area(
|
def _accumulate_clipped_area(
|
||||||
geoms: np.ndarray,
|
geoms: np.ndarray,
|
||||||
circles: np.ndarray,
|
circles: np.ndarray,
|
||||||
|
|
@ -294,8 +331,8 @@ def _accumulate_clipped_area(
|
||||||
if geom_index.size == 0:
|
if geom_index.size == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
clipped_area = shapely.area(
|
clipped_area = _robust_intersection_area(
|
||||||
shapely.intersection(geoms[geom_index], circles[postcode_index])
|
geoms[geom_index], circles[postcode_index]
|
||||||
)
|
)
|
||||||
positive = clipped_area > 0
|
positive = clipped_area > 0
|
||||||
geom_index = geom_index[positive]
|
geom_index = geom_index[positive]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue