Fix data pipelines once and for all

This commit is contained in:
Andras Schmelczer 2026-06-10 21:27:32 +01:00
parent 08560476c5
commit 4012e4e047
46 changed files with 4508 additions and 855 deletions

View file

@ -101,6 +101,21 @@ def _snap_to_wgs84_geojson(geom_bng: Polygon | MultiPolygon) -> dict | None:
return geojson_dict
def _is_pointlike(geom_bng) -> bool:
    """Report whether a BNG geometry has collapsed to an effectively extent-less shape.

    A geometry counts as point-like only when BOTH its area is below
    ``_POINTLIKE_AREA_M2`` and its length (perimeter) is below
    ``_POINTLIKE_PERIMETER_M``. Requiring both keeps genuine thin slivers,
    which have negligible area but still carry length, out of this category.

    Returns:
        True if both measurements fall under their thresholds; False
        otherwise, including when GEOS raises while measuring the geometry.
    """
    try:
        # Area is checked first; the perimeter is only measured when the
        # area already looks degenerate.
        if not (geom_bng.area < _POINTLIKE_AREA_M2):
            return False
        return geom_bng.length < _POINTLIKE_PERIMETER_M
    except GEOSException:
        # A geometry that cannot be measured is not classified as point-like.
        return False
def _rescue_footprint(geom_bng) -> dict | None:
"""Fatten a degenerate BNG geometry into a representable footprint and snap.
@ -109,15 +124,9 @@ def _rescue_footprint(geom_bng) -> dict | None:
gets a building-scale buffer so it is not reduced to an invisible sub-metre
dot; thin slivers that still carry length keep the minimal buffer.
"""
buffer_m = _MIN_FOOTPRINT_BUFFER_M
try:
if (
geom_bng.area < _POINTLIKE_AREA_M2
and geom_bng.length < _POINTLIKE_PERIMETER_M
):
buffer_m = _POINT_RESCUE_BUFFER_M
except GEOSException:
pass
buffer_m = (
_POINT_RESCUE_BUFFER_M if _is_pointlike(geom_bng) else _MIN_FOOTPRINT_BUFFER_M
)
footprint = _largest_polygonal(geom_bng.buffer(buffer_m))
if footprint is None:
return None
@ -147,10 +156,16 @@ def to_wgs84_geojson(
)
if simplified is None:
simplified = cleaned
# Normal path; if snapping erases a thin sliver, fatten its real shape.
result = _snap_to_wgs84_geojson(simplified)
if result is None:
if _is_pointlike(simplified):
# A POINTLIKE footprint is rescued to building scale even when it
# would survive snapping: a 0.1-1 m² polygon serializes fine but
# ships as an invisible dot covering a whole tower block.
result = _rescue_footprint(simplified)
else:
# Normal path; if snapping erases a thin sliver, fatten its real shape.
result = _snap_to_wgs84_geojson(simplified)
if result is None:
result = _rescue_footprint(simplified)
if result is not None:
return result
@ -229,6 +244,10 @@ def merge_fragments(
greenspace_tree: Optional STRtree of park/water polygons.
greenspace_geoms: Optional list of park/water geometries (indexed by tree).
"""
subtract = greenspace_tree is not None and greenspace_geoms is not None
if subtract:
from .greenspace import subtract_greenspace
by_postcode: dict[str, list] = defaultdict(list)
for pc, geom in all_fragments:
by_postcode[pc].append(geom)
@ -256,9 +275,7 @@ def merge_fragments(
# Remove artifact interior holes from INSPIRE+Voronoi+make_valid chain
combined = _fill_holes(combined)
# Subtract parks/water if provided
if greenspace_tree is not None and greenspace_geoms is not None:
from .greenspace import subtract_greenspace
if subtract:
pre_green = combined
combined = subtract_greenspace(combined, greenspace_tree, greenspace_geoms)
combined = _keep_polygon_parts(combined)