Improve data pipeline
This commit is contained in:
parent
e8345cbdc1
commit
f99bd4e5c9
36 changed files with 966 additions and 129 deletions
|
|
@ -5,6 +5,7 @@ from pathlib import Path
|
|||
|
||||
from pyproj import Transformer
|
||||
from shapely import make_valid, set_precision
|
||||
from shapely.errors import GEOSException
|
||||
from shapely.geometry import MultiPolygon, Polygon, mapping, shape
|
||||
from shapely.ops import transform as transform_geometry
|
||||
from shapely.ops import unary_union
|
||||
|
|
@ -43,7 +44,14 @@ def _largest_polygonal(geom) -> Polygon | None:
|
|||
def to_wgs84_geojson(
|
||||
geom: Polygon | MultiPolygon, tolerance: float = 1.0
|
||||
) -> dict | None:
|
||||
"""Simplify geometry in BNG, convert to WGS84, return GeoJSON dict."""
|
||||
"""Simplify geometry in BNG, convert to WGS84, return a valid GeoJSON dict.
|
||||
|
||||
Validates the *serialized* GeoJSON dict (via a ``shape()`` round-trip), not
|
||||
just the intermediate Shapely object: coordinate snapping during
|
||||
serialization can otherwise leave a self-intersecting ring that only shows up
|
||||
once the feature is read back from disk. Any such geometry is repaired with
|
||||
``make_valid`` before returning so written features are always valid.
|
||||
"""
|
||||
geom = _largest_polygonal(geom)
|
||||
if geom is None:
|
||||
return None
|
||||
|
|
@ -55,12 +63,28 @@ def to_wgs84_geojson(
|
|||
|
||||
transformer = _get_to_wgs84()
|
||||
wgs84 = transform_geometry(transformer.transform, simplified)
|
||||
wgs84 = set_precision(wgs84, 0.000001, mode="valid_output")
|
||||
try:
|
||||
wgs84 = set_precision(wgs84, 0.000001, mode="valid_output")
|
||||
except GEOSException:
|
||||
# Precision snapping can fail on pathological geometries; fall back to a
|
||||
# plain validity repair without coordinate snapping.
|
||||
wgs84 = make_valid(wgs84)
|
||||
wgs84 = _largest_polygonal(wgs84)
|
||||
if wgs84 is None:
|
||||
return None
|
||||
|
||||
return mapping(wgs84)
|
||||
geojson_dict = mapping(wgs84)
|
||||
|
||||
# The geometry that actually reaches disk is the GeoJSON dict, so validate
|
||||
# *that* (not the pre-serialization object) and repair if needed.
|
||||
round_trip = shape(geojson_dict)
|
||||
if round_trip.is_empty or not round_trip.is_valid:
|
||||
round_trip = _largest_polygonal(make_valid(round_trip))
|
||||
if round_trip is None or round_trip.is_empty:
|
||||
return None
|
||||
geojson_dict = mapping(round_trip)
|
||||
|
||||
return geojson_dict
|
||||
|
||||
|
||||
def _fill_holes(geom):
|
||||
|
|
@ -119,7 +143,11 @@ def merge_fragments(
|
|||
pre_green = combined
|
||||
combined = subtract_greenspace(combined, greenspace_tree, greenspace_geoms)
|
||||
combined = _largest_polygon(combined)
|
||||
combined = _fill_holes(combined)
|
||||
# Do NOT _fill_holes here: interior holes carved by the greenspace
|
||||
# subtraction (lakes, enclosed parks) are intentional, not artifacts.
|
||||
# Filling them would re-add the removed area and negate the
|
||||
# subtraction. Artifact holes from the INSPIRE+Voronoi+make_valid
|
||||
# chain were already removed by the _fill_holes above (pre-subtraction).
|
||||
# Revert if subtraction + fragment selection lost >90% of area
|
||||
if pre_green.area > 0 and combined.area / pre_green.area < 0.1:
|
||||
combined = pre_green
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue