perfect-postcode/pipeline/transform/postcode_boundaries/greenspace.py

77 lines
2.4 KiB
Python

"""Load greenspace/water polygons and subtract them from postcode boundaries."""
from pathlib import Path
import polars as pl
from shapely import make_valid, wkb
from shapely.geometry import MultiPolygon, Polygon
from shapely.strtree import STRtree
from .geometry import safe_difference, safe_union
def load_greenspace(path: Path) -> tuple[STRtree, list]:
    """Load greenspace parquet and build an STRtree spatial index.

    Geometries are repaired with ``make_valid`` on load: an invalid park/lake
    polygon would make the per-postcode ``intersects`` predicate (and the exact
    difference path) liable to raise mid-merge, hours into a build. Rows with
    a null geometry and empty geometries are dropped.

    Args:
        path: Parquet file with a ``geometry`` column holding WKB-encoded
            geometries.

    Returns:
        (tree, geoms) where tree is a Shapely STRtree and geoms is
        the list of geometries indexed by the tree.
    """
    df = pl.read_parquet(path)
    geoms = []
    for raw in df["geometry"].to_list():
        # A null cell arrives as None; there is nothing to decode, and letting
        # it through would fail on the validity check below.
        if raw is None:
            continue
        geom = wkb.loads(raw)
        if not geom.is_valid:
            # NOTE(review): for degenerate polygons make_valid can return
            # lines/points or a mixed GeometryCollection; those are indexed
            # as-is -- confirm the downstream difference tolerates them.
            geom = make_valid(geom)
        if not geom.is_empty:
            geoms.append(geom)
    tree = STRtree(geoms)
    return tree, geoms
# Default share of a postcode's area that subtraction may remove before the
# trim is rejected and the original polygon is kept.
MAX_REMOVAL_FRACTION = 0.9  # Keep original if >90% would be removed


def subtract_greenspace(
    postcode_geom: Polygon | MultiPolygon,
    tree: STRtree,
    geoms: list,
    *,
    max_removal_fraction: float | None = None,
) -> Polygon | MultiPolygon:
    """Subtract park/water polygons that overlap the postcode geometry.

    Uses the STRtree to find the greenspace geometries that intersect the
    postcode, then subtracts their union from the postcode polygon. If
    subtraction would remove more than ``max_removal_fraction`` of the area,
    keeps the original (the postcode genuinely covers that land, e.g.
    churchyards, riverside addresses). An empty result always keeps the
    original, whatever the threshold.

    Args:
        postcode_geom: Postcode boundary to trim.
        tree: Spatial index over the greenspace geometries.
        geoms: The geometries ``tree`` was built from, in the same order;
            indices returned by the tree are looked up in this list.
        max_removal_fraction: Largest share of the original area that may be
            removed. ``None`` (the default) uses ``MAX_REMOVAL_FRACTION``.

    Returns:
        The trimmed geometry, or ``postcode_geom`` itself when nothing
        intersects or the trim is rejected.
    """
    if max_removal_fraction is None:
        # Resolved per call so the module constant remains the single default.
        max_removal_fraction = MAX_REMOVAL_FRACTION

    # One call does both the bbox lookup and the exact intersects test, so
    # bbox-only neighbours never reach the union.
    hit_idxs = tree.query(postcode_geom, predicate="intersects")
    # hit_idxs is an array, so test its length rather than its truthiness.
    if len(hit_idxs) == 0:
        return postcode_geom

    green_union = safe_union([geoms[idx] for idx in hit_idxs])
    result = safe_difference(postcode_geom, green_union)
    if result.is_empty:
        return postcode_geom

    # Don't over-trim postcodes that genuinely cover green/water areas
    original_area = postcode_geom.area
    min_kept_fraction = 1 - max_removal_fraction
    if original_area > 0 and result.area / original_area < min_kept_fraction:
        return postcode_geom
    return result