perfect-postcode/pipeline/transform/postcode_boundaries/greenspace.py
2026-06-04 20:40:42 +01:00

66 lines
2 KiB
Python

"""Load greenspace/water polygons and subtract them from postcode boundaries."""
from pathlib import Path
import polars as pl
from shapely import wkb
from shapely.geometry import MultiPolygon, Polygon
from shapely.strtree import STRtree
from .geometry import safe_difference, safe_union
def load_greenspace(path: Path) -> tuple[STRtree, list]:
"""Load greenspace parquet and build an STRtree spatial index.
Returns:
(tree, geoms) where tree is a Shapely STRtree and geoms is
the list of geometries indexed by the tree.
"""
df = pl.read_parquet(path)
geoms = [wkb.loads(g) for g in df["geometry"].to_list()]
tree = STRtree(geoms)
return tree, geoms
MAX_REMOVAL_FRACTION = 0.9 # Keep original if >90% would be removed
def subtract_greenspace(
postcode_geom: Polygon | MultiPolygon,
tree: STRtree,
geoms: list,
) -> Polygon | MultiPolygon:
"""Subtract park/water polygons that overlap the postcode geometry.
Uses the STRtree for fast candidate lookup, then subtracts the union
of intersecting greenspace from the postcode polygon. If subtraction
would remove >90% of the area, keeps the original (the postcode
genuinely covers that land, e.g. churchyards, riverside addresses).
"""
candidate_idxs = tree.query(postcode_geom)
if len(candidate_idxs) == 0:
return postcode_geom
# Collect geometries that actually intersect (not just bbox overlap)
intersecting = []
for idx in candidate_idxs:
g = geoms[idx]
if g.intersects(postcode_geom):
intersecting.append(g)
if not intersecting:
return postcode_geom
green_union = safe_union(intersecting)
result = safe_difference(postcode_geom, green_union)
if result.is_empty:
return postcode_geom
# Don't over-trim postcodes that genuinely cover green/water areas
original_area = postcode_geom.area
if original_area > 0 and result.area / original_area < (1 - MAX_REMOVAL_FRACTION):
return postcode_geom
return result