perfect-postcode/pipeline/transform/postcode_boundaries/greenspace.py
2026-02-15 22:39:53 +00:00

65 lines
2 KiB
Python

"""Load greenspace/water polygons and subtract them from postcode boundaries."""
from pathlib import Path
import polars as pl
from shapely import wkb
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
def load_greenspace(path: Path) -> tuple[STRtree, list]:
"""Load greenspace parquet and build an STRtree spatial index.
Returns:
(tree, geoms) where tree is a Shapely STRtree and geoms is
the list of geometries indexed by the tree.
"""
df = pl.read_parquet(path)
geoms = [wkb.loads(g) for g in df["geometry"].to_list()]
tree = STRtree(geoms)
return tree, geoms
MAX_REMOVAL_FRACTION = 0.9 # Keep original if >90% would be removed
def subtract_greenspace(
postcode_geom: Polygon | MultiPolygon,
tree: STRtree,
geoms: list,
) -> Polygon | MultiPolygon:
"""Subtract park/water polygons that overlap the postcode geometry.
Uses the STRtree for fast candidate lookup, then subtracts the union
of intersecting greenspace from the postcode polygon. If subtraction
would remove >90% of the area, keeps the original (the postcode
genuinely covers that land, e.g. churchyards, riverside addresses).
"""
candidate_idxs = tree.query(postcode_geom)
if len(candidate_idxs) == 0:
return postcode_geom
# Collect geometries that actually intersect (not just bbox overlap)
intersecting = []
for idx in candidate_idxs:
g = geoms[idx]
if g.intersects(postcode_geom):
intersecting.append(g)
if not intersecting:
return postcode_geom
green_union = unary_union(intersecting)
result = postcode_geom.difference(green_union)
if result.is_empty:
return postcode_geom
# Don't over-trim postcodes that genuinely cover green/water areas
original_area = postcode_geom.area
if original_area > 0 and result.area / original_area < (1 - MAX_REMOVAL_FRACTION):
return postcode_geom
return result