65 lines
2 KiB
Python
65 lines
2 KiB
Python
"""Load greenspace/water polygons and subtract them from postcode boundaries."""
|
|
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
from shapely import wkb
|
|
from shapely.geometry import MultiPolygon, Polygon
|
|
from shapely.ops import unary_union
|
|
from shapely.strtree import STRtree
|
|
|
|
|
|
def load_greenspace(path: Path) -> tuple[STRtree, list]:
|
|
"""Load greenspace parquet and build an STRtree spatial index.
|
|
|
|
Returns:
|
|
(tree, geoms) where tree is a Shapely STRtree and geoms is
|
|
the list of geometries indexed by the tree.
|
|
"""
|
|
df = pl.read_parquet(path)
|
|
geoms = [wkb.loads(g) for g in df["geometry"].to_list()]
|
|
tree = STRtree(geoms)
|
|
return tree, geoms
|
|
|
|
|
|
MAX_REMOVAL_FRACTION = 0.9 # Keep original if >90% would be removed
|
|
|
|
|
|
def subtract_greenspace(
|
|
postcode_geom: Polygon | MultiPolygon,
|
|
tree: STRtree,
|
|
geoms: list,
|
|
) -> Polygon | MultiPolygon:
|
|
"""Subtract park/water polygons that overlap the postcode geometry.
|
|
|
|
Uses the STRtree for fast candidate lookup, then subtracts the union
|
|
of intersecting greenspace from the postcode polygon. If subtraction
|
|
would remove >90% of the area, keeps the original (the postcode
|
|
genuinely covers that land, e.g. churchyards, riverside addresses).
|
|
"""
|
|
candidate_idxs = tree.query(postcode_geom)
|
|
if len(candidate_idxs) == 0:
|
|
return postcode_geom
|
|
|
|
# Collect geometries that actually intersect (not just bbox overlap)
|
|
intersecting = []
|
|
for idx in candidate_idxs:
|
|
g = geoms[idx]
|
|
if g.intersects(postcode_geom):
|
|
intersecting.append(g)
|
|
|
|
if not intersecting:
|
|
return postcode_geom
|
|
|
|
green_union = unary_union(intersecting)
|
|
result = postcode_geom.difference(green_union)
|
|
|
|
if result.is_empty:
|
|
return postcode_geom
|
|
|
|
# Don't over-trim postcodes that genuinely cover green/water areas
|
|
original_area = postcode_geom.area
|
|
if original_area > 0 and result.area / original_area < (1 - MAX_REMOVAL_FRACTION):
|
|
return postcode_geom
|
|
|
|
return result
|