"""Load greenspace/water polygons and subtract them from postcode boundaries.""" from pathlib import Path import polars as pl from shapely import wkb from shapely.geometry import MultiPolygon, Polygon from shapely.ops import unary_union from shapely.strtree import STRtree def load_greenspace(path: Path) -> tuple[STRtree, list]: """Load greenspace parquet and build an STRtree spatial index. Returns: (tree, geoms) where tree is a Shapely STRtree and geoms is the list of geometries indexed by the tree. """ df = pl.read_parquet(path) geoms = [wkb.loads(g) for g in df["geometry"].to_list()] tree = STRtree(geoms) return tree, geoms MAX_REMOVAL_FRACTION = 0.9 # Keep original if >90% would be removed def subtract_greenspace( postcode_geom: Polygon | MultiPolygon, tree: STRtree, geoms: list, ) -> Polygon | MultiPolygon: """Subtract park/water polygons that overlap the postcode geometry. Uses the STRtree for fast candidate lookup, then subtracts the union of intersecting greenspace from the postcode polygon. If subtraction would remove >90% of the area, keeps the original (the postcode genuinely covers that land, e.g. churchyards, riverside addresses). """ candidate_idxs = tree.query(postcode_geom) if len(candidate_idxs) == 0: return postcode_geom # Collect geometries that actually intersect (not just bbox overlap) intersecting = [] for idx in candidate_idxs: g = geoms[idx] if g.intersects(postcode_geom): intersecting.append(g) if not intersecting: return postcode_geom green_union = unary_union(intersecting) result = postcode_geom.difference(green_union) if result.is_empty: return postcode_geom # Don't over-trim postcodes that genuinely cover green/water areas original_area = postcode_geom.area if original_area > 0 and result.area / original_area < (1 - MAX_REMOVAL_FRACTION): return postcode_geom return result