"""Load greenspace/water polygons and subtract them from postcode boundaries.""" from pathlib import Path import polars as pl from shapely import make_valid, wkb from shapely.geometry import MultiPolygon, Polygon from shapely.strtree import STRtree from .geometry import safe_difference, safe_union def load_greenspace(path: Path) -> tuple[STRtree, list]: """Load greenspace parquet and build an STRtree spatial index. Geometries are repaired with ``make_valid`` on load: an invalid park/lake polygon would make the per-postcode ``intersects`` predicate (and the exact difference path) liable to raise mid-merge, hours into a build. Empty geometries are dropped. Returns: (tree, geoms) where tree is a Shapely STRtree and geoms is the list of geometries indexed by the tree. """ df = pl.read_parquet(path) geoms = [] for raw in df["geometry"].to_list(): geom = wkb.loads(raw) if not geom.is_valid: geom = make_valid(geom) if not geom.is_empty: geoms.append(geom) tree = STRtree(geoms) return tree, geoms MAX_REMOVAL_FRACTION = 0.9 # Keep original if >90% would be removed def subtract_greenspace( postcode_geom: Polygon | MultiPolygon, tree: STRtree, geoms: list, ) -> Polygon | MultiPolygon: """Subtract park/water polygons that overlap the postcode geometry. Uses the STRtree for fast candidate lookup, then subtracts the union of intersecting greenspace from the postcode polygon. If subtraction would remove >90% of the area, keeps the original (the postcode genuinely covers that land, e.g. churchyards, riverside addresses). """ candidate_idxs = tree.query(postcode_geom) if len(candidate_idxs) == 0: return postcode_geom # Collect geometries that actually intersect (not just bbox overlap) intersecting = [] for idx in candidate_idxs: g = geoms[idx] if g.intersects(postcode_geom): intersecting.append(g) if not intersecting: return postcode_geom green_union = safe_union(intersecting) result = safe_difference(postcode_geom, green_union) if result.is_empty: return postcode_geom # Don't over-trim postcodes that genuinely cover green/water areas original_area = postcode_geom.area if original_area > 0 and result.area / original_area < (1 - MAX_REMOVAL_FRACTION): return postcode_geom return result