"""Command-line entry point: generate postcode boundary polygons from OA + INSPIRE + UPRN data."""

import argparse
from pathlib import Path

from shapely.geometry import MultiPolygon, Polygon
from tqdm import tqdm

from .inspire import (
    cache_inspire,
    get_inspire_candidates,
    inspire_cache_exists,
    load_inspire,
)
from .memory import release_memory
from .oa_boundaries import load_oa_boundaries
from .output import merge_fragments, write_district_geojson
from .process_oa import process_oa
from .uprn import get_oa_uprns, load_uprns

# Horizontal rule printed above and below every phase title.
_BANNER = "=" * 60


def _print_phase(title: str, *, leading_blank: bool = True) -> None:
    """Print a phase title framed by banner lines, optionally after a blank line."""
    if leading_blank:
        print()
    print(_BANNER)
    print(title)
    print(_BANNER)


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser describing every command-line option of the tool."""
    parser = argparse.ArgumentParser(
        description="Generate postcode boundary polygons from OA + INSPIRE + UPRN data"
    )
    parser.add_argument("--uprn", type=Path, required=True, help="UPRN lookup parquet")
    parser.add_argument(
        "--oa-boundaries", type=Path, required=True, help="OA boundaries GeoPackage"
    )
    parser.add_argument(
        "--inspire", type=Path, required=True, help="INSPIRE ZIP directory"
    )
    parser.add_argument("--output", type=Path, required=True, help="Output directory")
    parser.add_argument(
        "--limit", type=int, default=0, help="Process only first N OAs (0=all)"
    )
    parser.add_argument(
        "--greenspace",
        type=Path,
        default=None,
        help="Greenspace/water parquet for boundary trimming (optional)",
    )
    return parser


def main() -> None:
    """Run the postcode boundary pipeline using options read from the command line.

    The work is done in four phases, each announced on stdout:

    1. Load the OA boundaries and the UPRN lookup.
    2. Build the INSPIRE cache under ``<output>/inspire_cache`` when it does
       not exist yet, then load it.
    3. For every OA that has both a boundary and UPRNs (optionally only the
       first ``--limit`` of them, in sorted code order), either assign the
       whole OA geometry to its single postcode or hand the OA to
       ``process_oa`` together with its INSPIRE candidates.
    4. Merge the collected fragments, optionally passing greenspace data to
       the merge step, and write the district GeoJSON files.
    """
    args = _build_parser().parse_args()

    # Phase 1: Load all data
    _print_phase("Phase 1: Loading data", leading_blank=False)

    oa_geoms = load_oa_boundaries(args.oa_boundaries)
    uprn_df, uprn_offsets = load_uprns(args.uprn)

    # Phase 2: Parse/load INSPIRE
    _print_phase("Phase 2: INSPIRE data")

    inspire_cache_dir = args.output / "inspire_cache"
    if not inspire_cache_exists(inspire_cache_dir):
        cache_inspire(args.inspire, inspire_cache_dir)

    inspire_bboxes, inspire_offsets, inspire_coords = load_inspire(inspire_cache_dir)

    # Phase 3: Process OAs
    _print_phase("Phase 3: Processing OAs")

    # Build work list: only OAs present in BOTH the boundary set and the UPRN
    # index can be processed. Sorting keeps the order (and therefore the
    # effect of --limit) the same from run to run.
    oa_codes_with_data = sorted(set(oa_geoms.keys()) & set(uprn_offsets.keys()))
    # Both skip counts are taken before --limit is applied, so they describe
    # the input data rather than the truncated work list.
    skipped_no_uprn = len(oa_geoms) - len(oa_codes_with_data)
    skipped_no_boundary = len(uprn_offsets) - len(oa_codes_with_data)

    if args.limit > 0:
        oa_codes_with_data = oa_codes_with_data[: args.limit]

    print(f" OAs with UPRNs + boundaries: {len(oa_codes_with_data)}")
    print(f" Skipped (no UPRNs): {skipped_no_uprn}")
    print(f" Skipped (no boundary): {skipped_no_boundary}")

    all_fragments: list[tuple[str, Polygon | MultiPolygon]] = []
    single_count = 0
    multi_count = 0

    for oa_code in tqdm(
        oa_codes_with_data,
        desc="Processing OAs",
        unit="OA",
        smoothing=0.01,
        miniters=100,
    ):
        oa_geom = oa_geoms[oa_code]
        points, postcodes = get_oa_uprns(uprn_df, uprn_offsets, oa_code)

        if len(set(postcodes)) == 1:
            # Fast path: entire OA = one postcode
            all_fragments.append((postcodes[0], oa_geom))
            single_count += 1
            continue

        # Get INSPIRE candidates via bbox pre-filter
        candidates = get_inspire_candidates(
            oa_geom.bounds, inspire_bboxes, inspire_offsets, inspire_coords
        )

        fragments = process_oa(oa_geom, points, postcodes, candidates)
        all_fragments.extend(fragments)
        multi_count += 1

    print(f"\n Single-postcode OAs (fast path): {single_count}")
    print(f" Multi-postcode OAs (INSPIRE+Voronoi): {multi_count}")
    print(f" Total fragments: {len(all_fragments)}")

    # Free data no longer needed: only the fragments are used from here on.
    del oa_geoms, uprn_df, uprn_offsets
    del inspire_bboxes, inspire_offsets, inspire_coords
    release_memory()

    # Phase 4: Merge and write
    _print_phase("Phase 4: Merging fragments and writing GeoJSON")

    greenspace_tree = None
    greenspace_geoms = None
    if args.greenspace and args.greenspace.exists():
        # Imported lazily so the module is only loaded when trimming is requested.
        from .greenspace import load_greenspace

        print(f" Loading greenspace/water from {args.greenspace}...")
        greenspace_tree, greenspace_geoms = load_greenspace(args.greenspace)
        print(f" Loaded {len(greenspace_geoms)} greenspace/water polygons")
    elif args.greenspace:
        # The option was given but the file is missing. Keep going without
        # trimming (as before), but say so instead of silently ignoring it.
        print(f" WARNING: greenspace file not found, trimming disabled: {args.greenspace}")

    merged = merge_fragments(
        all_fragments,
        greenspace_tree=greenspace_tree,
        greenspace_geoms=greenspace_geoms,
    )
    print(f" Merged into {len(merged)} unique postcodes")

    file_count = write_district_geojson(merged, args.output)
    print(f"\n Wrote {file_count} district GeoJSON files to {args.output / 'units'}")
    print("Done!")


if __name__ == "__main__":
    main()