# Source listing metadata: 146 lines, 4.6 KiB, Python.
import argparse
|
|
from pathlib import Path
|
|
|
|
from shapely.geometry import MultiPolygon, Polygon
|
|
from tqdm import tqdm
|
|
|
|
from .inspire import (
|
|
cache_inspire,
|
|
get_inspire_candidates,
|
|
inspire_cache_exists,
|
|
load_inspire,
|
|
)
|
|
from .memory import release_memory
|
|
from .oa_boundaries import load_oa_boundaries
|
|
from .output import merge_fragments, write_district_geojson
|
|
from .process_oa import process_oa
|
|
from .uprn import get_oa_uprns, load_uprns
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Return the argument parser for the postcode-boundary pipeline."""
    parser = argparse.ArgumentParser(
        description="Generate postcode boundary polygons from OA + INSPIRE + UPRN data"
    )
    parser.add_argument("--uprn", type=Path, required=True, help="UPRN lookup parquet")
    parser.add_argument(
        "--oa-boundaries", type=Path, required=True, help="OA boundaries GeoPackage"
    )
    parser.add_argument(
        "--inspire", type=Path, required=True, help="INSPIRE ZIP directory"
    )
    parser.add_argument("--output", type=Path, required=True, help="Output directory")
    parser.add_argument(
        "--limit", type=int, default=0, help="Process only first N OAs (0=all)"
    )
    parser.add_argument(
        "--greenspace",
        type=Path,
        default=None,
        help="Greenspace/water parquet for boundary trimming (optional)",
    )
    return parser


def _print_phase(title: str, *, leading_blank: bool = True) -> None:
    """Print a phase banner: an optional blank line, then *title* between rules."""
    if leading_blank:
        print()
    rule = "=" * 60
    print(rule)
    print(title)
    print(rule)


# The annotation is quoted so that it is not evaluated at definition time and
# the signature stays importable on interpreters without ``X | Y`` support.
def main(argv: "list[str] | None" = None) -> None:
    """Run the postcode-boundary pipeline from the command line.

    The run has four phases, each announced on stdout:

    1. Load the OA boundaries and the UPRN lookup.
    2. Build the INSPIRE cache under ``<output>/inspire_cache`` when
       ``inspire_cache_exists`` reports it missing, then load it.
    3. Turn every OA that has both a boundary and UPRNs into
       ``(postcode, geometry)`` fragments.
    4. Merge the fragments per postcode and write the district GeoJSON files.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which is
            what a plain ``main()`` call has always done.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    # Phase 1: Load all data
    _print_phase("Phase 1: Loading data", leading_blank=False)

    oa_geoms = load_oa_boundaries(args.oa_boundaries)
    uprn_df, uprn_offsets = load_uprns(args.uprn)

    # Phase 2: Parse/load INSPIRE
    _print_phase("Phase 2: INSPIRE data")

    inspire_cache_dir = args.output / "inspire_cache"
    if not inspire_cache_exists(inspire_cache_dir):
        cache_inspire(args.inspire, inspire_cache_dir)
    inspire_bboxes, inspire_offsets, inspire_coords = load_inspire(inspire_cache_dir)

    # Phase 3: Process OAs
    _print_phase("Phase 3: Processing OAs")

    # Work list: OA codes present in both inputs, sorted so that runs (and
    # the --limit prefix) are deterministic.
    oa_codes_with_data = sorted(set(oa_geoms.keys()) & set(uprn_offsets.keys()))
    # Skip counts describe the full inputs, so take them before --limit is applied.
    skipped_no_uprn = len(oa_geoms) - len(oa_codes_with_data)
    skipped_no_boundary = len(uprn_offsets) - len(oa_codes_with_data)

    if args.limit > 0:
        oa_codes_with_data = oa_codes_with_data[: args.limit]

    print(f" OAs with UPRNs + boundaries: {len(oa_codes_with_data)}")
    print(f" Skipped (no UPRNs): {skipped_no_uprn}")
    print(f" Skipped (no boundary): {skipped_no_boundary}")

    all_fragments: list[tuple[str, Polygon | MultiPolygon]] = []
    single_count = 0
    multi_count = 0

    for oa_code in tqdm(
        oa_codes_with_data,
        desc="Processing OAs",
        unit="OA",
        smoothing=0.01,
        miniters=100,
    ):
        oa_geom = oa_geoms[oa_code]
        points, postcodes = get_oa_uprns(uprn_df, uprn_offsets, oa_code)

        if len(set(postcodes)) == 1:
            # Fast path: every UPRN in this OA shares one postcode, so the
            # whole OA geometry is assigned to it without any splitting.
            all_fragments.append((postcodes[0], oa_geom))
            single_count += 1
            continue

        # Get INSPIRE candidates via bbox pre-filter
        candidates = get_inspire_candidates(
            oa_geom.bounds, inspire_bboxes, inspire_offsets, inspire_coords
        )

        all_fragments.extend(process_oa(oa_geom, points, postcodes, candidates))
        multi_count += 1

    print(f"\n Single-postcode OAs (fast path): {single_count}")
    print(f" Multi-postcode OAs (INSPIRE+Voronoi): {multi_count}")
    print(f" Total fragments: {len(all_fragments)}")

    # Drop the inputs before the merge phase; only the fragments are needed
    # from here on.
    del oa_geoms, uprn_df, uprn_offsets
    del inspire_bboxes, inspire_offsets, inspire_coords
    release_memory()

    # Phase 4: Merge and write
    _print_phase("Phase 4: Merging fragments and writing GeoJSON")

    greenspace_tree = None
    greenspace_geoms = None
    if args.greenspace is not None:
        if args.greenspace.exists():
            # Imported lazily: only needed when the optional input is used.
            from .greenspace import load_greenspace

            print(f" Loading greenspace/water from {args.greenspace}...")
            greenspace_tree, greenspace_geoms = load_greenspace(args.greenspace)
            print(f" Loaded {len(greenspace_geoms)} greenspace/water polygons")
        else:
            # The option stays best-effort, but a mistyped path should not
            # go unnoticed.
            print(
                f" WARNING: greenspace file {args.greenspace} not found;"
                " continuing without boundary trimming"
            )

    merged = merge_fragments(
        all_fragments,
        greenspace_tree=greenspace_tree,
        greenspace_geoms=greenspace_geoms,
    )
    print(f" Merged into {len(merged)} unique postcodes")

    file_count = write_district_geojson(merged, args.output)
    print(f"\n Wrote {file_count} district GeoJSON files to {args.output / 'units'}")
    print("Done!")
|
|
|
|
|
|
# Script entry point: run the pipeline only when this module is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    main()
|