This commit is contained in:
Andras Schmelczer 2026-05-31 20:20:41 +01:00
parent 8688b7475e
commit e8345cbdc1
40 changed files with 1980 additions and 904 deletions

View file

@ -1,10 +1,12 @@
import json
import shutil
from collections import defaultdict
from pathlib import Path
from pyproj import Transformer
from shapely import make_valid
from shapely.geometry import MultiPolygon, Polygon
from shapely import make_valid, set_precision
from shapely.geometry import MultiPolygon, Polygon, mapping, shape
from shapely.ops import transform as transform_geometry
from shapely.ops import unary_union
from tqdm import tqdm
@ -18,49 +20,47 @@ def _get_to_wgs84():
return _to_wgs84
def _largest_polygonal(geom) -> Polygon | None:
    """Return the largest-area polygon contained in ``geom``, or None.

    ``None`` and empty inputs yield None. An invalid geometry is first
    repaired with ``make_valid``; the result is then reduced by its type:

    * ``Polygon``: returned unchanged.
    * ``MultiPolygon``: the non-empty member with the largest area.
    * ``GeometryCollection``: every part is reduced recursively and the
      largest resulting polygon is returned.
    * any other geometry type: None.

    Args:
        geom: A geometry exposing ``is_empty``, ``is_valid``, ``geom_type``
            and, for multi-part types, ``geoms`` -- or None.

    Returns:
        A single polygon, or None when no polygonal part exists.
    """
    if geom is None or geom.is_empty:
        return None
    if not geom.is_valid:
        geom = make_valid(geom)
        # Guard against a repair result that is itself empty, so callers
        # that only test ``is None`` never receive an empty geometry.
        if geom.is_empty:
            return None

    kind = geom.geom_type
    if kind == "Polygon":
        return geom
    if kind == "MultiPolygon":
        candidates = [part for part in geom.geoms if not part.is_empty]
    elif kind == "GeometryCollection":
        candidates = [
            polygon
            for part in geom.geoms
            if (polygon := _largest_polygonal(part)) is not None
        ]
    else:
        # Points, lines and other non-areal types carry no polygon.
        return None
    return max(candidates, key=lambda g: g.area, default=None)
def to_wgs84_geojson(
    geom: Polygon | MultiPolygon, tolerance: float = 1.0
) -> dict | None:
    """Simplify geometry in BNG, convert to WGS84, return GeoJSON dict.

    The geometry is reduced to its largest polygon at each stage: on input,
    after simplification, and again after reprojection and precision
    snapping, because each of those steps can change the geometry.

    Args:
        geom: Source geometry in the projected (BNG) coordinate system.
        tolerance: Simplification tolerance passed to ``simplify``, in the
            units of the source coordinate system.

    Returns:
        A GeoJSON-style geometry mapping for a single polygon in WGS84, or
        None when no polygonal area survives any of the stages.
    """
    geom = _largest_polygonal(geom)
    if geom is None:
        return None

    simplified = _largest_polygonal(
        geom.simplify(tolerance, preserve_topology=True)
    )
    if simplified is None:
        return None

    transformer = _get_to_wgs84()
    wgs84 = transform_geometry(transformer.transform, simplified)
    # Snap coordinates to a 1e-6 degree grid. Snapping can alter the
    # geometry, so it is reduced to a single polygon once more below.
    wgs84 = set_precision(wgs84, 0.000001, mode="valid_output")
    wgs84 = _largest_polygonal(wgs84)
    if wgs84 is None:
        return None
    return mapping(wgs84)
def _fill_holes(geom):
@ -132,7 +132,11 @@ def write_district_geojson(
) -> int:
"""Group postcodes by district, write GeoJSON files. Returns file count."""
units_dir = output_dir / "units"
units_dir.mkdir(parents=True, exist_ok=True)
tmp_units_dir = output_dir / "units.tmp"
output_dir.mkdir(parents=True, exist_ok=True)
if tmp_units_dir.exists():
shutil.rmtree(tmp_units_dir)
tmp_units_dir.mkdir(parents=True)
by_district: dict[str, list[tuple[str, Polygon | MultiPolygon]]] = defaultdict(list)
for pc, geom in postcodes.items():
@ -141,14 +145,23 @@ def write_district_geojson(
by_district[district].append((pc, geom))
file_count = 0
seen_postcodes: set[str] = set()
for district, entries in tqdm(
sorted(by_district.items()), desc="Writing GeoJSON", unit="file"
):
features = []
for pc, geom in sorted(entries, key=lambda x: x[0]):
if pc in seen_postcodes:
raise ValueError(f"Duplicate postcode boundary feature: {pc}")
seen_postcodes.add(pc)
geojson_geom = to_wgs84_geojson(geom)
if geojson_geom is None:
continue
raise ValueError(f"Postcode boundary collapsed to empty geometry: {pc}")
written_geom = shape(geojson_geom)
if written_geom.is_empty or not written_geom.is_valid:
raise ValueError(
f"Invalid postcode boundary geometry after output: {pc}"
)
mapit_code = pc.replace(" ", "")
features.append(
{
@ -165,9 +178,12 @@ def write_district_geojson(
continue
collection = {"type": "FeatureCollection", "features": features}
out_path = units_dir / f"{district}.geojson"
out_path = tmp_units_dir / f"{district}.geojson"
with open(out_path, "w") as f:
json.dump(collection, f, separators=(",", ":"))
file_count += 1
if units_dir.exists():
shutil.rmtree(units_dir)
tmp_units_dir.replace(units_dir)
return file_count