perfect-postcode/pipeline/download/conservation_areas.py
2026-05-28 21:48:35 +01:00

73 lines
2.4 KiB
Python

"""Download Planning Data conservation area polygons.
Source: https://www.planning.data.gov.uk/dataset/conservation-area
License: Open Government Licence v3.0
"""
import argparse
from pathlib import Path
import httpx
import pyogrio
from shapely import from_wkb
# GeoJSON export of the Planning Data conservation-area dataset; this is the
# address main() streams to disk.
URL = "https://files.planning.data.gov.uk/dataset/conservation-area.geojson"
def _geometry_column(metadata: dict, column_names: list[str]) -> str:
geometry_name = metadata.get("geometry_name")
if geometry_name:
return str(geometry_name)
for name in ("wkb_geometry", "geometry", "geom"):
if name in column_names:
return name
return column_names[-1]
def _validate_conservation_areas(path: Path) -> int:
    """Check that a downloaded file has features and at least one polygon.

    Args:
        path: Location of the downloaded file to inspect.

    Returns:
        The feature count reported by ``pyogrio.read_info``.

    Raises:
        ValueError: If the reported feature count is not positive, or if no
            geometry in the file is a non-empty Polygon or MultiPolygon.
    """
    feature_count = pyogrio.read_info(path).get("features", 0)
    if feature_count <= 0:
        raise ValueError("Downloaded conservation areas file contains no features")

    # Request no attribute columns: only the geometry is needed for the check.
    meta, arrow_table = pyogrio.read_arrow(path, columns=[], read_geometry=True)
    column = _geometry_column(meta, arrow_table.column_names)
    wkb_values = arrow_table[column].combine_chunks().to_pylist()

    polygon_types = {"Polygon", "MultiPolygon"}
    polygon_count = 0
    for geom in from_wkb(wkb_values):
        # Skip missing and empty geometries before looking at the type.
        if geom is None or geom.is_empty:
            continue
        if geom.geom_type in polygon_types:
            polygon_count += 1

    if polygon_count <= 0:
        raise ValueError("Downloaded conservation areas file contains no polygons")
    return int(feature_count)
def main() -> None:
    """Download the conservation areas GeoJSON to the ``--output`` path.

    The response is streamed into a sibling temporary file (same suffix as the
    output, so the format is still recognisable), validated, and only then
    moved over the output path, so an existing output file is never replaced
    by a partial or invalid download.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
        ValueError: If the downloaded file fails validation.
    """
    parser = argparse.ArgumentParser(
        description="Download Planning Data conservation area polygons"
    )
    parser.add_argument(
        "--output", type=Path, required=True, help="Output GeoJSON file path"
    )
    args = parser.parse_args()

    args.output.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = args.output.with_name(f"{args.output.stem}.tmp{args.output.suffix}")

    print("Downloading Planning Data conservation areas...")
    try:
        with httpx.stream("GET", URL, follow_redirects=True, timeout=300) as response:
            response.raise_for_status()
            with tmp_path.open("wb") as fh:
                for chunk in response.iter_bytes():
                    fh.write(chunk)

        # Validate after the file handle is closed so all bytes are on disk.
        features = _validate_conservation_areas(tmp_path)
        tmp_path.replace(args.output)
    finally:
        # Do not leave a partial or rejected download behind on failure.
        # After a successful replace() the temp file is gone and this is a
        # no-op thanks to missing_ok.
        tmp_path.unlink(missing_ok=True)

    size_mb = args.output.stat().st_size / (1024 * 1024)
    print(f"Saved {features} conservation areas to {args.output} ({size_mb:.1f} MB)")
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()