"""Build PMTiles polygon tiles for the Trees Outside Woodland overlay.""" from __future__ import annotations import argparse import json import shutil import subprocess import tempfile from pathlib import Path import numpy as np import pyogrio import shapely from pyproj import Transformer from pipeline.local_temp import local_tmp_dir from pipeline.transform.tree_density import ( DEFAULT_TOW_TYPES, _layers, _tow_dataset_path, _where_for_tow_types, ) def _require_tippecanoe() -> str: executable = shutil.which("tippecanoe") if executable is None: raise RuntimeError( "tippecanoe is required to build tree overlay PMTiles. " "Install tippecanoe and rerun this target." ) return executable def _column_or_none(batch, names: list[str], column: str): if column not in names: return None return batch.column(names.index(column)).to_numpy(zero_copy_only=False) def _number_or_none(value) -> float | int | None: if value is None: return None try: if np.isfinite(value): if float(value).is_integer(): return int(value) return round(float(value), 2) except TypeError: return None return None def _write_tree_geojsonseq( dataset_path: str, output_path: Path, tow_types: tuple[str, ...], batch_size: int, layer_names: tuple[str, ...] | None, max_features_per_layer: int | None, ) -> int: to_wgs84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True) where = _where_for_tow_types(tow_types) layers = _layers(dataset_path, layer_names) print(f"Processing {len(layers)} TOW layer(s): {', '.join(layers)}") if where: print(f"TOW type filter: {where}") columns = [ "TOW_ID", "Woodland_Type", "TOW_Area_M", "MEANHT", "MINHT", "MAXHT", "LiDAR_Survey_Year", ] feature_count = 0 with output_path.open("w") as file: for layer in layers: info = pyogrio.read_info(dataset_path, layer=layer) print(f"\nLayer {layer}: {info.get('features', 0):,} features") layer_features_seen = 0 with pyogrio.open_arrow( dataset_path, layer=layer, columns=columns, where=where, batch_size=batch_size, use_pyarrow=True, ) as (_meta, reader): for batch in reader: if max_features_per_layer is not None: remaining = max_features_per_layer - layer_features_seen if remaining <= 0: break if batch.num_rows > remaining: batch = batch.slice(0, remaining) layer_features_seen += batch.num_rows names = batch.schema.names area = np.asarray( batch.column(names.index("TOW_Area_M")).to_numpy( zero_copy_only=False ), dtype=np.float64, ) geometry = np.asarray( batch.column(names.index("SHAPE")).to_numpy( zero_copy_only=False ), dtype=object, ) valid = np.isfinite(area) & (area > 0) if not valid.any(): continue tow_id = _column_or_none(batch, names, "TOW_ID") woodland_type = _column_or_none(batch, names, "Woodland_Type") mean_height = _column_or_none(batch, names, "MEANHT") min_height = _column_or_none(batch, names, "MINHT") max_height = _column_or_none(batch, names, "MAXHT") lidar_year = _column_or_none(batch, names, "LiDAR_Survey_Year") geometries = shapely.from_wkb(geometry[valid]) geometries = shapely.transform( geometries, to_wgs84.transform, interleaved=False, ) geometries_json = shapely.to_geojson(geometries) valid_indexes = np.flatnonzero(valid) for idx, geometry_json in zip(valid_indexes, geometries_json): properties = { "tow_id": str(tow_id[idx]) if tow_id is not None else "", "woodland_type": ( str(woodland_type[idx]) if woodland_type is not None else "" ), "area_sqm": _number_or_none(area[idx]), "mean_height_m": ( _number_or_none(mean_height[idx]) if mean_height is not None else None ), "min_height_m": ( _number_or_none(min_height[idx]) if min_height is not None else None ), "max_height_m": ( _number_or_none(max_height[idx]) if max_height is not None else None ), "lidar_year": ( _number_or_none(lidar_year[idx]) if lidar_year is not None else None ), "source_layer": layer, } feature = { "type": "Feature", "geometry": json.loads(geometry_json), "properties": properties, } file.write(json.dumps(feature, separators=(",", ":")) + "\n") feature_count += 1 return feature_count def build_tree_overlay_tiles( tow_zip: Path, output_path: Path, extract_dir: Path, tow_types: tuple[str, ...], batch_size: int, layer_names: tuple[str, ...] | None, max_features_per_layer: int | None, min_zoom: int, max_zoom: int, force_extract: bool, use_vsizip: bool, ) -> None: tippecanoe = _require_tippecanoe() dataset_path = _tow_dataset_path(tow_zip, extract_dir, force_extract, use_vsizip) output_path.parent.mkdir(parents=True, exist_ok=True) with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as tmp: ndjson_path = Path(tmp) / "trees_outside_woodlands.geojsonseq" feature_count = _write_tree_geojsonseq( dataset_path, ndjson_path, tow_types, batch_size, layer_names, max_features_per_layer, ) print(f"Writing {feature_count:,} TOW polygon features") subprocess.run( [ tippecanoe, "--force", "--output", str(output_path), "--layer", "trees_outside_woodlands", "--minimum-zoom", str(min_zoom), "--maximum-zoom", str(max_zoom), "--drop-smallest-as-needed", "--extend-zooms-if-still-dropping", str(ndjson_path), ], check=True, ) def main() -> None: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--tow-zip", type=Path, required=True) parser.add_argument("--output", type=Path, required=True) parser.add_argument( "--extract-dir", type=Path, default=Path("property-data/fr_tow_v1_all"), help="Directory used to extract the FileGDB", ) parser.add_argument( "--tow-type", action="append", dest="tow_types", help="Woodland_Type to include; repeatable. Defaults to TOW outside-woodland classes.", ) parser.add_argument("--batch-size", type=int, default=50_000) parser.add_argument("--layer", action="append", dest="layers") parser.add_argument("--max-features-per-layer", type=int) parser.add_argument("--min-zoom", type=int, default=15) parser.add_argument("--max-zoom", type=int, default=17) parser.add_argument("--force-extract", action="store_true") parser.add_argument("--use-vsizip", action="store_true") args = parser.parse_args() build_tree_overlay_tiles( tow_zip=args.tow_zip, output_path=args.output, extract_dir=args.extract_dir, tow_types=tuple(args.tow_types or DEFAULT_TOW_TYPES), batch_size=args.batch_size, layer_names=tuple(args.layers) if args.layers else None, max_features_per_layer=args.max_features_per_layer, min_zoom=args.min_zoom, max_zoom=args.max_zoom, force_extract=args.force_extract, use_vsizip=args.use_vsizip, ) if __name__ == "__main__": main()