has issues
This commit is contained in:
parent
2e112d7398
commit
c645b0f1d4
96 changed files with 2147083 additions and 5787 deletions
269
pipeline/transform/tree_overlay_tiles.py
Normal file
269
pipeline/transform/tree_overlay_tiles.py
Normal file
|
|
@ -0,0 +1,269 @@
|
|||
"""Build PMTiles polygon tiles for the Trees Outside Woodland overlay."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pyogrio
|
||||
import shapely
|
||||
from pyproj import Transformer
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.transform.tree_density import (
|
||||
DEFAULT_TOW_TYPES,
|
||||
_layers,
|
||||
_tow_dataset_path,
|
||||
_where_for_tow_types,
|
||||
)
|
||||
|
||||
|
||||
def _require_tippecanoe() -> str:
|
||||
executable = shutil.which("tippecanoe")
|
||||
if executable is None:
|
||||
raise RuntimeError(
|
||||
"tippecanoe is required to build tree overlay PMTiles. "
|
||||
"Install tippecanoe and rerun this target."
|
||||
)
|
||||
return executable
|
||||
|
||||
|
||||
def _column_or_none(batch, names: list[str], column: str):
|
||||
if column not in names:
|
||||
return None
|
||||
return batch.column(names.index(column)).to_numpy(zero_copy_only=False)
|
||||
|
||||
|
||||
def _number_or_none(value) -> float | int | None:
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
if np.isfinite(value):
|
||||
if float(value).is_integer():
|
||||
return int(value)
|
||||
return round(float(value), 2)
|
||||
except TypeError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _write_tree_geojsonseq(
|
||||
dataset_path: str,
|
||||
output_path: Path,
|
||||
tow_types: tuple[str, ...],
|
||||
batch_size: int,
|
||||
layer_names: tuple[str, ...] | None,
|
||||
max_features_per_layer: int | None,
|
||||
) -> int:
|
||||
to_wgs84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)
|
||||
where = _where_for_tow_types(tow_types)
|
||||
layers = _layers(dataset_path, layer_names)
|
||||
print(f"Processing {len(layers)} TOW layer(s): {', '.join(layers)}")
|
||||
if where:
|
||||
print(f"TOW type filter: {where}")
|
||||
|
||||
columns = [
|
||||
"TOW_ID",
|
||||
"Woodland_Type",
|
||||
"TOW_Area_M",
|
||||
"MEANHT",
|
||||
"MINHT",
|
||||
"MAXHT",
|
||||
"LiDAR_Survey_Year",
|
||||
]
|
||||
feature_count = 0
|
||||
|
||||
with output_path.open("w") as file:
|
||||
for layer in layers:
|
||||
info = pyogrio.read_info(dataset_path, layer=layer)
|
||||
print(f"\nLayer {layer}: {info.get('features', 0):,} features")
|
||||
layer_features_seen = 0
|
||||
|
||||
with pyogrio.open_arrow(
|
||||
dataset_path,
|
||||
layer=layer,
|
||||
columns=columns,
|
||||
where=where,
|
||||
batch_size=batch_size,
|
||||
use_pyarrow=True,
|
||||
) as (_meta, reader):
|
||||
for batch in reader:
|
||||
if max_features_per_layer is not None:
|
||||
remaining = max_features_per_layer - layer_features_seen
|
||||
if remaining <= 0:
|
||||
break
|
||||
if batch.num_rows > remaining:
|
||||
batch = batch.slice(0, remaining)
|
||||
|
||||
layer_features_seen += batch.num_rows
|
||||
names = batch.schema.names
|
||||
area = np.asarray(
|
||||
batch.column(names.index("TOW_Area_M")).to_numpy(
|
||||
zero_copy_only=False
|
||||
),
|
||||
dtype=np.float64,
|
||||
)
|
||||
geometry = np.asarray(
|
||||
batch.column(names.index("SHAPE")).to_numpy(
|
||||
zero_copy_only=False
|
||||
),
|
||||
dtype=object,
|
||||
)
|
||||
valid = np.isfinite(area) & (area > 0)
|
||||
if not valid.any():
|
||||
continue
|
||||
|
||||
tow_id = _column_or_none(batch, names, "TOW_ID")
|
||||
woodland_type = _column_or_none(batch, names, "Woodland_Type")
|
||||
mean_height = _column_or_none(batch, names, "MEANHT")
|
||||
min_height = _column_or_none(batch, names, "MINHT")
|
||||
max_height = _column_or_none(batch, names, "MAXHT")
|
||||
lidar_year = _column_or_none(batch, names, "LiDAR_Survey_Year")
|
||||
|
||||
geometries = shapely.from_wkb(geometry[valid])
|
||||
geometries = shapely.transform(
|
||||
geometries,
|
||||
to_wgs84.transform,
|
||||
interleaved=False,
|
||||
)
|
||||
geometries_json = shapely.to_geojson(geometries)
|
||||
valid_indexes = np.flatnonzero(valid)
|
||||
|
||||
for idx, geometry_json in zip(valid_indexes, geometries_json):
|
||||
properties = {
|
||||
"tow_id": str(tow_id[idx]) if tow_id is not None else "",
|
||||
"woodland_type": (
|
||||
str(woodland_type[idx])
|
||||
if woodland_type is not None
|
||||
else ""
|
||||
),
|
||||
"area_sqm": _number_or_none(area[idx]),
|
||||
"mean_height_m": (
|
||||
_number_or_none(mean_height[idx])
|
||||
if mean_height is not None
|
||||
else None
|
||||
),
|
||||
"min_height_m": (
|
||||
_number_or_none(min_height[idx])
|
||||
if min_height is not None
|
||||
else None
|
||||
),
|
||||
"max_height_m": (
|
||||
_number_or_none(max_height[idx])
|
||||
if max_height is not None
|
||||
else None
|
||||
),
|
||||
"lidar_year": (
|
||||
_number_or_none(lidar_year[idx])
|
||||
if lidar_year is not None
|
||||
else None
|
||||
),
|
||||
"source_layer": layer,
|
||||
}
|
||||
feature = {
|
||||
"type": "Feature",
|
||||
"geometry": json.loads(geometry_json),
|
||||
"properties": properties,
|
||||
}
|
||||
file.write(json.dumps(feature, separators=(",", ":")) + "\n")
|
||||
feature_count += 1
|
||||
|
||||
return feature_count
|
||||
|
||||
|
||||
def build_tree_overlay_tiles(
|
||||
tow_zip: Path,
|
||||
output_path: Path,
|
||||
extract_dir: Path,
|
||||
tow_types: tuple[str, ...],
|
||||
batch_size: int,
|
||||
layer_names: tuple[str, ...] | None,
|
||||
max_features_per_layer: int | None,
|
||||
min_zoom: int,
|
||||
max_zoom: int,
|
||||
force_extract: bool,
|
||||
use_vsizip: bool,
|
||||
) -> None:
|
||||
tippecanoe = _require_tippecanoe()
|
||||
dataset_path = _tow_dataset_path(tow_zip, extract_dir, force_extract, use_vsizip)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as tmp:
|
||||
ndjson_path = Path(tmp) / "trees_outside_woodlands.geojsonseq"
|
||||
feature_count = _write_tree_geojsonseq(
|
||||
dataset_path,
|
||||
ndjson_path,
|
||||
tow_types,
|
||||
batch_size,
|
||||
layer_names,
|
||||
max_features_per_layer,
|
||||
)
|
||||
print(f"Writing {feature_count:,} TOW polygon features")
|
||||
|
||||
subprocess.run(
|
||||
[
|
||||
tippecanoe,
|
||||
"--force",
|
||||
"--output",
|
||||
str(output_path),
|
||||
"--layer",
|
||||
"trees_outside_woodlands",
|
||||
"--minimum-zoom",
|
||||
str(min_zoom),
|
||||
"--maximum-zoom",
|
||||
str(max_zoom),
|
||||
"--drop-smallest-as-needed",
|
||||
"--extend-zooms-if-still-dropping",
|
||||
str(ndjson_path),
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--tow-zip", type=Path, required=True)
|
||||
parser.add_argument("--output", type=Path, required=True)
|
||||
parser.add_argument(
|
||||
"--extract-dir",
|
||||
type=Path,
|
||||
default=Path("property-data/fr_tow_v1_all"),
|
||||
help="Directory used to extract the FileGDB",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tow-type",
|
||||
action="append",
|
||||
dest="tow_types",
|
||||
help="Woodland_Type to include; repeatable. Defaults to TOW outside-woodland classes.",
|
||||
)
|
||||
parser.add_argument("--batch-size", type=int, default=50_000)
|
||||
parser.add_argument("--layer", action="append", dest="layers")
|
||||
parser.add_argument("--max-features-per-layer", type=int)
|
||||
parser.add_argument("--min-zoom", type=int, default=15)
|
||||
parser.add_argument("--max-zoom", type=int, default=17)
|
||||
parser.add_argument("--force-extract", action="store_true")
|
||||
parser.add_argument("--use-vsizip", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
build_tree_overlay_tiles(
|
||||
tow_zip=args.tow_zip,
|
||||
output_path=args.output,
|
||||
extract_dir=args.extract_dir,
|
||||
tow_types=tuple(args.tow_types or DEFAULT_TOW_TYPES),
|
||||
batch_size=args.batch_size,
|
||||
layer_names=tuple(args.layers) if args.layers else None,
|
||||
max_features_per_layer=args.max_features_per_layer,
|
||||
min_zoom=args.min_zoom,
|
||||
max_zoom=args.max_zoom,
|
||||
force_extract=args.force_extract,
|
||||
use_vsizip=args.use_vsizip,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue