This commit is contained in:
Andras Schmelczer 2026-05-31 20:20:41 +01:00
parent 8688b7475e
commit e8345cbdc1
40 changed files with 1980 additions and 904 deletions

View file

@ -1,16 +1,28 @@
"""Derive street-scale tree density metrics from Forest Research TOW + NFI data.
"""Derive postcode-scale tree density metrics from Forest Research TOW + NFI data.
The Forest Research Trees Outside Woodland release is an Esri File Geodatabase
inside property-data/FR_TOW_V1_ALL.zip. This transformer computes a compact
postcode-level metric from the tree polygons, then optionally rolls that up to
Price Paid street names so the dashboard can answer "what is this address's
street like?" without loading the full geodatabase at runtime.
postcode-level metric from the tree polygons so the dashboard can answer "how
green is this postcode?" without loading the full geodatabase at runtime.
TOW only covers trees *outside* woodland, so the National Forest Inventory (NFI)
woodland layer is optionally unioned in. TOW canopy is accumulated by centroid
proximity (tiny crowns), while large NFI woodland parcels are accumulated by
true buffer-clipped intersection area so they cannot saturate a postcode from
mere centroid proximity.
Every postcode centroid is expanded into a radius-r buffer ("extended area").
Both TOW tree crowns and National Forest Inventory (NFI) woodland parcels are
accumulated by *true buffer-clipped intersection area*: only the part of each
polygon that falls inside a postcode's buffer is counted, never the area that
spills outside it. A crown straddling the buffer edge therefore contributes only
its inside portion, and a parcel reaching into the buffer from outside is still
counted -- no polygon can saturate a postcode from mere proximity.
TOW only covers trees *outside* woodland, so the NFI woodland layer is the
geometric complement of TOW and is optionally unioned in. The two products are
*assumed disjoint*: clipped TOW crown area and clipped NFI woodland area are
summed into the same per-postcode accumulator, so any spatial overlap between a
TOW crown and an NFI parcel (boundary slop where "groups of trees" meet
"woodland") would be double-counted. The final density is capped at 100% and
_finalize_metrics logs how many postcodes exceed 100% raw coverage, which is a
direct symptom of such overlap (or of overlapping crowns within one buffer); if
that count is material the products are not disjoint and the NFI clip should be
taken against the complement of TOW.
"""
from __future__ import annotations
@ -25,16 +37,12 @@ import numpy as np
import polars as pl
import pyogrio
import shapely
from scipy.spatial import cKDTree
TOW_GDB_NAME = "FR_TOW_V1_ALL.gdb"
STREET_TREE_DENSITY_COL = "Street tree density percentile"
STREET_TREE_COVERAGE_COL = "Street tree coverage (%)"
POSTCODE_DENSITY_COL = "Tree canopy density within {radius}m (%)"
POSTCODE_DENSITY_PERCENTILE_COL = "Tree canopy density percentile within {radius}m"
POSTCODE_AREA_COL = "Tree canopy area within {radius}m (sqm)"
POSTCODE_COUNT_COL = "Tree features within {radius}m"
POSTCODE_HEIGHT_COL = "Mean TOW height within {radius}m (m)"
# National Forest Inventory (NFI) woodland — the geometric complement of TOW.
@ -131,13 +139,24 @@ def _safe_extract_zip_dir(zip_path: Path, extract_dir: Path, force: bool) -> Pat
def _nfi_dataset_path(
zip_path: Path, extract_dir: Path, force_extract: bool, use_vsizip: bool
) -> str:
"""Resolve the NFI woodland shapefile path, extracting the zip if needed."""
"""Resolve the NFI woodland shapefile path, extracting the zip if needed.
Raises if the archive contains zero or more than one shapefile rather than
silently picking one, so an ambiguous NFI release fails loudly instead of
accumulating canopy from the wrong layer.
"""
if use_vsizip:
return f"/vsizip/{zip_path.resolve()}"
extracted = _safe_extract_zip_dir(zip_path, extract_dir, force_extract)
shapefiles = sorted(extracted.rglob("*.shp"))
if not shapefiles:
raise FileNotFoundError(f"No .shp found inside {zip_path}")
if len(shapefiles) > 1:
names = ", ".join(path.name for path in shapefiles)
raise ValueError(
f"Expected exactly one shapefile inside {zip_path}, found {len(shapefiles)} "
f"({names}); cannot unambiguously pick the NFI woodland layer"
)
return str(shapefiles[0])
@ -146,7 +165,7 @@ def _geometry_column(metadata: dict, column_names: list[str]) -> str:
geometry_name = metadata.get("geometry_name")
if geometry_name:
return str(geometry_name)
for name in ("wkb_geometry", "geometry", "geom"):
for name in ("wkb_geometry", "geometry", "geom", "SHAPE"):
if name in column_names:
return name
return column_names[-1]
@ -184,11 +203,10 @@ def _layers(dataset_path: str, selected_layers: tuple[str, ...] | None) -> list[
return [layer for layer in available if layer in selected_layers]
def _metric_columns(radius_m: int) -> tuple[str, str, str]:
    """Return the radius-specific metric column names.

    Args:
        radius_m: Buffer radius in metres, substituted into each column
            template.

    Returns:
        ``(density_col, area_col, height_col)`` in that order, which is the
        order callers unpack.
    """
    return (
        POSTCODE_DENSITY_COL.format(radius=radius_m),
        POSTCODE_AREA_COL.format(radius=radius_m),
        POSTCODE_HEIGHT_COL.format(radius=radius_m),
    )
@ -198,20 +216,23 @@ def _postcode_density_percentile_col(radius_m: int) -> str:
def _coverage_percentile_expr(column: str, alias: str) -> pl.Expr:
    """Rank tree coverage on a 0-100 percentile scale.

    One average-rank formula, ``(rank - 1) / (n - 1) * 100`` rounded to one
    decimal, is applied to every non-null value, where ``n`` is the number of
    non-null values. Tied values share their mean rank, so the lowest coverage
    maps to 0 and the highest to 100 only when they are not themselves tied.
    A column whose values are all equal therefore maps to 50, and a column
    with a single non-null value takes the explicit fallback of 50.

    Args:
        column: Name of the coverage column to rank; NaN is treated as null.
        alias: Name given to the resulting Float32 percentile column.

    Returns:
        An expression that is null wherever the input is null or NaN.
    """
    value = pl.col(column).fill_nan(None)
    non_null_count = value.count()
    rank = value.rank("average")
    return (
        pl.when(value.is_null())
        .then(None)
        .when(non_null_count > 1)
        .then(((rank - 1) / (non_null_count - 1) * 100).round(1))
        # A single non-null value has no spread to rank against.
        .otherwise(50.0)
        .cast(pl.Float32)
        .alias(alias)
    )
@ -220,7 +241,7 @@ def _coverage_percentile_expr(column: str, alias: str) -> pl.Expr:
def _with_postcode_density_percentiles(
postcode_metrics: pl.DataFrame, radius_m: int
) -> pl.DataFrame:
density_col, _area_col, _count_col, _height_col = _metric_columns(radius_m)
density_col, _area_col, _height_col = _metric_columns(radius_m)
return postcode_metrics.with_columns(
_coverage_percentile_expr(
density_col,
@ -229,28 +250,88 @@ def _with_postcode_density_percentiles(
)
def _accumulate_tree_metrics(
def _postcode_buffers(
    points: pl.DataFrame, radius_m: int
) -> tuple[np.ndarray, shapely.STRtree]:
    """Return one radius-``radius_m`` circle per postcode and an STRtree over them.

    Circles are built in row order from the ``x``/``y`` columns of ``points``,
    so circle ``i`` belongs to postcode row ``i`` and an index returned by the
    tree can be used directly as the postcode accumulator slot.
    """
    centroid_xy = points.select("x", "y").to_numpy()
    centroids = shapely.points(centroid_xy)
    buffers = shapely.buffer(centroids, radius_m, quad_segs=8)
    index = shapely.STRtree(buffers)
    return buffers, index
def _accumulate_clipped_area(
geoms: np.ndarray,
circles: np.ndarray,
tree: shapely.STRtree,
canopy_area: np.ndarray,
height: np.ndarray | None = None,
height_weighted_sum: np.ndarray | None = None,
height_weight: np.ndarray | None = None,
) -> None:
"""Add each polygon's in-buffer overlap area to every postcode it intersects.
Only area(polygon circle) is accumulated -- never the area of the polygon
that falls outside the postcode's extended buffer -- so a crown straddling
the buffer edge contributes only its inside portion and a large parcel cannot
saturate a postcode from mere proximity. When ``height`` is supplied the mean
feature height is accumulated weighted by that same clipped overlap area.
"""
keep = ~shapely.is_missing(geoms) & ~shapely.is_empty(geoms)
geoms = geoms[keep]
if height is not None:
height = height[keep]
if geoms.size == 0:
return
# query(predicate="intersects") over the circle STRtree returns exactly the
# (polygon, circle) pairs whose clipped overlap can be positive -- i.e. the
# polygon overlaps that postcode's radius-r buffer.
geom_index, postcode_index = tree.query(geoms, predicate="intersects")
if geom_index.size == 0:
return
clipped_area = shapely.area(
shapely.intersection(geoms[geom_index], circles[postcode_index])
)
positive = clipped_area > 0
geom_index = geom_index[positive]
postcode_index = postcode_index[positive]
clipped_area = clipped_area[positive]
np.add.at(canopy_area, postcode_index, clipped_area)
if height is not None:
feature_height = height[geom_index]
finite = np.isfinite(feature_height)
if finite.any():
np.add.at(
height_weighted_sum,
postcode_index[finite],
feature_height[finite] * clipped_area[finite],
)
np.add.at(height_weight, postcode_index[finite], clipped_area[finite])
def _accumulate_tow_metrics(
dataset_path: str,
points: pl.DataFrame,
radius_m: int,
circles: np.ndarray,
tree: shapely.STRtree,
canopy_area: np.ndarray,
height_weighted_sum: np.ndarray,
height_weight: np.ndarray,
batch_size: int,
layer_names: tuple[str, ...] | None,
max_features_per_layer: int | None,
workers: int,
canopy_area: np.ndarray,
feature_count: np.ndarray,
height_weighted_sum: np.ndarray,
height_weight: np.ndarray,
) -> None:
xy = points.select("x", "y").to_numpy()
tree = cKDTree(xy)
layers = _layers(dataset_path, layer_names)
print(f"Processing {len(layers)} TOW layer(s): {', '.join(layers)}")
columns = ["Woodland_Type", "TOW_Area_M", "MEANHT"]
columns = ["MEANHT"]
total_features_seen = 0
total_features_used = 0
for layer in layers:
info = pyogrio.read_info(dataset_path, layer=layer)
@ -263,7 +344,7 @@ def _accumulate_tree_metrics(
columns=columns,
batch_size=batch_size,
use_pyarrow=True,
) as (_meta, reader):
) as (meta, reader):
for batch_index, batch in enumerate(reader, start=1):
if max_features_per_layer is not None:
remaining = max_features_per_layer - layer_features_seen
@ -275,135 +356,29 @@ def _accumulate_tree_metrics(
layer_features_seen += batch.num_rows
total_features_seen += batch.num_rows
names = batch.schema.names
area = np.asarray(
batch.column(names.index("TOW_Area_M")).to_numpy(zero_copy_only=False),
dtype=np.float64,
)
geometry_column = _geometry_column(meta, names)
height = np.asarray(
batch.column(names.index("MEANHT")).to_numpy(zero_copy_only=False),
dtype=np.float64,
)
geometry = np.asarray(
batch.column(names.index("SHAPE")).to_numpy(zero_copy_only=False),
batch.column(names.index(geometry_column)).to_numpy(
zero_copy_only=False
),
dtype=object,
)
valid = np.isfinite(area) & (area > 0)
if not valid.any():
continue
geometry = geometry[valid]
area = area[valid]
height = height[valid]
centroids = shapely.centroid(shapely.from_wkb(geometry))
x = shapely.get_x(centroids)
y = shapely.get_y(centroids)
valid_xy = np.isfinite(x) & np.isfinite(y)
if not valid_xy.any():
continue
x = x[valid_xy]
y = y[valid_xy]
area = area[valid_xy]
height = height[valid_xy]
nearby = tree.query_ball_point(
np.column_stack((x, y)), radius_m, workers=workers
_accumulate_clipped_area(
shapely.from_wkb(geometry),
circles,
tree,
canopy_area,
height=height,
height_weighted_sum=height_weighted_sum,
height_weight=height_weight,
)
lengths = np.fromiter(
(len(postcode_indexes) for postcode_indexes in nearby),
dtype=np.int32,
count=len(nearby),
)
matching_features = lengths > 0
if matching_features.any():
postcode_indexes = np.concatenate(
[indexes for indexes in nearby if indexes]
).astype(np.int64, copy=False)
feature_indexes = np.repeat(
np.flatnonzero(matching_features), lengths[matching_features]
)
np.add.at(canopy_area, postcode_indexes, area[feature_indexes])
np.add.at(feature_count, postcode_indexes, 1)
feature_height = height[feature_indexes]
valid_height = np.isfinite(feature_height)
if valid_height.any():
height_area = area[feature_indexes][valid_height]
np.add.at(
height_weighted_sum,
postcode_indexes[valid_height],
feature_height[valid_height] * height_area,
)
np.add.at(
height_weight,
postcode_indexes[valid_height],
height_area,
)
total_features_used += len(area)
if batch_index == 1 or batch_index % 25 == 0:
print(
f" batch {batch_index:,}: "
f"{total_features_seen:,} rows read, "
f"{total_features_used:,} features with usable centroids"
)
def _postcode_buffers(
points: pl.DataFrame, radius_m: int
) -> tuple[np.ndarray, shapely.STRtree]:
"""Build a radius-r circle for every postcode plus an STRtree over them.
Circle index == postcode index, matching the order used by the cKDTree path.
"""
xy = points.select("x", "y").to_numpy()
circles = shapely.buffer(shapely.points(xy), radius_m, quad_segs=8)
return circles, shapely.STRtree(circles)
def _add_nfi_batch(
geoms: np.ndarray,
category: np.ndarray,
circles: np.ndarray,
tree: shapely.STRtree,
canopy_area: np.ndarray,
feature_count: np.ndarray,
radius_m: int,
) -> None:
"""Add NFI woodland into the shared arrays by true buffer-clipped area.
Unlike the TOW centroid path, this clips each woodland polygon to each
nearby postcode circle and adds only area(polygon ∩ circle); a large parcel
therefore cannot saturate a postcode from mere centroid proximity, and a
buffer-filling parcel whose centroid is outside the radius is not missed.
"""
keep = (category == NFI_WOODLAND_VALUE) & ~shapely.is_missing(geoms)
geoms = geoms[keep]
if geoms.size:
geoms = geoms[~shapely.is_empty(geoms)]
if geoms.size == 0:
return
# dwithin(polygon, point, r) is true iff the radius-r circle around the
# point intersects the polygon -- exactly the candidate set we want.
nfi_index, postcode_index = tree.query(
geoms, predicate="dwithin", distance=radius_m
)
if nfi_index.size == 0:
return
clipped_area = shapely.area(
shapely.intersection(geoms[nfi_index], circles[postcode_index])
)
positive = clipped_area > 0
postcode_index = postcode_index[positive]
clipped_area = clipped_area[positive]
np.add.at(canopy_area, postcode_index, clipped_area)
np.add.at(feature_count, postcode_index, 1)
print(f" batch {batch_index:,}: {total_features_seen:,} rows read")
def _accumulate_nfi_metrics(
@ -411,8 +386,6 @@ def _accumulate_nfi_metrics(
circles: np.ndarray,
tree: shapely.STRtree,
canopy_area: np.ndarray,
feature_count: np.ndarray,
radius_m: int,
batch_size: int,
max_nfi_features: int | None,
) -> None:
@ -455,14 +428,12 @@ def _accumulate_nfi_metrics(
),
dtype=object,
)
_add_nfi_batch(
shapely.from_wkb(geometry),
category,
geoms = shapely.from_wkb(geometry)
_accumulate_clipped_area(
geoms[category == NFI_WOODLAND_VALUE],
circles,
tree,
canopy_area,
feature_count,
radius_m,
)
if batch_index == 1 or batch_index % 25 == 0:
print(f" NFI batch {batch_index:,}: {features_seen:,} rows read")
@ -471,15 +442,26 @@ def _accumulate_nfi_metrics(
def _finalize_metrics(
points: pl.DataFrame,
canopy_area: np.ndarray,
feature_count: np.ndarray,
height_weighted_sum: np.ndarray,
height_weight: np.ndarray,
radius_m: int,
) -> pl.DataFrame:
n_points = points.height
density_col, area_col, count_col, height_col = _metric_columns(radius_m)
density_col, area_col, height_col = _metric_columns(radius_m)
buffer_area = math.pi * radius_m * radius_m
density_pct = np.minimum(canopy_area / buffer_area * 100.0, 100.0)
raw_density = canopy_area / buffer_area * 100.0
density_pct = np.minimum(raw_density, 100.0)
# Symptom of the assumed-disjoint TOW/NFI union being violated (or of
# overlapping crowns inside one buffer): clipped areas alone cannot exceed the
# buffer unless polygons overlap. Surface it rather than hide it behind the cap.
over_count = int(np.count_nonzero(raw_density > 100.0))
if over_count:
print(
f" note: {over_count:,} postcode(s) exceeded 100% raw canopy and were "
"capped — indicates overlapping TOW/NFI canopy within the buffer"
)
mean_height = np.divide(
height_weighted_sum,
height_weight,
@ -492,7 +474,6 @@ def _finalize_metrics(
"postcode": points["postcode"],
area_col: canopy_area.round(1).astype(np.float32),
density_col: density_pct.round(1).astype(np.float32),
count_col: feature_count.astype(np.uint32),
height_col: np.round(mean_height, 1).astype(np.float32),
}
).with_columns(
@ -500,181 +481,9 @@ def _finalize_metrics(
)
def _clean_key_expr(column: str) -> pl.Expr:
return (
pl.col(column)
.fill_null("")
.str.to_uppercase()
.str.replace_all(r"[^A-Z0-9]+", " ")
.str.replace_all(r"\s+", " ")
.str.strip_chars()
)
def _latest_price_paid_addresses(price_paid_path: Path) -> pl.LazyFrame:
return (
pl.scan_parquet(price_paid_path)
.select(
pl.col("postcode").str.strip_chars().str.to_uppercase().alias("postcode"),
"paon",
"saon",
"street",
"locality",
"town_city",
"district",
"county",
"date_of_transfer",
)
.filter(pl.col("postcode").is_not_null())
.filter(pl.col("street").is_not_null())
.filter(_clean_key_expr("street") != "")
.with_columns(
pl.concat_str(
[pl.col("saon"), pl.col("paon"), pl.col("street")],
separator=" ",
ignore_nulls=True,
)
.str.replace_all(r"\s+", " ")
.str.strip_chars()
.alias("pp_address"),
)
.filter(pl.col("pp_address").is_not_null())
.sort("date_of_transfer")
.group_by("postcode", "pp_address", maintain_order=True)
.agg(
pl.col("street").last(),
pl.col("locality").last(),
pl.col("town_city").last(),
pl.col("district").last(),
pl.col("county").last(),
)
.with_columns(
pl.concat_str(
[
_clean_key_expr("street"),
_clean_key_expr("town_city"),
_clean_key_expr("district"),
_clean_key_expr("county"),
],
separator="|",
).alias("street_key")
)
)
def _weighted_mean_expr(column: str, weight: str) -> pl.Expr:
valid = pl.col(column).is_not_null() & ~pl.col(column).is_nan()
numerator = pl.when(valid).then(pl.col(column) * pl.col(weight)).sum()
denominator = pl.when(valid).then(pl.col(weight)).sum()
return pl.when(denominator > 0).then(numerator / denominator).otherwise(None)
def _write_street_rollups(
postcode_metrics: pl.DataFrame,
price_paid_path: Path,
output_streets: Path | None,
output_addresses: Path | None,
radius_m: int,
) -> None:
if output_streets is None and output_addresses is None:
return
density_col, area_col, count_col, height_col = _metric_columns(radius_m)
metrics = postcode_metrics.lazy()
addresses = _latest_price_paid_addresses(price_paid_path).join(
metrics, on="postcode", how="inner"
)
per_postcode = (
addresses.group_by(
"street_key",
"postcode",
"street",
"locality",
"town_city",
"district",
"county",
)
.agg(
pl.len().alias("address_count"),
pl.col(density_col).first(),
pl.col(area_col).first(),
pl.col(count_col).first(),
pl.col(height_col).first(),
)
.collect()
)
streets = (
per_postcode.lazy()
.group_by("street_key")
.agg(
pl.col("street").first(),
pl.col("locality").first(),
pl.col("town_city").first(),
pl.col("district").first(),
pl.col("county").first(),
pl.col("postcode").n_unique().alias("postcode_count"),
pl.col("address_count").sum().alias("address_count"),
_weighted_mean_expr(density_col, "address_count")
.round(1)
.cast(pl.Float32)
.alias(STREET_TREE_COVERAGE_COL),
_weighted_mean_expr(area_col, "address_count")
.round(1)
.cast(pl.Float32)
.alias(f"Street average {area_col}"),
_weighted_mean_expr(count_col, "address_count")
.round(1)
.cast(pl.Float32)
.alias(f"Street average {count_col}"),
_weighted_mean_expr(height_col, "address_count")
.round(1)
.cast(pl.Float32)
.alias(f"Street average {height_col}"),
)
.with_columns(
_coverage_percentile_expr(
STREET_TREE_COVERAGE_COL,
STREET_TREE_DENSITY_COL,
)
)
.sort("street_key")
.collect()
)
if output_addresses is not None:
output_addresses.parent.mkdir(parents=True, exist_ok=True)
address_output = addresses.join(
streets.lazy().select(
"street_key",
STREET_TREE_COVERAGE_COL,
STREET_TREE_DENSITY_COL,
),
on="street_key",
how="left",
)
address_output.sink_parquet(output_addresses, compression="zstd")
print(f"Wrote address tree-density join: {output_addresses}")
if output_streets is not None:
output_streets.parent.mkdir(parents=True, exist_ok=True)
streets.write_parquet(output_streets, compression="zstd")
print(f"Wrote street tree-density rollup: {output_streets}")
def _parse_csv_arg(value: str | None) -> tuple[str, ...] | None:
if value is None:
return None
if value.lower() == "all":
return None
parts = tuple(part.strip() for part in value.split(",") if part.strip())
return parts or None
def main() -> None:
parser = argparse.ArgumentParser(
description="Build postcode and street tree-density metrics from FR_TOW_V1_ALL.zip"
description="Build postcode-level tree-density metrics from FR_TOW_V1_ALL.zip"
)
parser.add_argument(
"--tow-zip",
@ -716,35 +525,17 @@ def main() -> None:
default=Path("property-data/arcgis_data.parquet"),
help="Postcode centroid parquet with east1m/north1m columns",
)
parser.add_argument(
"--price-paid",
type=Path,
default=None,
help="Optional Price Paid parquet used to roll postcode metrics up to streets",
)
parser.add_argument(
"--output-postcodes",
type=Path,
required=True,
help="Output postcode-level tree-density parquet",
)
parser.add_argument(
"--output-streets",
type=Path,
default=None,
help="Optional output street-level tree-density parquet",
)
parser.add_argument(
"--output-addresses",
type=Path,
default=None,
help="Optional output address/street join parquet keyed by postcode and pp_address",
)
parser.add_argument(
"--radius-m",
type=int,
default=50,
help="Radius around each postcode centroid used as the street-scale buffer",
help="Radius around each postcode centroid used as the extended buffer",
)
parser.add_argument(
"--layers",
@ -757,12 +548,6 @@ def main() -> None:
default=65_536,
help="Arrow batch size for reading TOW features",
)
parser.add_argument(
"--workers",
type=int,
default=-1,
help="Worker count passed to scipy cKDTree.query_ball_point",
)
parser.add_argument(
"--max-postcodes",
type=int,
@ -783,9 +568,6 @@ def main() -> None:
)
args = parser.parse_args()
if (args.output_streets or args.output_addresses) and args.price_paid is None:
raise SystemExit("--price-paid is required when writing street/address outputs")
if args.radius_m <= 0:
raise SystemExit("--radius-m must be greater than zero")
@ -797,36 +579,32 @@ def main() -> None:
n_points = points.height
canopy_area = np.zeros(n_points, dtype=np.float64)
feature_count = np.zeros(n_points, dtype=np.uint32)
height_weighted_sum = np.zeros(n_points, dtype=np.float64)
height_weight = np.zeros(n_points, dtype=np.float64)
_accumulate_tree_metrics(
circles, tree = _postcode_buffers(points, args.radius_m)
_accumulate_tow_metrics(
dataset_path=dataset_path,
points=points,
radius_m=args.radius_m,
circles=circles,
tree=tree,
canopy_area=canopy_area,
height_weighted_sum=height_weighted_sum,
height_weight=height_weight,
batch_size=args.batch_size,
layer_names=layer_names,
max_features_per_layer=args.max_features_per_layer,
workers=args.workers,
canopy_area=canopy_area,
feature_count=feature_count,
height_weighted_sum=height_weighted_sum,
height_weight=height_weight,
)
if args.nfi_zip is not None and args.nfi_zip.exists():
nfi_path = _nfi_dataset_path(
args.nfi_zip, args.nfi_extract_dir, args.force_extract, args.use_vsizip
)
circles, nfi_tree = _postcode_buffers(points, args.radius_m)
_accumulate_nfi_metrics(
dataset_path=nfi_path,
circles=circles,
tree=nfi_tree,
tree=tree,
canopy_area=canopy_area,
feature_count=feature_count,
radius_m=args.radius_m,
batch_size=args.batch_size,
max_nfi_features=args.max_nfi_features,
)
@ -836,7 +614,6 @@ def main() -> None:
postcode_metrics = _finalize_metrics(
points,
canopy_area,
feature_count,
height_weighted_sum,
height_weight,
args.radius_m,
@ -849,14 +626,14 @@ def main() -> None:
postcode_metrics.write_parquet(args.output_postcodes, compression="zstd")
print(f"\nWrote postcode tree-density metrics: {args.output_postcodes}")
if args.price_paid is not None:
_write_street_rollups(
postcode_metrics=postcode_metrics,
price_paid_path=args.price_paid,
output_streets=args.output_streets,
output_addresses=args.output_addresses,
radius_m=args.radius_m,
)
def _parse_csv_arg(value: str | None) -> tuple[str, ...] | None:
if value is None:
return None
if value.lower() == "all":
return None
parts = tuple(part.strip() for part in value.split(",") if part.strip())
return parts or None
if __name__ == "__main__":