{
"cells": [
{
"cell_type": "markdown",
"id": "title",
"metadata": {},
"source": [
"# Street tree density visual check\n",
"\n",
"This notebook picks three postcodes with a fixed random seed and shows only the final map plus the tree-density percentage for each postcode. The percentage is read from the production output generated by `pipeline.transform.tree_density`; the map uses the same TOW type filter, postcode centroid, 50m buffer, and TOW polygon centroid rule used by that pipeline."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "imports-config",
"metadata": {
"execution": {
"iopub.execute_input": "2026-05-12T20:42:01.528837Z",
"iopub.status.busy": "2026-05-12T20:42:01.528729Z",
"iopub.status.idle": "2026-05-12T20:42:01.986980Z",
"shell.execute_reply": "2026-05-12T20:42:01.986661Z"
}
},
"outputs": [],
"source": [
"from contextlib import redirect_stdout\n",
"from html import escape\n",
"import io\n",
"from pathlib import Path\n",
"import json\n",
"import math\n",
"import sys\n",
"\n",
"import folium\n",
"import numpy as np\n",
"import polars as pl\n",
"import pyogrio\n",
"import shapely\n",
"from IPython.display import HTML\n",
"from pyproj import Transformer\n",
"from shapely.geometry import Point, mapping, shape\n",
"from shapely.ops import transform as shapely_transform\n",
"\n",
"ROOT = Path.cwd()\n",
"if not (ROOT / \"pipeline\").exists() and (ROOT.parent / \"pipeline\").exists():\n",
" ROOT = ROOT.parent\n",
"if str(ROOT) not in sys.path:\n",
" sys.path.insert(0, str(ROOT))\n",
"\n",
"from pipeline.transform.tree_density import (\n",
" DEFAULT_TOW_TYPES,\n",
" _layers,\n",
" _metric_columns,\n",
" _parse_csv_arg,\n",
" _postcode_points,\n",
" _tow_dataset_path,\n",
" _where_for_tow_types,\n",
")\n",
"\n",
"DATA_DIR = ROOT / \"property-data\"\n",
"TOW_ZIP = DATA_DIR / \"FR_TOW_V1_ALL.zip\"\n",
"EXTRACT_DIR = DATA_DIR / \"fr_tow_v1_all\"\n",
"ARCGIS = DATA_DIR / \"arcgis_data.parquet\"\n",
"POSTCODE_TREE_DENSITY = DATA_DIR / \"tree_density_by_postcode.parquet\"\n",
"POSTCODE_BOUNDARY_UNITS = DATA_DIR / \"postcode_boundaries\" / \"units\"\n",
"OUTPUT_HTML = ROOT / \"analyses\" / \"tree_density_three_postcodes_map.html\"\n",
"\n",
"RANDOM_SEED = 20260512\n",
"N_POSTCODES = 3\n",
"RADIUS_M = 50\n",
"\n",
"tow_types = _parse_csv_arg(\",\".join(DEFAULT_TOW_TYPES))\n",
"density_col, area_col, count_col, height_col = _metric_columns(RADIUS_M)\n",
"buffer_area = math.pi * RADIUS_M**2\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "helpers",
"metadata": {
"execution": {
"iopub.execute_input": "2026-05-12T20:42:01.988203Z",
"iopub.status.busy": "2026-05-12T20:42:01.988094Z",
"iopub.status.idle": "2026-05-12T20:42:02.015890Z",
"shell.execute_reply": "2026-05-12T20:42:02.015642Z"
}
},
"outputs": [],
"source": [
"_to_wgs84 = Transformer.from_crs(\"EPSG:27700\", \"EPSG:4326\", always_xy=True)\n",
"\n",
"\n",
"def bng_geom_to_wgs84(geom):\n",
" return shapely_transform(_to_wgs84.transform, geom)\n",
"\n",
"\n",
"def postcode_boundary_wgs84(postcode: str):\n",
" district = postcode.split()[0]\n",
" boundary_path = POSTCODE_BOUNDARY_UNITS / f\"{district}.geojson\"\n",
" mapit_code = postcode.replace(\" \", \"\")\n",
" with boundary_path.open() as f:\n",
" collection = json.load(f)\n",
"\n",
" for feature in collection[\"features\"]:\n",
" props = feature.get(\"properties\", {})\n",
" if props.get(\"postcodes\") == postcode or props.get(\"mapit_code\") == mapit_code:\n",
" return shape(feature[\"geometry\"])\n",
"\n",
" raise ValueError(f\"Postcode boundary not found for {postcode}\")\n",
"\n",
"\n",
"def tow_features_for_postcode(dataset_path: str, layer_names: list[str], x: float, y: float):\n",
" bbox = (x - RADIUS_M, y - RADIUS_M, x + RADIUS_M, y + RADIUS_M)\n",
" rows = []\n",
" where = _where_for_tow_types(tow_types)\n",
"\n",
" for layer in layer_names:\n",
" with pyogrio.open_arrow(\n",
" dataset_path,\n",
" layer=layer,\n",
" columns=[\"Woodland_Type\", \"TOW_Area_M\", \"MEANHT\"],\n",
" where=where,\n",
" bbox=bbox,\n",
" batch_size=4096,\n",
" use_pyarrow=True,\n",
" ) as (_meta, reader):\n",
" for batch in reader:\n",
" names = batch.schema.names\n",
" area = np.asarray(\n",
" batch.column(names.index(\"TOW_Area_M\")).to_numpy(zero_copy_only=False),\n",
" dtype=np.float64,\n",
" )\n",
" height = np.asarray(\n",
" batch.column(names.index(\"MEANHT\")).to_numpy(zero_copy_only=False),\n",
" dtype=np.float64,\n",
" )\n",
" woodland_type = batch.column(names.index(\"Woodland_Type\")).to_pylist()\n",
" geometry = np.asarray(\n",
" batch.column(names.index(\"SHAPE\")).to_numpy(zero_copy_only=False),\n",
" dtype=object,\n",
" )\n",
"\n",
" geoms = shapely.from_wkb(geometry)\n",
" centroids = shapely.centroid(geoms)\n",
" centroid_x = shapely.get_x(centroids)\n",
" centroid_y = shapely.get_y(centroids)\n",
" valid = np.isfinite(area) & (area > 0) & np.isfinite(centroid_x) & np.isfinite(centroid_y)\n",
"\n",
" for i in np.flatnonzero(valid):\n",
" distance_m = float(np.hypot(centroid_x[i] - x, centroid_y[i] - y))\n",
" rows.append(\n",
" {\n",
" \"layer\": layer,\n",
" \"geometry_bng\": geoms[i],\n",
" \"centroid_bng\": Point(float(centroid_x[i]), float(centroid_y[i])),\n",
" \"woodland_type\": woodland_type[i],\n",
" \"area_m2\": float(area[i]),\n",
" \"mean_height_m\": None if not np.isfinite(height[i]) else float(height[i]),\n",
" \"distance_m\": distance_m,\n",
" \"counted\": distance_m <= RADIUS_M,\n",
" }\n",
" )\n",
" return rows\n",
"\n",
"\n",
"def build_postcode_map(dataset_path: str, layer_names: list[str], row: dict) -> str:\n",
" postcode = row[\"postcode\"]\n",
" point_bng = Point(row[\"x\"], row[\"y\"])\n",
" point_lon, point_lat = _to_wgs84.transform(point_bng.x, point_bng.y)\n",
" buffer_bng = point_bng.buffer(RADIUS_M, resolution=96)\n",
" boundary = postcode_boundary_wgs84(postcode)\n",
" features = tow_features_for_postcode(dataset_path, layer_names, point_bng.x, point_bng.y)\n",
"\n",
" counted_area = sum(feature[\"area_m2\"] for feature in features if feature[\"counted\"])\n",
" visual_density = round(min(counted_area / buffer_area * 100, 100), 1)\n",
"\n",
" m = folium.Map(\n",
" location=[point_lat, point_lon],\n",
" zoom_start=18,\n",
" tiles=\"CartoDB positron\",\n",
" control_scale=True,\n",
" width=\"100%\",\n",
" height=\"430px\",\n",
" )\n",
"\n",
" folium.GeoJson(\n",
" mapping(boundary),\n",
" name=\"postcode boundary\",\n",
" style_function=lambda _feature: {\n",
" \"color\": \"#2563eb\",\n",
" \"weight\": 3,\n",
" \"fillColor\": \"#93c5fd\",\n",
" \"fillOpacity\": 0.10,\n",
" },\n",
" ).add_to(m)\n",
" folium.GeoJson(\n",
" mapping(bng_geom_to_wgs84(buffer_bng)),\n",
" name=\"50m buffer\",\n",
" style_function=lambda _feature: {\n",
" \"color\": \"#f97316\",\n",
" \"weight\": 3,\n",
" \"fillColor\": \"#fed7aa\",\n",
" \"fillOpacity\": 0.18,\n",
" },\n",
" ).add_to(m)\n",
"\n",
" counted_group = folium.FeatureGroup(name=\"counted foliage\", show=True)\n",
" nearby_group = folium.FeatureGroup(name=\"nearby, not counted\", show=False)\n",
"\n",
" for index, feature in enumerate(features, start=1):\n",
" group = counted_group if feature[\"counted\"] else nearby_group\n",
" geom_wgs84 = bng_geom_to_wgs84(feature[\"geometry_bng\"])\n",
" centroid_lon, centroid_lat = _to_wgs84.transform(feature[\"centroid_bng\"].x, feature[\"centroid_bng\"].y)\n",
" style = {\n",
" \"color\": \"#15803d\" if feature[\"counted\"] else \"#6b7280\",\n",
" \"weight\": 2 if feature[\"counted\"] else 1,\n",
" \"fillColor\": \"#22c55e\" if feature[\"counted\"] else \"#9ca3af\",\n",
" \"fillOpacity\": 0.45 if feature[\"counted\"] else 0.20,\n",
" }\n",
" popup_html = (\n",
" f\"TOW polygon {index}
\"\n",
" f\"Status: {'counted' if feature['counted'] else 'not counted'}
\"\n",
" f\"Type: {feature['woodland_type']}
\"\n",
" f\"Area: {feature['area_m2']:.1f} sqm
\"\n",
" f\"Centroid distance: {feature['distance_m']:.1f} m\"\n",
" )\n",
" folium.GeoJson(\n",
" {\n",
" \"type\": \"Feature\",\n",
" \"geometry\": mapping(geom_wgs84),\n",
" \"properties\": {\"popup\": popup_html},\n",
" },\n",
" style_function=lambda _feature, style=style: style,\n",
" tooltip=\"counted foliage\" if feature[\"counted\"] else \"nearby foliage, not counted\",\n",
" popup=folium.Popup(popup_html, max_width=280),\n",
" ).add_to(group)\n",
" folium.CircleMarker(\n",
" [centroid_lat, centroid_lon],\n",
" radius=3,\n",
" color=\"#14532d\" if feature[\"counted\"] else \"#4b5563\",\n",
" fill=True,\n",
" fill_opacity=0.95,\n",
" tooltip=f\"TOW centroid: {feature['distance_m']:.1f}m\",\n",
" ).add_to(group)\n",
"\n",
" counted_group.add_to(m)\n",
" nearby_group.add_to(m)\n",
" folium.CircleMarker(\n",
" [point_lat, point_lon],\n",
" radius=7,\n",
" color=\"#1d4ed8\",\n",
" fill=True,\n",
" fill_color=\"#1d4ed8\",\n",
" fill_opacity=1,\n",
" tooltip=f\"{postcode} centroid\",\n",
" ).add_to(m)\n",
" folium.LayerControl(collapsed=True).add_to(m)\n",
"\n",
" title = escape(\n",
" f\"{postcode}: {row[density_col]:.1f}% | {int(row[count_col])} features | \"\n",
" f\"visual check {visual_density:.1f}%\"\n",
" )\n",
" iframe = escape(m.get_root().render(), quote=True)\n",
" return f\"\"\"\n",
" {title}
\n",
" \n",
"