{ "cells": [ { "cell_type": "markdown", "id": "title", "metadata": {}, "source": [ "# Polygon-intersection tree density prototype\n", "\n", "This notebook prototypes the next algorithm: expand each postcode boundary by `N` metres, intersect that expanded polygon with Forest Research Trees Outside Woodland polygons, and sum the actual intersecting canopy area.\n", "\n", "The map shows three seeded random postcodes side by side. Each panel shows the original postcode boundary, the expanded postcode boundary, the intersecting foliage polygons, and the resulting percentage." ] }, { "cell_type": "code", "execution_count": 1, "id": "imports-config", "metadata": { "execution": { "iopub.execute_input": "2026-05-12T20:47:25.026395Z", "iopub.status.busy": "2026-05-12T20:47:25.026327Z", "iopub.status.idle": "2026-05-12T20:47:26.779075Z", "shell.execute_reply": "2026-05-12T20:47:26.778675Z" } }, "outputs": [], "source": [ "from contextlib import redirect_stdout\n", "from html import escape\n", "import io\n", "from pathlib import Path\n", "import json\n", "import sys\n", "\n", "import folium\n", "import numpy as np\n", "import polars as pl\n", "import pyogrio\n", "import shapely\n", "from IPython.display import HTML\n", "from pyproj import Transformer\n", "from shapely.geometry import mapping, shape\n", "from shapely.ops import transform as shapely_transform\n", "\n", "ROOT = Path.cwd()\n", "if not (ROOT / \"pipeline\").exists() and (ROOT.parent / \"pipeline\").exists():\n", " ROOT = ROOT.parent\n", "if str(ROOT) not in sys.path:\n", " sys.path.insert(0, str(ROOT))\n", "\n", "from pipeline.transform.tree_density import ( # noqa: E402\n", " DEFAULT_TOW_TYPES,\n", " _layers,\n", " _metric_columns,\n", " _parse_csv_arg,\n", " _postcode_points,\n", " _tow_dataset_path,\n", " _where_for_tow_types,\n", ")\n", "\n", "DATA_DIR = ROOT / \"property-data\"\n", "TOW_ZIP = DATA_DIR / \"FR_TOW_V1_ALL.zip\"\n", "EXTRACT_DIR = DATA_DIR / \"fr_tow_v1_all\"\n", "ARCGIS = DATA_DIR / \"arcgis_data.parquet\"\n", "POSTCODE_TREE_DENSITY = DATA_DIR / \"tree_density_by_postcode.parquet\"\n", "POSTCODE_BOUNDARY_UNITS = DATA_DIR / \"postcode_boundaries\" / \"units\"\n", "OUTPUT_HTML = ROOT / \"analyses\" / \"tree_density_polygon_intersection_map.html\"\n", "\n", "RANDOM_SEED = 20260512\n", "N_POSTCODES = 3\n", "BOUNDARY_EXPANSION_M = 50\n", "CANDIDATE_SAMPLE_SIZE = 60\n", "\n", "tow_types = _parse_csv_arg(\",\".join(DEFAULT_TOW_TYPES))\n", "density_col, area_col, count_col, height_col = _metric_columns(50)\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "helpers", "metadata": { "execution": { "iopub.execute_input": "2026-05-12T20:47:26.780431Z", "iopub.status.busy": "2026-05-12T20:47:26.780263Z", "iopub.status.idle": "2026-05-12T20:47:26.846177Z", "shell.execute_reply": "2026-05-12T20:47:26.845648Z" } }, "outputs": [], "source": [ "_to_bng = Transformer.from_crs(\"EPSG:4326\", \"EPSG:27700\", always_xy=True)\n", "_to_wgs84 = Transformer.from_crs(\"EPSG:27700\", \"EPSG:4326\", always_xy=True)\n", "\n", "\n", "def to_bng(geom):\n", " return shapely_transform(_to_bng.transform, geom)\n", "\n", "\n", "def to_wgs84(geom):\n", " return shapely_transform(_to_wgs84.transform, geom)\n", "\n", "\n", "def postcode_boundary_wgs84(postcode: str):\n", " district = postcode.split()[0]\n", " boundary_path = POSTCODE_BOUNDARY_UNITS / f\"{district}.geojson\"\n", " mapit_code = postcode.replace(\" \", \"\")\n", " with boundary_path.open() as f:\n", " collection = json.load(f)\n", "\n", " for feature in collection[\"features\"]:\n", " props = feature.get(\"properties\", {})\n", " if props.get(\"postcodes\") == postcode or props.get(\"mapit_code\") == mapit_code:\n", " return shape(feature[\"geometry\"])\n", "\n", " raise ValueError(f\"Postcode boundary not found for {postcode}\")\n", "\n", "\n", "def postcode_boundary_bng(postcode: str):\n", " geom = to_bng(postcode_boundary_wgs84(postcode))\n", " if not geom.is_valid:\n", " geom = shapely.make_valid(geom)\n", " return geom\n", "\n", "\n", "def intersecting_tow_features(dataset_path: str, layer_names: list[str], target_bng):\n", " bbox = target_bng.bounds\n", " rows = []\n", " where = _where_for_tow_types(tow_types)\n", "\n", " for layer in layer_names:\n", " with pyogrio.open_arrow(\n", " dataset_path,\n", " layer=layer,\n", " columns=[\"Woodland_Type\", \"TOW_Area_M\", \"MEANHT\"],\n", " where=where,\n", " bbox=bbox,\n", " batch_size=4096,\n", " use_pyarrow=True,\n", " ) as (_meta, reader):\n", " for batch in reader:\n", " names = batch.schema.names\n", " tow_area = np.asarray(\n", " batch.column(names.index(\"TOW_Area_M\")).to_numpy(zero_copy_only=False),\n", " dtype=np.float64,\n", " )\n", " mean_height = np.asarray(\n", " batch.column(names.index(\"MEANHT\")).to_numpy(zero_copy_only=False),\n", " dtype=np.float64,\n", " )\n", " woodland_type = batch.column(names.index(\"Woodland_Type\")).to_pylist()\n", " geometry = np.asarray(\n", " batch.column(names.index(\"SHAPE\")).to_numpy(zero_copy_only=False),\n", " dtype=object,\n", " )\n", "\n", " geoms = shapely.from_wkb(geometry)\n", " valid = np.isfinite(tow_area) & (tow_area > 0)\n", " for i in np.flatnonzero(valid):\n", " geom = geoms[i]\n", " if geom is None or shapely.is_empty(geom):\n", " continue\n", " if not shapely.is_valid(geom):\n", " geom = shapely.make_valid(geom)\n", " if not shapely.intersects(geom, target_bng):\n", " continue\n", "\n", " intersection = shapely.intersection(geom, target_bng)\n", " intersection_area = float(shapely.area(intersection))\n", " if intersection_area <= 0:\n", " continue\n", "\n", " rows.append(\n", " {\n", " \"layer\": layer,\n", " \"geometry_bng\": geom,\n", " \"intersection_bng\": intersection,\n", " \"woodland_type\": woodland_type[i],\n", " \"tow_area_m2\": float(tow_area[i]),\n", " \"intersection_area_m2\": intersection_area,\n", " \"mean_height_m\": None if not np.isfinite(mean_height[i]) else float(mean_height[i]),\n", " }\n", " )\n", " return rows\n", "\n", "\n", "def build_postcode_card(dataset_path: str, layer_names: list[str], postcode: str) -> tuple[str, dict]:\n", " original_bng = postcode_boundary_bng(postcode)\n", " expanded_bng = original_bng.buffer(BOUNDARY_EXPANSION_M)\n", " if not expanded_bng.is_valid:\n", " expanded_bng = shapely.make_valid(expanded_bng)\n", "\n", " features = intersecting_tow_features(dataset_path, layer_names, expanded_bng)\n", " intersecting_area = sum(feature[\"intersection_area_m2\"] for feature in features)\n", " expanded_area = float(shapely.area(expanded_bng))\n", " polygon_density = 0.0 if expanded_area <= 0 else min(intersecting_area / expanded_area * 100, 100)\n", "\n", " center_lon, center_lat = to_wgs84(expanded_bng.centroid).coords[0]\n", " m = folium.Map(\n", " location=[center_lat, center_lon],\n", " zoom_start=18,\n", " tiles=\"CartoDB positron\",\n", " control_scale=True,\n", " width=\"100%\",\n", " height=\"430px\",\n", " )\n", "\n", " folium.GeoJson(\n", " mapping(to_wgs84(expanded_bng)),\n", " name=f\"expanded postcode boundary (+{BOUNDARY_EXPANSION_M}m)\",\n", " style_function=lambda _feature: {\n", " \"color\": \"#f97316\",\n", " \"weight\": 3,\n", " \"fillColor\": \"#fed7aa\",\n", " \"fillOpacity\": 0.18,\n", " },\n", " ).add_to(m)\n", " folium.GeoJson(\n", " mapping(to_wgs84(original_bng)),\n", " name=\"original postcode boundary\",\n", " style_function=lambda _feature: {\n", " \"color\": \"#2563eb\",\n", " \"weight\": 3,\n", " \"fillColor\": \"#93c5fd\",\n", " \"fillOpacity\": 0.12,\n", " },\n", " ).add_to(m)\n", "\n", " foliage_group = folium.FeatureGroup(name=\"foliage boundary\", show=True)\n", " intersection_group = folium.FeatureGroup(name=\"intersection area\", show=True)\n", " for index, feature in enumerate(features, start=1):\n", " popup_html = (\n", " f\"TOW polygon {index}
\"\n", " f\"Type: {feature['woodland_type']}
\"\n", " f\"Full TOW area: {feature['tow_area_m2']:.1f} sqm
\"\n", " f\"Intersecting area: {feature['intersection_area_m2']:.1f} sqm\"\n", " )\n", " folium.GeoJson(\n", " mapping(to_wgs84(feature[\"geometry_bng\"])),\n", " name=\"foliage boundary\",\n", " style_function=lambda _feature: {\n", " \"color\": \"#166534\",\n", " \"weight\": 1,\n", " \"fillColor\": \"#22c55e\",\n", " \"fillOpacity\": 0.16,\n", " },\n", " tooltip=\"foliage boundary\",\n", " popup=folium.Popup(popup_html, max_width=300),\n", " ).add_to(foliage_group)\n", " folium.GeoJson(\n", " mapping(to_wgs84(feature[\"intersection_bng\"])),\n", " name=\"intersection area\",\n", " style_function=lambda _feature: {\n", " \"color\": \"#14532d\",\n", " \"weight\": 2,\n", " \"fillColor\": \"#16a34a\",\n", " \"fillOpacity\": 0.58,\n", " },\n", " tooltip=\"counted intersection area\",\n", " popup=folium.Popup(popup_html, max_width=300),\n", " ).add_to(intersection_group)\n", "\n", " foliage_group.add_to(m)\n", " intersection_group.add_to(m)\n", " folium.LayerControl(collapsed=True).add_to(m)\n", "\n", " title = escape(\n", " f\"{postcode}: {polygon_density:.1f}% | \"\n", " f\"intersection {intersecting_area:.0f} sqm | +{BOUNDARY_EXPANSION_M}m\"\n", " )\n", " iframe = escape(m.get_root().render(), quote=True)\n", " card = f\"\"\"\n", "
\n", "

{title}

\n", " \n", "
\n", " \"\"\"\n", " return card, {\n", " \"postcode\": postcode,\n", " \"polygon_density_pct\": round(polygon_density, 1),\n", " \"intersecting_area_sqm\": round(intersecting_area, 1),\n", " \"expanded_boundary_area_sqm\": round(expanded_area, 1),\n", " \"intersecting_tow_features\": len(features),\n", " }\n", "\n", "\n", "def side_by_side_document(cards: list[str]) -> str:\n", " return f\"\"\"\n", " \n", " \n", " \n", " \n", " \n", " Polygon-intersection tree density prototype\n", " \n", " \n", " \n", "
{''.join(cards)}
\n", " \n", " \n", " \"\"\"\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "sample-and-render", "metadata": { "execution": { "iopub.execute_input": "2026-05-12T20:47:26.847213Z", "iopub.status.busy": "2026-05-12T20:47:26.847116Z", "iopub.status.idle": "2026-05-12T20:47:28.310447Z", "shell.execute_reply": "2026-05-12T20:47:28.310171Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (3, 5)
postcodepolygon_density_pctintersecting_area_sqmexpanded_boundary_area_sqmintersecting_tow_features
strf64f64f64i64
"B61 7EF"3.720421.5554408.7182
"B94 6HR"4.32281.352697.133
"BB1 9JJ"11.52888.425147.639
" ], "text/plain": [ "shape: (3, 5)\n", "┌──────────┬─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────┐\n", "│ postcode ┆ polygon_density_pct ┆ intersecting_area_s ┆ expanded_boundary_a ┆ intersecting_tow_fe │\n", "│ --- ┆ --- ┆ qm ┆ rea_sqm ┆ atures │\n", "│ str ┆ f64 ┆ --- ┆ --- ┆ --- │\n", "│ ┆ ┆ f64 ┆ f64 ┆ i64 │\n", "╞══════════╪═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════╡\n", "│ B61 7EF ┆ 3.7 ┆ 20421.5 ┆ 554408.7 ┆ 182 │\n", "│ B94 6HR ┆ 4.3 ┆ 2281.3 ┆ 52697.1 ┆ 33 │\n", "│ BB1 9JJ ┆ 11.5 ┆ 2888.4 ┆ 25147.6 ┆ 39 │\n", "└──────────┴─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────┘" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " Polygon-intersection tree density prototype\n", " \n", " \n", " \n", "
\n", "
\n", "

B61 7EF: 3.7% | intersection 20422 sqm | +50m

\n", " \n", "
\n", " \n", "
\n", "

B94 6HR: 4.3% | intersection 2281 sqm | +50m

\n", " \n", "
\n", " \n", "
\n", "

BB1 9JJ: 11.5% | intersection 2888 sqm | +50m

\n", " \n", "
\n", "
\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with redirect_stdout(io.StringIO()):\n", " dataset_path = _tow_dataset_path(TOW_ZIP, EXTRACT_DIR, force_extract=False, use_vsizip=False)\n", " postcode_points = _postcode_points(ARCGIS, max_postcodes=None)\n", "layer_names = _layers(dataset_path, selected_layers=None)\n", "\n", "existing_districts = {path.stem for path in POSTCODE_BOUNDARY_UNITS.glob(\"*.geojson\")}\n", "candidates = (\n", " pl.scan_parquet(POSTCODE_TREE_DENSITY)\n", " .filter(pl.col(count_col) > 0)\n", " .with_columns(pl.col(\"postcode\").str.split(\" \").list.first().alias(\"district\"))\n", " .filter(pl.col(\"district\").is_in(existing_districts))\n", " .select(\"postcode\")\n", " .collect()\n", " .sample(n=CANDIDATE_SAMPLE_SIZE, seed=RANDOM_SEED, shuffle=True)\n", " .join(postcode_points, on=\"postcode\", how=\"inner\")\n", ")\n", "\n", "cards = []\n", "summaries = []\n", "for row in candidates.to_dicts():\n", " card, summary = build_postcode_card(dataset_path, layer_names, row[\"postcode\"])\n", " if summary[\"intersecting_area_sqm\"] <= 0:\n", " continue\n", " cards.append(card)\n", " summaries.append(summary)\n", " if len(cards) == N_POSTCODES:\n", " break\n", "\n", "if len(cards) < N_POSTCODES:\n", " raise RuntimeError(f\"Only found {len(cards)} postcodes with intersecting TOW area\")\n", "\n", "html = side_by_side_document(cards)\n", "OUTPUT_HTML.write_text(html)\n", "display(pl.DataFrame(summaries))\n", "HTML(html)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.13" } }, "nbformat": 4, "nbformat_minor": 5 }