From 11711c57e6e3b783b7b6cb92e6605c29cbead9c8 Mon Sep 17 00:00:00 2001 From: Andras Schmelczer Date: Tue, 12 May 2026 22:00:56 +0100 Subject: [PATCH] lgtm --- .vscode/settings.json | 3 +- Makefile.data | 37 +- analyses/tree_density_methodology.ipynb | 3948 +++++++++++++++++ frontend/scripts/prerender.mjs | 4 +- frontend/src/components/home/HomePage.tsx | 22 +- .../src/components/home/ProductShowcase.tsx | 6 +- .../src/components/map/FeatureBrowser.tsx | 4 +- frontend/src/components/map/Filters.tsx | 67 +- frontend/src/components/map/HoverCard.tsx | 8 +- .../components/map/MobileBottomSheet.test.tsx | 2 +- .../src/components/map/MobileBottomSheet.tsx | 20 +- .../filters/ElectionVoteShareFilterCard.tsx | 2 +- .../map/filters/EthnicityFilterCard.tsx | 2 +- .../map/filters/PoiDistanceFilterCard.tsx | 7 +- .../map/filters/SpecificCrimeFilterCard.tsx | 2 +- .../components/ui/CollapsibleGroupHeader.tsx | 2 +- frontend/src/i18n/locales/de.ts | 5 + frontend/src/i18n/locales/en.ts | 107 +- frontend/src/i18n/locales/fr.ts | 5 + frontend/src/i18n/locales/hi.ts | 5 + frontend/src/i18n/locales/hu.ts | 5 + frontend/src/i18n/locales/zh.ts | 5 + frontend/src/index.css | 87 + frontend/src/lib/api.test.ts | 26 +- pipeline/transform/test_merge.py | 21 +- pipeline/transform/test_poi_proximity.py | 20 +- pipeline/transform/test_tree_density.py | 99 + pipeline/transform/tree_density.py | 635 +++ server-rs/src/og_middleware.rs | 4 +- server-rs/src/routes/export.rs | 4 +- video/render.sh | 8 +- video/src/browser.ts | 3 + video/src/dom.ts | 19 +- video/src/preflight.ts | 2 + video/src/runner.ts | 12 +- video/src/script.ts | 6 + video/src/storyboard.ts | 368 +- video/tts/synth.py | 44 +- 38 files changed, 5361 insertions(+), 265 deletions(-) create mode 100644 analyses/tree_density_methodology.ipynb create mode 100644 pipeline/transform/test_tree_density.py create mode 100644 pipeline/transform/tree_density.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 702822d..fcbc21a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,5 +8,6 @@ "**/target": true, "frontend/dist": true, "**/.task": true - } + }, + "python.terminal.activateEnvironment": false } \ No newline at end of file diff --git a/Makefile.data b/Makefile.data index cb28b17..f63d5d2 100644 --- a/Makefile.data +++ b/Makefile.data @@ -48,6 +48,10 @@ GREENSPACE := $(DATA_DIR)/greenspace_water.parquet OS_GREENSPACE := $(DATA_DIR)/os_greenspace.parquet PBF := $(DATA_DIR)/england-latest.osm.pbf FR_TOW := $(DATA_DIR)/FR_TOW_V1_ALL.zip +TREE_DENSITY_PC := $(DATA_DIR)/tree_density_by_postcode.parquet +TREE_DENSITY_STREETS := $(DATA_DIR)/tree_density_by_street.parquet +TREE_DENSITY_ADDR := $(DATA_DIR)/tree_density_by_address.parquet +OFS_REGISTER := $(DATA_DIR)/ofs_register.xlsx PLACES := $(DATA_DIR)/places.parquet LSOA_POP := $(DATA_DIR)/lsoa_population.parquet MEDIAN_AGE := $(DATA_DIR)/median_age.parquet @@ -62,16 +66,20 @@ MAP_ASSETS_STAMP := $(MAP_ASSETS_DIR)/.done PMTILES_VERSION := 1.22.3 +POI_PROXIMITY_DEPS := pipeline/transform/poi_proximity.py pipeline/utils/poi_counts.py +MERGE_DEPS := pipeline/transform/merge.py +TREE_DENSITY_DEPS := pipeline/transform/tree_density.py + # ── Phony aliases ───────────────────────────────────────────────────────────── .PHONY: prepare merge tiles \ download-arcgis download-price-paid download-deprivation download-ethnicity \ download-naptan download-pois download-grocery-retail-points download-ofsted download-broadband download-rental-prices \ download-postcodes download-noise download-inspire download-crime \ - download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-fr-tow download-places download-lsoa-population download-median-age download-england-boundary download-rightmove-outcodes \ + download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-fr-tow download-ofs-register download-places download-lsoa-population download-median-age download-england-boundary download-rightmove-outcodes \ download-map-assets \ transform-pois transform-epc-pp transform-crime transform-poi-proximity \ - transform-school-proximity \ + transform-school-proximity transform-tree-density \ generate-postcode-boundaries generate-travel-times prepare: $(PRICES_STAMP) download-places tiles generate-postcode-boundaries download-map-assets generate-travel-times @@ -98,6 +106,7 @@ download-greenspace: $(GREENSPACE) download-os-greenspace: $(OS_GREENSPACE) download-pbf: $(PBF) download-fr-tow: $(FR_TOW) +download-ofs-register: $(OFS_REGISTER) download-places: $(PLACES) download-lsoa-population: $(LSOA_POP) download-median-age: $(MEDIAN_AGE) @@ -110,6 +119,7 @@ transform-epc-pp: $(EPC_PP) transform-crime: $(CRIME) transform-poi-proximity: $(POI_PROXIMITY) transform-school-proximity: $(SCHOOL_PROX) +transform-tree-density: $(TREE_DENSITY_ADDR) generate-postcode-boundaries: $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) uv run python -m pipeline.transform.postcode_boundaries \ --uprn $(UPRN_LOOKUP) \ @@ -168,6 +178,11 @@ $(FR_TOW): curl -L -A "Mozilla/5.0" -o $@.tmp "https://www.mediafire.com/file_premium/p5fve6wswwwjqrq/FR_TOW_V1_ALL.zip/file" mv $@.tmp $@ +$(OFS_REGISTER): + @mkdir -p $(DATA_DIR) + curl -fL -A "Mozilla/5.0" -o $@.tmp https://register-api.officeforstudents.org.uk/api/Download/ + mv $@.tmp $@ + $(POIS_RAW): $(PBF) $(ENGLAND_BOUNDARY) uv run python -m pipeline.download.pois --output $@ --pbf $(PBF) --boundary $(ENGLAND_BOUNDARY) @@ -209,8 +224,8 @@ $(GREENSPACE): $(PBF) $(OS_GREENSPACE): uv run python -m pipeline.download.os_greenspace --output $@ -$(PLACES): $(PBF) $(ENGLAND_BOUNDARY) $(NAPTAN) - uv run python -m pipeline.download.places --output $@ --pbf $(PBF) --boundary $(ENGLAND_BOUNDARY) --naptan $(NAPTAN) +$(PLACES): $(PBF) $(ENGLAND_BOUNDARY) $(NAPTAN) $(OFS_REGISTER) $(ARCGIS) + uv run python -m pipeline.download.places --output $@ --pbf $(PBF) --boundary $(ENGLAND_BOUNDARY) --naptan $(NAPTAN) --university-register $(OFS_REGISTER) --postcodes $(ARCGIS) $(LSOA_POP): uv run python -m pipeline.download.lsoa_population --output $@ @@ -243,12 +258,21 @@ $(EPC_PP): $(PRICE_PAID) $(EPC) $(CRIME): $(CRIME_STAMP) uv run python -m pipeline.transform.crime --input $(CRIME_DIR) --output $@ -$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) +$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) $(POI_PROXIMITY_DEPS) uv run python -m pipeline.transform.poi_proximity --arcgis $(ARCGIS) --pois $(POIS_FILTERED) --greenspace $(OS_GREENSPACE) --output $@ $(SCHOOL_PROX): $(OFSTED) $(ARCGIS) uv run python -m pipeline.transform.school_proximity --ofsted $(OFSTED) --arcgis $(ARCGIS) --output $@ +$(TREE_DENSITY_ADDR): $(FR_TOW) $(ARCGIS) $(PRICE_PAID) $(TREE_DENSITY_DEPS) + uv run python -m pipeline.transform.tree_density \ + --tow-zip $(FR_TOW) \ + --arcgis $(ARCGIS) \ + --price-paid $(PRICE_PAID) \ + --output-postcodes $(TREE_DENSITY_PC) \ + --output-streets $(TREE_DENSITY_STREETS) \ + --output-addresses $@ + # Postcode boundaries require manual generation — fail with instructions $(PC_BOUNDARIES): @echo "" @@ -267,7 +291,7 @@ $(PC_BOUNDARIES): # ── Final merge → postcode.parquet + properties.parquet ────────────────────── $(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \ - $(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(RENTAL) $(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) + $(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(RENTAL) $(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_ADDR) $(MERGE_DEPS) uv run python -m pipeline.transform.merge \ --epc-pp $(EPC_PP) \ --arcgis $(ARCGIS) \ @@ -282,6 +306,7 @@ $(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \ --lsoa-population $(LSOA_POP) \ --median-age $(MEDIAN_AGE) \ --election-results $(ELECTION) \ + --tree-density-addresses $(TREE_DENSITY_ADDR) \ --output-postcodes $(POSTCODES_PQ) \ --output-properties $(PROPERTIES_PQ) @touch $@ diff --git a/analyses/tree_density_methodology.ipynb b/analyses/tree_density_methodology.ipynb new file mode 100644 index 0000000..424e220 --- /dev/null +++ b/analyses/tree_density_methodology.ipynb @@ -0,0 +1,3948 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "title", + "metadata": {}, + "source": [ + "# Street tree density visual check\n", + "\n", + "This notebook picks three postcodes with a fixed random seed and shows only the final map plus the tree-density percentage for each postcode. The percentage is read from the production output generated by `pipeline.transform.tree_density`; the map uses the same TOW type filter, postcode centroid, 50m buffer, and TOW polygon centroid rule used by that pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "imports-config", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-12T20:42:01.528837Z", + "iopub.status.busy": "2026-05-12T20:42:01.528729Z", + "iopub.status.idle": "2026-05-12T20:42:01.986980Z", + "shell.execute_reply": "2026-05-12T20:42:01.986661Z" + } + }, + "outputs": [], + "source": [ + "from contextlib import redirect_stdout\n", + "from html import escape\n", + "import io\n", + "from pathlib import Path\n", + "import json\n", + "import math\n", + "import sys\n", + "\n", + "import folium\n", + "import numpy as np\n", + "import polars as pl\n", + "import pyogrio\n", + "import shapely\n", + "from IPython.display import HTML\n", + "from pyproj import Transformer\n", + "from shapely.geometry import Point, mapping, shape\n", + "from shapely.ops import transform as shapely_transform\n", + "\n", + "ROOT = Path.cwd()\n", + "if not (ROOT / \"pipeline\").exists() and (ROOT.parent / \"pipeline\").exists():\n", + " ROOT = ROOT.parent\n", + "if str(ROOT) not in sys.path:\n", + " sys.path.insert(0, str(ROOT))\n", + "\n", + "from pipeline.transform.tree_density import (\n", + " DEFAULT_TOW_TYPES,\n", + " _layers,\n", + " _metric_columns,\n", + " _parse_csv_arg,\n", + " _postcode_points,\n", + " _tow_dataset_path,\n", + " _where_for_tow_types,\n", + ")\n", + "\n", + "DATA_DIR = ROOT / \"property-data\"\n", + "TOW_ZIP = DATA_DIR / \"FR_TOW_V1_ALL.zip\"\n", + "EXTRACT_DIR = DATA_DIR / \"fr_tow_v1_all\"\n", + "ARCGIS = DATA_DIR / \"arcgis_data.parquet\"\n", + "POSTCODE_TREE_DENSITY = DATA_DIR / \"tree_density_by_postcode.parquet\"\n", + "POSTCODE_BOUNDARY_UNITS = DATA_DIR / \"postcode_boundaries\" / \"units\"\n", + "OUTPUT_HTML = ROOT / \"analyses\" / \"tree_density_three_postcodes_map.html\"\n", + "\n", + "RANDOM_SEED = 20260512\n", + "N_POSTCODES = 3\n", + "RADIUS_M = 50\n", + "\n", + "tow_types = _parse_csv_arg(\",\".join(DEFAULT_TOW_TYPES))\n", + "density_col, area_col, count_col, height_col = _metric_columns(RADIUS_M)\n", + "buffer_area = math.pi * RADIUS_M**2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "helpers", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-12T20:42:01.988203Z", + "iopub.status.busy": "2026-05-12T20:42:01.988094Z", + "iopub.status.idle": "2026-05-12T20:42:02.015890Z", + "shell.execute_reply": "2026-05-12T20:42:02.015642Z" + } + }, + "outputs": [], + "source": [ + "_to_wgs84 = Transformer.from_crs(\"EPSG:27700\", \"EPSG:4326\", always_xy=True)\n", + "\n", + "\n", + "def bng_geom_to_wgs84(geom):\n", + " return shapely_transform(_to_wgs84.transform, geom)\n", + "\n", + "\n", + "def postcode_boundary_wgs84(postcode: str):\n", + " district = postcode.split()[0]\n", + " boundary_path = POSTCODE_BOUNDARY_UNITS / f\"{district}.geojson\"\n", + " mapit_code = postcode.replace(\" \", \"\")\n", + " with boundary_path.open() as f:\n", + " collection = json.load(f)\n", + "\n", + " for feature in collection[\"features\"]:\n", + " props = feature.get(\"properties\", {})\n", + " if props.get(\"postcodes\") == postcode or props.get(\"mapit_code\") == mapit_code:\n", + " return shape(feature[\"geometry\"])\n", + "\n", + " raise ValueError(f\"Postcode boundary not found for {postcode}\")\n", + "\n", + "\n", + "def tow_features_for_postcode(dataset_path: str, layer_names: list[str], x: float, y: float):\n", + " bbox = (x - RADIUS_M, y - RADIUS_M, x + RADIUS_M, y + RADIUS_M)\n", + " rows = []\n", + " where = _where_for_tow_types(tow_types)\n", + "\n", + " for layer in layer_names:\n", + " with pyogrio.open_arrow(\n", + " dataset_path,\n", + " layer=layer,\n", + " columns=[\"Woodland_Type\", \"TOW_Area_M\", \"MEANHT\"],\n", + " where=where,\n", + " bbox=bbox,\n", + " batch_size=4096,\n", + " use_pyarrow=True,\n", + " ) as (_meta, reader):\n", + " for batch in reader:\n", + " names = batch.schema.names\n", + " area = np.asarray(\n", + " batch.column(names.index(\"TOW_Area_M\")).to_numpy(zero_copy_only=False),\n", + " dtype=np.float64,\n", + " )\n", + " height = np.asarray(\n", + " batch.column(names.index(\"MEANHT\")).to_numpy(zero_copy_only=False),\n", + " dtype=np.float64,\n", + " )\n", + " woodland_type = batch.column(names.index(\"Woodland_Type\")).to_pylist()\n", + " geometry = np.asarray(\n", + " batch.column(names.index(\"SHAPE\")).to_numpy(zero_copy_only=False),\n", + " dtype=object,\n", + " )\n", + "\n", + " geoms = shapely.from_wkb(geometry)\n", + " centroids = shapely.centroid(geoms)\n", + " centroid_x = shapely.get_x(centroids)\n", + " centroid_y = shapely.get_y(centroids)\n", + " valid = np.isfinite(area) & (area > 0) & np.isfinite(centroid_x) & np.isfinite(centroid_y)\n", + "\n", + " for i in np.flatnonzero(valid):\n", + " distance_m = float(np.hypot(centroid_x[i] - x, centroid_y[i] - y))\n", + " rows.append(\n", + " {\n", + " \"layer\": layer,\n", + " \"geometry_bng\": geoms[i],\n", + " \"centroid_bng\": Point(float(centroid_x[i]), float(centroid_y[i])),\n", + " \"woodland_type\": woodland_type[i],\n", + " \"area_m2\": float(area[i]),\n", + " \"mean_height_m\": None if not np.isfinite(height[i]) else float(height[i]),\n", + " \"distance_m\": distance_m,\n", + " \"counted\": distance_m <= RADIUS_M,\n", + " }\n", + " )\n", + " return rows\n", + "\n", + "\n", + "def build_postcode_map(dataset_path: str, layer_names: list[str], row: dict) -> str:\n", + " postcode = row[\"postcode\"]\n", + " point_bng = Point(row[\"x\"], row[\"y\"])\n", + " point_lon, point_lat = _to_wgs84.transform(point_bng.x, point_bng.y)\n", + " buffer_bng = point_bng.buffer(RADIUS_M, resolution=96)\n", + " boundary = postcode_boundary_wgs84(postcode)\n", + " features = tow_features_for_postcode(dataset_path, layer_names, point_bng.x, point_bng.y)\n", + "\n", + " counted_area = sum(feature[\"area_m2\"] for feature in features if feature[\"counted\"])\n", + " visual_density = round(min(counted_area / buffer_area * 100, 100), 1)\n", + "\n", + " m = folium.Map(\n", + " location=[point_lat, point_lon],\n", + " zoom_start=18,\n", + " tiles=\"CartoDB positron\",\n", + " control_scale=True,\n", + " width=\"100%\",\n", + " height=\"430px\",\n", + " )\n", + "\n", + " folium.GeoJson(\n", + " mapping(boundary),\n", + " name=\"postcode boundary\",\n", + " style_function=lambda _feature: {\n", + " \"color\": \"#2563eb\",\n", + " \"weight\": 3,\n", + " \"fillColor\": \"#93c5fd\",\n", + " \"fillOpacity\": 0.10,\n", + " },\n", + " ).add_to(m)\n", + " folium.GeoJson(\n", + " mapping(bng_geom_to_wgs84(buffer_bng)),\n", + " name=\"50m buffer\",\n", + " style_function=lambda _feature: {\n", + " \"color\": \"#f97316\",\n", + " \"weight\": 3,\n", + " \"fillColor\": \"#fed7aa\",\n", + " \"fillOpacity\": 0.18,\n", + " },\n", + " ).add_to(m)\n", + "\n", + " counted_group = folium.FeatureGroup(name=\"counted foliage\", show=True)\n", + " nearby_group = folium.FeatureGroup(name=\"nearby, not counted\", show=False)\n", + "\n", + " for index, feature in enumerate(features, start=1):\n", + " group = counted_group if feature[\"counted\"] else nearby_group\n", + " geom_wgs84 = bng_geom_to_wgs84(feature[\"geometry_bng\"])\n", + " centroid_lon, centroid_lat = _to_wgs84.transform(feature[\"centroid_bng\"].x, feature[\"centroid_bng\"].y)\n", + " style = {\n", + " \"color\": \"#15803d\" if feature[\"counted\"] else \"#6b7280\",\n", + " \"weight\": 2 if feature[\"counted\"] else 1,\n", + " \"fillColor\": \"#22c55e\" if feature[\"counted\"] else \"#9ca3af\",\n", + " \"fillOpacity\": 0.45 if feature[\"counted\"] else 0.20,\n", + " }\n", + " popup_html = (\n", + " f\"TOW polygon {index}
\"\n", + " f\"Status: {'counted' if feature['counted'] else 'not counted'}
\"\n", + " f\"Type: {feature['woodland_type']}
\"\n", + " f\"Area: {feature['area_m2']:.1f} sqm
\"\n", + " f\"Centroid distance: {feature['distance_m']:.1f} m\"\n", + " )\n", + " folium.GeoJson(\n", + " {\n", + " \"type\": \"Feature\",\n", + " \"geometry\": mapping(geom_wgs84),\n", + " \"properties\": {\"popup\": popup_html},\n", + " },\n", + " style_function=lambda _feature, style=style: style,\n", + " tooltip=\"counted foliage\" if feature[\"counted\"] else \"nearby foliage, not counted\",\n", + " popup=folium.Popup(popup_html, max_width=280),\n", + " ).add_to(group)\n", + " folium.CircleMarker(\n", + " [centroid_lat, centroid_lon],\n", + " radius=3,\n", + " color=\"#14532d\" if feature[\"counted\"] else \"#4b5563\",\n", + " fill=True,\n", + " fill_opacity=0.95,\n", + " tooltip=f\"TOW centroid: {feature['distance_m']:.1f}m\",\n", + " ).add_to(group)\n", + "\n", + " counted_group.add_to(m)\n", + " nearby_group.add_to(m)\n", + " folium.CircleMarker(\n", + " [point_lat, point_lon],\n", + " radius=7,\n", + " color=\"#1d4ed8\",\n", + " fill=True,\n", + " fill_color=\"#1d4ed8\",\n", + " fill_opacity=1,\n", + " tooltip=f\"{postcode} centroid\",\n", + " ).add_to(m)\n", + " folium.LayerControl(collapsed=True).add_to(m)\n", + "\n", + " title = escape(\n", + " f\"{postcode}: {row[density_col]:.1f}% | {int(row[count_col])} features | \"\n", + " f\"visual check {visual_density:.1f}%\"\n", + " )\n", + " iframe = escape(m.get_root().render(), quote=True)\n", + " return f\"\"\"\n", + "
\n", + "

{title}

\n", + " \n", + "
\n", + " \"\"\"\n", + "\n", + "\n", + "def side_by_side_document(cards: list[str]) -> str:\n", + " return f\"\"\"\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Street tree density visual check\n", + " \n", + " \n", + " \n", + "
{''.join(cards)}
\n", + " \n", + " \n", + " \"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "sample-and-render", + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-12T20:42:02.017000Z", + "iopub.status.busy": "2026-05-12T20:42:02.016926Z", + "iopub.status.idle": "2026-05-12T20:42:02.401198Z", + "shell.execute_reply": "2026-05-12T20:42:02.400928Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Street tree density visual check\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "

EX10 9HB: 3.9% | 8 features | visual check 3.9%

\n", + " \n", + "
\n", + " \n", + "
\n", + "

EX8 2DY: 16.9% | 8 features | visual check 16.9%

\n", + " \n", + "
\n", + " \n", + "
\n", + "

PO10 8QT: 2.8% | 5 features | visual check 2.8%

\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with redirect_stdout(io.StringIO()):\n", + " dataset_path = _tow_dataset_path(TOW_ZIP, EXTRACT_DIR, force_extract=False, use_vsizip=False)\n", + " postcode_points = _postcode_points(ARCGIS, max_postcodes=None)\n", + "layer_names = _layers(dataset_path, selected_layers=None)\n", + "\n", + "existing_districts = {path.stem for path in POSTCODE_BOUNDARY_UNITS.glob(\"*.geojson\")}\n", + "sampled = (\n", + " pl.scan_parquet(POSTCODE_TREE_DENSITY)\n", + " .filter(pl.col(count_col) > 0)\n", + " .filter(pl.col(density_col).is_not_null())\n", + " .with_columns(pl.col(\"postcode\").str.split(\" \").list.first().alias(\"district\"))\n", + " .filter(pl.col(\"district\").is_in(existing_districts))\n", + " .select(\"postcode\", density_col, area_col, count_col, height_col)\n", + " .collect()\n", + " .sample(n=N_POSTCODES, seed=RANDOM_SEED, shuffle=True)\n", + " .join(postcode_points, on=\"postcode\", how=\"inner\")\n", + " .sort(\"postcode\")\n", + ")\n", + "\n", + "cards = [build_postcode_map(dataset_path, layer_names, row) for row in sampled.to_dicts()]\n", + "html = side_by_side_document(cards)\n", + "OUTPUT_HTML.write_text(html)\n", + "\n", + "HTML(html)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "property-map (3.12.13)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/frontend/scripts/prerender.mjs b/frontend/scripts/prerender.mjs index e5bb063..46ff71d 100644 --- a/frontend/scripts/prerender.mjs +++ b/frontend/scripts/prerender.mjs @@ -12,9 +12,9 @@ const ROUTES = [ { path: '/', output: 'index.html', - title: 'Perfect Postcode - Find where to buy before browsing listings', + title: 'Find the best postcodes and areas to live in England | Perfect Postcode', description: - 'Search every postcode by budget, commute, schools, safety, noise, broadband, prices and more. Build a better home-buying shortlist before viewings.', + 'Discover where to live by comparing England postcodes by budget, commute, schools, crime, noise, broadband, property prices and local amenities before viewing homes.', }, { path: '/learn', diff --git a/frontend/src/components/home/HomePage.tsx b/frontend/src/components/home/HomePage.tsx index b74f9a2..5eb38eb 100644 --- a/frontend/src/components/home/HomePage.tsx +++ b/frontend/src/components/home/HomePage.tsx @@ -301,24 +301,22 @@ export default function HomePage({ {t('home.seeTheDifference')} -
-
-
+
+
+
-
{t('home.statProperties')}
+
{t('home.statProperties')}
-
-
+
+
-
{t('home.statFilters')}
+
{t('home.statFilters')}
-
-
- {t('home.statEvery')} -
-
{t('home.statPostcodeInEngland')}
+
+
{t('home.statEvery')}
+
{t('home.statPostcodeInEngland')}
diff --git a/frontend/src/components/home/ProductShowcase.tsx b/frontend/src/components/home/ProductShowcase.tsx index b0d46ca..31f26d2 100644 --- a/frontend/src/components/home/ProductShowcase.tsx +++ b/frontend/src/components/home/ProductShowcase.tsx @@ -840,7 +840,7 @@ function ScoutScreen({ isActive }: { isActive: boolean }) {
diff --git a/frontend/src/components/map/FeatureBrowser.tsx b/frontend/src/components/map/FeatureBrowser.tsx index 7d70b21..a085925 100644 --- a/frontend/src/components/map/FeatureBrowser.tsx +++ b/frontend/src/components/map/FeatureBrowser.tsx @@ -7,7 +7,7 @@ import { FilterIcon } from '../ui/icons'; import { CollapsibleGroupHeader } from '../ui/CollapsibleGroupHeader'; import { EmptyState } from '../ui/EmptyState'; import type { FeatureMeta } from '../../types'; -import { groupFeaturesByCategory, orderFilterGroups } from '../../lib/features'; +import { groupFeaturesByCategory } from '../../lib/features'; import { FeatureInfoPopup } from '../ui/FeatureInfoPopup'; import { FeatureActions } from '../ui/FeatureIcons'; import { FeatureLabel } from '../ui/FeatureLabel'; @@ -73,7 +73,7 @@ export default function FeatureBrowser({ ); }, [availableFeatures, search]); - const grouped = useMemo(() => orderFilterGroups(groupFeaturesByCategory(filtered)), [filtered]); + const grouped = useMemo(() => groupFeaturesByCategory(filtered), [filtered]); // When searching, expand all groups so results are visible const isSearching = search.length > 0; diff --git a/frontend/src/components/map/Filters.tsx b/frontend/src/components/map/Filters.tsx index 7728865..7bb4521 100644 --- a/frontend/src/components/map/Filters.tsx +++ b/frontend/src/components/map/Filters.tsx @@ -45,6 +45,7 @@ import { import { POI_FILTER_NAMES, POI_DISTANCE_FILTER_NAME, + TRANSPORT_DISTANCE_FILTER_NAME, POI_COUNT_2KM_FILTER_NAME, POI_COUNT_5KM_FILTER_NAME, getDefaultPoiDistanceFeatureName, @@ -170,6 +171,10 @@ export default memo(function Filters({ () => getDefaultPoiDistanceFeatureName(features), [features] ); + const defaultTransportDistanceFeatureName = useMemo( + () => getDefaultPoiFilterFeatureName(features, TRANSPORT_DISTANCE_FILTER_NAME), + [features] + ); const defaultPoiCount2KmFeatureName = useMemo( () => getDefaultPoiFilterFeatureName(features, POI_COUNT_2KM_FILTER_NAME), [features] @@ -179,6 +184,10 @@ export default memo(function Filters({ [features] ); const poiDistanceMeta = useMemo(() => getPoiDistanceFilterMeta(features), [features]); + const transportDistanceMeta = useMemo( + () => getPoiFilterMeta(features, TRANSPORT_DISTANCE_FILTER_NAME), + [features] + ); const poiCount2KmMeta = useMemo( () => getPoiFilterMeta(features, POI_COUNT_2KM_FILTER_NAME), [features] @@ -190,18 +199,25 @@ export default memo(function Filters({ const poiFilterMetas = useMemo( () => ({ [POI_DISTANCE_FILTER_NAME]: poiDistanceMeta, + [TRANSPORT_DISTANCE_FILTER_NAME]: transportDistanceMeta, [POI_COUNT_2KM_FILTER_NAME]: poiCount2KmMeta, [POI_COUNT_5KM_FILTER_NAME]: poiCount5KmMeta, }), - [poiDistanceMeta, poiCount2KmMeta, poiCount5KmMeta] + [poiDistanceMeta, transportDistanceMeta, poiCount2KmMeta, poiCount5KmMeta] ); const defaultPoiFilterFeatureNames = useMemo( () => ({ [POI_DISTANCE_FILTER_NAME]: defaultPoiDistanceFeatureName, + [TRANSPORT_DISTANCE_FILTER_NAME]: defaultTransportDistanceFeatureName, [POI_COUNT_2KM_FILTER_NAME]: defaultPoiCount2KmFeatureName, [POI_COUNT_5KM_FILTER_NAME]: defaultPoiCount5KmFeatureName, }), - [defaultPoiDistanceFeatureName, defaultPoiCount2KmFeatureName, defaultPoiCount5KmFeatureName] + [ + defaultPoiDistanceFeatureName, + defaultTransportDistanceFeatureName, + defaultPoiCount2KmFeatureName, + defaultPoiCount5KmFeatureName, + ] ); const schoolFilterItems = useMemo(() => { return Object.keys(filters) @@ -256,7 +272,11 @@ export default memo(function Filters({ const backendFeature = backendName ? features.find((feature) => feature.name === backendName) : undefined; - return { ...(backendFeature ?? poiFilterMetas[filterName]), name, group: 'Amenities' }; + return { + ...(backendFeature ?? poiFilterMetas[filterName]), + name, + group: poiFilterMetas[filterName].group, + }; }); }, [filters, features, poiFilterMetas]); const availableFeatures = useMemo(() => { @@ -266,8 +286,21 @@ export default memo(function Filters({ let insertedElectionVoteShareFilter = false; let insertedEthnicityFilter = false; const insertedPoiFilters = new Set(); + const maybeInsertPoiFilter = (filterName: PoiFilterName | null) => { + if ( + filterName && + defaultPoiFilterFeatureNames[filterName] && + !insertedPoiFilters.has(filterName) + ) { + result.push(poiFilterMetas[filterName]); + insertedPoiFilters.add(filterName); + } + }; for (const feature of features) { + if (feature.group === 'Transport') { + maybeInsertPoiFilter(TRANSPORT_DISTANCE_FILTER_NAME); + } if (isSchoolFilterName(feature.name)) { if (defaultSchoolFeatureName && !insertedSchoolFilter) { result.push(schoolMeta); @@ -297,15 +330,7 @@ export default memo(function Filters({ continue; } if (isPoiFilterFeatureName(feature.name)) { - const filterName = getPoiFilterName(feature.name); - if ( - filterName && - defaultPoiFilterFeatureNames[filterName] && - !insertedPoiFilters.has(filterName) - ) { - result.push(poiFilterMetas[filterName]); - insertedPoiFilters.add(filterName); - } + maybeInsertPoiFilter(getPoiFilterName(feature.name)); continue; } if (!enabledFeatures.has(feature.name)) result.push(feature); @@ -332,9 +357,19 @@ export default memo(function Filters({ let insertedSpecificCrimeFilters = false; let insertedElectionVoteShareFilters = false; let insertedEthnicityFilters = false; - let insertedPoiDistanceFilters = false; + const insertedPoiFilters = new Set(); + const insertPoiFilterItems = (filterName: PoiFilterName | null) => { + if (!filterName || insertedPoiFilters.has(filterName)) return; + result.push( + ...poiDistanceFilterItems.filter((item) => getPoiFilterName(item.name) === filterName) + ); + insertedPoiFilters.add(filterName); + }; for (const feature of features) { + if (feature.group === 'Transport') { + insertPoiFilterItems(TRANSPORT_DISTANCE_FILTER_NAME); + } if (isSchoolFilterName(feature.name)) { if (!insertedSchoolFilter) { result.push(...schoolFilterItems); @@ -364,10 +399,7 @@ export default memo(function Filters({ continue; } if (isPoiFilterFeatureName(feature.name)) { - if (!insertedPoiDistanceFilters) { - result.push(...poiDistanceFilterItems); - insertedPoiDistanceFilters = true; - } + insertPoiFilterItems(getPoiFilterName(feature.name)); continue; } if (enabledFeatures.has(feature.name)) result.push(feature); @@ -583,6 +615,7 @@ export default memo(function Filters({ electionVoteShareMeta, ethnicityMeta, poiDistanceMeta, + transportDistanceMeta, poiCount2KmMeta, poiCount5KmMeta, ]} diff --git a/frontend/src/components/map/HoverCard.tsx b/frontend/src/components/map/HoverCard.tsx index 590d26b..6e6596b 100644 --- a/frontend/src/components/map/HoverCard.tsx +++ b/frontend/src/components/map/HoverCard.tsx @@ -7,7 +7,11 @@ import { SCHOOL_FILTER_NAME, getSchoolBackendFeatureName } from '../../lib/schoo import { getSpecificCrimeFeatureName } from '../../lib/crime-filter'; import { getElectionVoteShareFeatureName } from '../../lib/election-filter'; import { getEthnicityFeatureName } from '../../lib/ethnicity-filter'; -import { POI_DISTANCE_FILTER_NAME, getPoiDistanceFeatureName } from '../../lib/poi-distance-filter'; +import { + POI_DISTANCE_FILTER_NAME, + getPoiDistanceFeatureName, + getPoiFilterName, +} from '../../lib/poi-distance-filter'; interface HoverCardData { count: number; @@ -69,7 +73,7 @@ export default memo(function HoverCard({ name: schoolBackendName ? SCHOOL_FILTER_NAME : poiDistanceFeatureName - ? POI_DISTANCE_FILTER_NAME + ? (getPoiFilterName(name) ?? POI_DISTANCE_FILTER_NAME) : backendName, value: formatValue(val, meta), }); diff --git a/frontend/src/components/map/MobileBottomSheet.test.tsx b/frontend/src/components/map/MobileBottomSheet.test.tsx index 3dde301..39157c5 100644 --- a/frontend/src/components/map/MobileBottomSheet.test.tsx +++ b/frontend/src/components/map/MobileBottomSheet.test.tsx @@ -141,7 +141,7 @@ describe('MobileBottomSheet keyboard avoidance', () => { it('reports covered height while the drawer is being dragged', async () => { installViewport({ innerHeight: 800, visualHeight: 800 }); const { coveredHeights, sheet } = renderSheet(); - const handle = sheet.firstElementChild; + const handle = sheet.firstElementChild?.firstElementChild; if (!(handle instanceof HTMLElement)) throw new Error('Expected bottom sheet drag handle'); diff --git a/frontend/src/components/map/MobileBottomSheet.tsx b/frontend/src/components/map/MobileBottomSheet.tsx index 54dec45..628495e 100644 --- a/frontend/src/components/map/MobileBottomSheet.tsx +++ b/frontend/src/components/map/MobileBottomSheet.tsx @@ -228,14 +228,18 @@ export default function MobileBottomSheet({ : 'height 140ms ease, bottom 180ms ease', }} > -
-
+
+
+
diff --git a/frontend/src/components/map/filters/ElectionVoteShareFilterCard.tsx b/frontend/src/components/map/filters/ElectionVoteShareFilterCard.tsx index e018544..1bb56dc 100644 --- a/frontend/src/components/map/filters/ElectionVoteShareFilterCard.tsx +++ b/frontend/src/components/map/filters/ElectionVoteShareFilterCard.tsx @@ -123,7 +123,7 @@ export function ElectionVoteShareFilterCard({ return (
onFilterChange(poiFeature.name, clampPoiFilterRange(v, selectedFeature)) } diff --git a/frontend/src/components/map/filters/SpecificCrimeFilterCard.tsx b/frontend/src/components/map/filters/SpecificCrimeFilterCard.tsx index f9cdc06..7316929 100644 --- a/frontend/src/components/map/filters/SpecificCrimeFilterCard.tsx +++ b/frontend/src/components/map/filters/SpecificCrimeFilterCard.tsx @@ -119,7 +119,7 @@ export function SpecificCrimeFilterCard({ return (
{ts(name)}
diff --git a/frontend/src/i18n/locales/de.ts b/frontend/src/i18n/locales/de.ts index f25714e..b5ab4e1 100644 --- a/frontend/src/i18n/locales/de.ts +++ b/frontend/src/i18n/locales/de.ts @@ -621,6 +621,10 @@ const de: Translations = { dsGreenspaceOrigin: 'Ordnance Survey', dsGreenspaceUse: 'Offizielle Grünflächengrenzen für Großbritannien, einschließlich öffentlicher Parks, Gärten, Sportplätze und Spielplätze. Polygon-Schwerpunkte werden für die Parknähezählung und Entfernungsberechnung zum nächsten Park verwendet.', + dsTowName: 'Nationale Karte der Bäume außerhalb von Waldflächen', + dsTowOrigin: 'Forest Research / Defra NCEA', + dsTowUse: + 'Baumkronen-Polygone für Einzelbäume, Baumgruppen und kleine Gehölze in England. Hier verwendet, um die straßennahe Baumdichte rund um Immobilienadressen zu schätzen.', dsNaptanName: 'NaPTAN (Haltestellen des öffentlichen Verkehrs)', dsNaptanOrigin: 'Department for Transport', dsNaptanUse: @@ -996,6 +1000,7 @@ const de: Translations = { 'Specific crimes': 'Einzelne Delikte', Ethnicities: 'Ethnien', 'Amenity distance': 'Entfernung zu Infrastruktur', + 'Closest transport option': 'Nächste Verkehrsoption', 'Amenities within 2km': 'Infrastruktur im Umkreis von 2 km', 'Amenities within 5km': 'Infrastruktur im Umkreis von 5 km', diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index 7fddc59..906d47b 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -410,18 +410,18 @@ const en = { // ── Home Page ────────────────────────────────────── home: { - heroEyebrow: 'For buyers asking “where should I even look?”', - heroTitle1: 'Find the postcodes that', - heroTitle2: 'fit your life', - heroTitle3: 'Not just the areas you already know.', + heroEyebrow: "For buyers who don't know where to start", + heroTitle1: 'Start with your needs,', + heroTitle2: 'not an area name', + heroTitle3: 'Then shortlist postcodes worth viewing.', heroSubtitle: - 'From London boroughs to commuter towns and regional cities, England has too many places to research one by one.', + 'Most buyers start with a few familiar areas, then stitch together listing sites, commute checks, school reports, crime maps, broadband tools, and sold prices in separate tabs.', heroDescription: - 'Set your budget, commute, schools, safety, noise, broadband, and lifestyle needs. Perfect Postcode scans England’s postcodes and reveals the places that actually fit, including areas you’d never have typed into a listing portal.', - exploreTheMap: 'Find my matching postcodes', - seeTheDifference: 'See how it works', - productDemoLabel: 'Perfect Postcode product demo', - playProductDemo: 'Play Perfect Postcode product demo', + 'Set your budget, commute, schools, safety, noise, broadband, parks, shops, and property needs. Perfect Postcode checks postcodes across England and shows the areas worth shortlisting, including places you may not know by name.', + exploreTheMap: 'Start matching postcodes', + seeTheDifference: 'Watch the demo', + productDemoLabel: 'Watch the postcode shortlist demo', + playProductDemo: 'Play the postcode shortlist demo', scrollToProductDemo: 'Scroll to product demo', showcaseHeader: 'How it works', showcaseContext: 'How Perfect Postcode works', @@ -429,43 +429,43 @@ const en = { showcaseFeatureNoiseShort: 'Noise', showcaseFeatureSchoolsShort: 'Schools', showcaseFeatureTravelShort: 'Travel', - showcaseGoodPrimariesNearby: '{{count}}+ good primaries nearby', - showcaseWithinRail: 'Within {{count}} min of rail', - showcaseMatchingHomesLabel: 'Matching homes', - showcaseMatchingHomes: '{{value}} matching homes', + showcaseGoodPrimariesNearby: '{{count}}+ Good or Outstanding primary schools nearby', + showcaseWithinRail: 'Within {{count}} min of a station', + showcaseMatchingHomesLabel: 'Matching postcodes', + showcaseMatchingHomes: '{{value}} matching postcodes', showcaseMedianPrice: '{{value}} median', showcaseJourneyRoutes: 'Journey routes', showcaseNearby: '{{value}} nearby', showcasePoliticalVoteShare: 'Political vote share', - showcaseLotsMore: '...and lots more', + showcaseLotsMore: 'More neighbourhood data', showcaseMinutes: '{{count}} min', showcaseSendShortlist: 'Send the shortlist', showcaseDownloadXlsx: 'Download .xlsx', showcaseTopThree: 'Top 3', - showcaseScoutBullet1: 'Walk the streets before the listing search narrows your options.', + showcaseScoutBullet1: 'Check the street before you commit to listing alerts.', showcaseScoutBullet2: 'Test the commute from a real front door, not a borough name.', - showcaseScoutBullet3: 'Compare viewings with evidence already in hand.', + showcaseScoutBullet3: 'Compare viewings with evidence already saved.', showcaseStep1Tab: 'Filter', - showcaseStep1Title: 'Turn vague needs into a tight search', + showcaseStep1Title: 'Turn your needs into clear search filters', showcaseStep1Body: - 'Set what matters and see exactly how many wrong-fit postcodes each requirement keeps out of your search.', + 'Set what matters and see how many unsuitable postcodes each requirement removes.', showcaseStep1Chip1: 'Quiet streets', - showcaseStep1Chip2: 'Top-rated primaries', + showcaseStep1Chip2: 'Good primaries nearby', showcaseStep1Chip3: 'Under £500k', showcaseStep1VennCenter: 'Postcodes that meet all three', showcaseStep2Tab: 'Match', - showcaseStep2Title: 'Let the map surface places you wouldn’t have typed', + showcaseStep2Title: 'Find places you would never have known to search', showcaseStep2Body: - 'Scan England by fit instead of starting from familiar area names. Hidden pockets become visible before listing portals narrow your imagination.', + 'Search by what you need, not by area name. The map shows suitable postcode clusters before listing sites narrow the search.', showcaseStep2Region: 'Greater London', showcaseStep2Sources: 'Land Registry · ONS · Ofsted · DfT', showcaseStep2ClustersLabel: 'Matching clusters', showcaseStep3Tab: 'Inspect', - showcaseStep3Title: 'Inspect why a postcode made the cut', + showcaseStep3Title: 'See why a postcode matches', showcaseStep3Body: 'Open any matching area and check prices, safety, schools, broadband, and trade-offs in one pane before you spend a weekend there.', - showcaseStep3HeaderArea: 'Your perfect postcode', - showcaseStep3HeaderFit: 'Neighbourhood evidence', + showcaseStep3HeaderArea: 'Shortlisted postcode', + showcaseStep3HeaderFit: 'Why it matches', showcaseStep3Stat1Label: 'Sold price trend', showcaseStep3Stat2Label: 'Crime rate', showcaseStep3Stat2Value: 'Below borough avg.', @@ -473,50 +473,50 @@ const en = { showcaseStep3Stat4Label: 'Broadband', showcaseStep3Stat4Value: '1 Gbps available', showcaseStep3Stat5Label: 'Primary schools', - showcaseStep3Stat5Value: '3 outstanding within 1 mile', + showcaseStep3Stat5Value: '3 Outstanding within 1 mile', showcaseStep4Tab: 'Scout', - showcaseStep4Title: 'Scout it out yourself', + showcaseStep4Title: 'Take the strongest areas into the real world', showcaseStep4Body: - 'Take three grounded starting points into the real world. Walk the streets, test the commute, and compare viewings with context.', + 'Export suggested postcodes to visit. Walk the streets, test the commute, and compare viewings with the data you saved.', showcaseStep4FileName: 'areas-to-scout.xlsx', showcaseStep4ExportLabel: 'Export to Excel', showcaseStep4ColPostcode: 'Postcode', - showcaseStep4ColScore: 'Fit', + showcaseStep4ColScore: 'Match', showcaseStep4ColCommute: 'Commute', - showcaseStep4ColPrice: 'Median sold', - showcaseStep4Conclusion: 'You can start your journey from here.', - statProperties: 'historical sales', - statFilters: 'combinable filters', + showcaseStep4ColPrice: 'Median sold price', + showcaseStep4Conclusion: 'Export a shortlist and start checking streets.', + statProperties: 'HM Land Registry sales', + statFilters: 'ways to narrow the map', statEvery: 'Every', - statPostcodeInEngland: 'postcode in England', - ourPhilosophy: 'Start with your life, not a postcode', + statPostcodeInEngland: 'active postcode in England', + ourPhilosophy: 'Start with needs. End with postcodes.', philosophyP1: - 'Most property sites ask where you want to live. In London that’s painfully hard, but the same problem shows up across England: buyers choose from the few places they know, then cross-check commute tools, Ofsted, police data, Street View, broadband checkers, and sold prices in separate tabs.', + 'Listing sites force you to pick a town, borough, or postcode before you know which places can work. That means the search is limited by memory, recommendations, and whatever happens to be for sale this week.', philosophyP2: - 'Perfect Postcode flips the search. Tell the map what matters and it shows the postcodes that qualify, with evidence for why they’re worth inspecting. Data first, then go test the vibe.', + 'Perfect Postcode starts with your requirements instead. Tell the map your budget, commute, school, safety, noise, broadband, and local-context needs, then inspect the postcodes that match before you open listings.', streetTitle: 'Places change street by street', streetIntro: - 'Broad area names hide the details that matter: the station side, the road noise, the school mix, the exact commute, and what similar homes actually sold for.', - streetCard1Title: 'Find areas you may have missed', + 'Area names hide the details that matter: the station side, the road noise, the school mix, the exact commute, and what similar homes actually sold for.', + streetCard1Title: 'Find places you may have missed', streetCard1Body: - 'Surface postcodes that match your requirements instead of relying on familiar names, friend recommendations, or “up-and-coming” hype.', - streetCard2Title: 'See the trade-offs before viewings', + 'Search postcode-level data by your requirements instead of relying on familiar names, friend recommendations, or “up-and-coming” hype.', + streetCard2Title: 'Check the trade-offs before viewings', streetCard2Body: - 'Compare price, space, commute, safety, schools, broadband, noise, and energy ratings before you spend weekends travelling between viewings.', - othersVs: 'Others vs', - checkMyPostcode: 'Listing portals', - areaGuides: 'Postcode reports', - compSearchWithout: 'Discover areas before you know their names', + 'Compare price, space, commute, safety, schools, broadband, noise, energy ratings, parks, and local amenities before you spend weekends travelling between viewings.', + othersVs: 'Other tools vs', + checkMyPostcode: 'Listing sites', + areaGuides: 'Postcode checkers', + compSearchWithout: 'Find areas before you know their names', compSearchWithoutSub: '(requirements first, location second)', - compAreaData: 'Postcode-level neighbourhood evidence', + compAreaData: 'Neighbourhood evidence in one place', compAreaDataSub: '(crime, schools, noise, broadband, amenities)', - compPropertyData: 'Property-level history', + compPropertyData: 'Street-level property context', compPropertyDataSub: '(sold prices, EPC, floor area, estimated value)', - compFilters: '56 filters working together', - compFiltersSub: '(not one postcode or one listing at a time)', - ctaTitle: 'Stop guessing where to buy.', + compFilters: 'All your requirements working together', + compFiltersSub: '(budget + commute + schools + safety + local context)', + ctaTitle: 'Do the area research before you book the viewing.', ctaDescription: - 'Build a shortlist of postcodes that fit your actual life, then test them in person.', + 'Build a postcode shortlist from price, commute, schools, safety, noise, broadband, amenities, and sold-price evidence, then verify the streets in person.', }, // ── Pricing Page ─────────────────────────────────── @@ -612,7 +612,7 @@ const en = { dsTowName: 'National Trees Outside Woodland Map', dsTowOrigin: 'Forest Research / Defra NCEA', dsTowUse: - 'Tree canopy polygons for lone trees, groups of trees, and small woodlands in England. Used here to estimate street-level tree density around property addresses.', + 'Tree canopy polygons for lone trees, groups of trees, and small woodlands in England. Used here to estimate street-level tree coverage percentiles around property addresses.', dsNaptanName: 'NaPTAN (Public Transport Stops)', dsNaptanOrigin: 'Department for Transport', dsNaptanUse: @@ -980,6 +980,7 @@ const en = { 'Specific crimes': 'Specific crimes', Ethnicities: 'Ethnicities', 'Amenity distance': 'Amenity distance', + 'Closest transport option': 'Closest transport option', 'Amenities within 2km': 'Amenities within 2km', 'Amenities within 5km': 'Amenities within 5km', diff --git a/frontend/src/i18n/locales/fr.ts b/frontend/src/i18n/locales/fr.ts index 30d9033..2d2727a 100644 --- a/frontend/src/i18n/locales/fr.ts +++ b/frontend/src/i18n/locales/fr.ts @@ -622,6 +622,10 @@ const fr: Translations = { dsGreenspaceOrigin: 'Ordnance Survey', dsGreenspaceUse: 'Limites officielles des espaces verts de Grande-Bretagne, incluant parcs publics, jardins, terrains de sport et aires de jeux. Les centroïdes des polygones sont utilisés pour le comptage de proximité des parcs et le calcul de la distance au parc le plus proche.', + dsTowName: 'Carte nationale des arbres hors forêt', + dsTowOrigin: 'Forest Research / Defra NCEA', + dsTowUse: + 'Polygones de couvert arboré pour les arbres isolés, groupes d’arbres et petits bois en Angleterre. Utilisés ici pour estimer la densité d’arbres au niveau de la rue autour des adresses de biens.', dsNaptanName: 'NaPTAN (arrêts de transport public)', dsNaptanOrigin: 'Department for Transport', dsNaptanUse: @@ -995,6 +999,7 @@ const fr: Translations = { 'Specific crimes': 'Crimes spécifiques', Ethnicities: 'Origines ethniques', 'Amenity distance': 'Distance aux commodités', + 'Closest transport option': 'Transport le plus proche', 'Amenities within 2km': 'Commodités à moins de 2 km', 'Amenities within 5km': 'Commodités à moins de 5 km', diff --git a/frontend/src/i18n/locales/hi.ts b/frontend/src/i18n/locales/hi.ts index 8012fe2..8da14f1 100644 --- a/frontend/src/i18n/locales/hi.ts +++ b/frontend/src/i18n/locales/hi.ts @@ -583,6 +583,10 @@ const hi: Translations = { dsGreenspaceOrigin: 'Ordnance Survey', dsGreenspaceUse: 'ग्रेट ब्रिटेन के लिए आधिकारिक हरित क्षेत्र सीमाएं, जिनमें सार्वजनिक पार्क, उद्यान, खेल मैदान और खेलने की जगहें शामिल हैं. पार्क निकटता गिनती और निकटतम पार्क दूरी गणना के लिए बहुभुज केंद्र बिंदु उपयोग होते हैं.', + dsTowName: 'वन क्षेत्र से बाहर पेड़ों का राष्ट्रीय नक्शा', + dsTowOrigin: 'Forest Research / Defra NCEA', + dsTowUse: + 'इंग्लैंड में अकेले पेड़ों, पेड़ों के समूहों और छोटे वन क्षेत्रों के वृक्ष आच्छादन बहुभुज. यहां संपत्ति पतों के आसपास सड़क-स्तर पेड़ घनत्व का अनुमान लगाने के लिए उपयोग किया गया है.', dsNaptanName: 'NaPTAN (सार्वजनिक परिवहन स्टॉप)', dsNaptanOrigin: 'Department for Transport', dsNaptanUse: @@ -912,6 +916,7 @@ const hi: Translations = { 'Specific crimes': 'विशिष्ट अपराध', Ethnicities: 'जातीय समूह', 'Amenity distance': 'सुविधा दूरी', + 'Closest transport option': 'निकटतम परिवहन विकल्प', 'Amenities within 2km': '2 किमी के अंदर सुविधाएं', 'Amenities within 5km': '5 किमी के अंदर सुविधाएं', Detached: 'अलग मकान', diff --git a/frontend/src/i18n/locales/hu.ts b/frontend/src/i18n/locales/hu.ts index 389c7b3..98ad4ee 100644 --- a/frontend/src/i18n/locales/hu.ts +++ b/frontend/src/i18n/locales/hu.ts @@ -615,6 +615,10 @@ const hu: Translations = { dsGreenspaceOrigin: 'Ordnance Survey', dsGreenspaceUse: 'Hivatalos zöldterületi határok Nagy-Britanniában, beleértve a közparkokat, kerteket, sportterületeket és játszótereket. A poligon középpontjait használjuk a park közelségi számláláshoz és a legközelebbi park távolságának számításához.', + dsTowName: 'Országos, erdőn kívüli fák térképe', + dsTowOrigin: 'Forest Research / Defra NCEA', + dsTowUse: + 'Fa lombkorona-poligonok magányos fákhoz, facsoportokhoz és kisebb erdőfoltokhoz Angliában. Itt az ingatlancímek körüli utcaszintű fasűrűség becslésére használjuk.', dsNaptanName: 'NaPTAN (Tömegközlekedési megállók)', dsNaptanOrigin: 'Department for Transport', dsNaptanUse: @@ -987,6 +991,7 @@ const hu: Translations = { 'Specific crimes': 'Konkrét bűncselekmények', Ethnicities: 'Etnikai csoportok', 'Amenity distance': 'Szolgáltatás-távolság', + 'Closest transport option': 'Legközelebbi közlekedési lehetőség', 'Amenities within 2km': 'Szolgáltatások 2 km-en belül', 'Amenities within 5km': 'Szolgáltatások 5 km-en belül', diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index a112a67..cc8b488 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -597,6 +597,10 @@ const zh: Translations = { dsGreenspaceOrigin: 'Ordnance Survey', dsGreenspaceUse: '大不列颠地区权威的绿地边界数据,包括公共公园、花园、运动场和游乐场。多边形质心用于公园邻近度计数和最近公园距离计算。', + dsTowName: '国家非林地树木地图', + dsTowOrigin: 'Forest Research / Defra NCEA', + dsTowUse: + '英格兰孤立树木、树群和小片林地的树冠多边形。此处用于估算房产地址周围街道级树木密度。', dsNaptanName: 'NaPTAN(公共交通站点)', dsNaptanOrigin: 'Department for Transport', dsNaptanUse: '英格兰各地铁路、公交、地铁/有轨电车、渡轮和机场的站点位置。', @@ -956,6 +960,7 @@ const zh: Translations = { 'Specific crimes': '具体犯罪', Ethnicities: '族裔', 'Amenity distance': '配套设施距离', + 'Closest transport option': '最近的交通选择', 'Amenities within 2km': '2 公里内配套设施', 'Amenities within 5km': '5 公里内配套设施', diff --git a/frontend/src/index.css b/frontend/src/index.css index c98931d..76f2f02 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -192,6 +192,48 @@ h3 { height: 5rem; } +.home-hero-stats { + flex-direction: column; + gap: 0.7rem; +} + +.home-hero-stat { + display: inline-flex; + min-width: 0; + align-items: baseline; + gap: 0.4rem; + white-space: nowrap; +} + +.home-hero-stat-value { + flex: 0 0 auto; + font-size: clamp(1.25rem, 5.2vw, 1.875rem); + font-weight: 700; + line-height: 1.05; + color: #ffffff; +} + +.home-hero-stat-label { + min-width: 0; + flex: 0 1 auto; + color: #e7e5e4; + font-size: clamp(0.78rem, 3.25vw, 0.875rem); + line-height: 1.15; + white-space: nowrap; +} + +@media (min-width: 640px) { + .home-hero-stats { + flex-direction: row; + column-gap: 3rem; + row-gap: 1rem; + } + + .home-hero-stat-value { + font-size: 1.875rem; + } +} + @media (min-width: 1200px) { .home-hero-container { padding-top: 3rem; @@ -300,7 +342,30 @@ h3 { } } +@keyframes scout-export-icon-pop { + 0%, + 54%, + 100% { + transform: scale(1); + } + 62% { + transform: scale(0.82) rotate(-5deg); + } + 72% { + transform: scale(1.13) rotate(4deg); + } + 84% { + transform: scale(1); + } +} + .scout-export-action { + transform-origin: center; + animation: none; +} + +.scout-export-icon { + transform-origin: center; animation: none; } @@ -331,10 +396,31 @@ h3 { animation: scout-export-ripple 2.4s ease-out 1 both; } +.scout-screen-active .scout-export-icon { + animation: scout-export-icon-pop 2.4s ease-in-out 1 both; +} + .scout-screen-active .scout-export-check { animation: scout-export-check 2.4s ease-in-out 1 both; } +@media (max-width: 639px) { + .scout-export-ripple { + width: 5.5rem; + height: 5.5rem; + } + + .scout-screen-active .scout-export-action { + animation-duration: 2s; + } + + .scout-screen-active .scout-export-ripple, + .scout-screen-active .scout-export-icon, + .scout-screen-active .scout-export-check { + animation-duration: 2s; + } +} + @media (prefers-reduced-motion: reduce) { .showcase-progress { animation: none !important; @@ -342,6 +428,7 @@ h3 { } .scout-export-action, + .scout-export-icon, .scout-export-ripple, .scout-export-check { animation: none !important; diff --git a/frontend/src/lib/api.test.ts b/frontend/src/lib/api.test.ts index 966a8f2..5fa37bf 100644 --- a/frontend/src/lib/api.test.ts +++ b/frontend/src/lib/api.test.ts @@ -8,6 +8,7 @@ import { createElectionVoteShareFilterKey } from './election-filter'; import { createEthnicityFilterKey } from './ethnicity-filter'; import { POI_COUNT_2KM_FILTER_NAME, + TRANSPORT_DISTANCE_FILTER_NAME, createPoiDistanceFilterKey, createPoiFilterKey, } from './poi-distance-filter'; @@ -141,18 +142,18 @@ describe('api utilities', () => { it('serializes amenity distance filters using their selected backend feature', () => { const features: FeatureMeta[] = [ { name: 'Distance to nearest park (km)', type: 'numeric', min: 0, max: 2 }, - { name: 'Distance to nearest Tesco (km)', type: 'numeric', min: 0, max: 5 }, + { name: 'Distance to nearest grocery store (km)', type: 'numeric', min: 0, max: 5 }, ]; expect( buildFilterString( { [createPoiDistanceFilterKey('Distance to nearest park (km)', 1)]: [0, 0.5], - [createPoiDistanceFilterKey('Distance to nearest Tesco (km)', 2)]: [0, 1], + [createPoiDistanceFilterKey('Distance to nearest grocery store (km)', 2)]: [0, 1], }, features ) - ).toBe('Distance to nearest park (km):0:0.5;;Distance to nearest Tesco (km):0:1'); + ).toBe('Distance to nearest park (km):0:0.5;;Distance to nearest grocery store (km):0:1'); }); it('serializes amenity count filters using their selected backend feature', () => { @@ -173,4 +174,23 @@ describe('api utilities', () => { ) ).toBe('Number of amenities (Cafe) within 2km:2:10'); }); + + it('serializes transport distance filters using their selected backend feature', () => { + const features: FeatureMeta[] = [ + { name: 'Distance to nearest amenity (Bus stop) (km)', type: 'numeric', min: 0, max: 2 }, + ]; + + expect( + buildFilterString( + { + [createPoiFilterKey( + TRANSPORT_DISTANCE_FILTER_NAME, + 'Distance to nearest amenity (Bus stop) (km)', + 1 + )]: [0, 0.4], + }, + features + ) + ).toBe('Distance to nearest amenity (Bus stop) (km):0:0.4'); + }); }); diff --git a/pipeline/transform/test_merge.py b/pipeline/transform/test_merge.py index 3964eca..5f749f7 100644 --- a/pipeline/transform/test_merge.py +++ b/pipeline/transform/test_merge.py @@ -1,6 +1,8 @@ import polars as pl from pipeline.transform.merge import ( + _AREA_COLUMNS, + _STATIC_POI_DISTANCE_RENAMES, _is_dynamic_poi_metric_column, _less_deprived_percentile_expr, ) @@ -27,7 +29,20 @@ def test_less_deprived_percentile_expr_uses_exact_scale_endpoints() -> None: def test_dynamic_poi_metric_columns_are_area_level() -> None: - assert _is_dynamic_poi_metric_column("Distance to nearest Cafe POI (km)") - assert _is_dynamic_poi_metric_column("Number of Cafe POIs within 2km") - assert _is_dynamic_poi_metric_column("Number of Cafe POIs within 5km") + assert _is_dynamic_poi_metric_column("Distance to nearest amenity (Cafe) (km)") + assert _is_dynamic_poi_metric_column("Number of amenities (Cafe) within 2km") + assert _is_dynamic_poi_metric_column("Number of amenities (Cafe) within 5km") assert not _is_dynamic_poi_metric_column("Number of restaurants within 2km") + + +def test_static_poi_distance_columns_are_renamed_to_configured_area_features() -> None: + expected = { + "parks_nearest_km": "Distance to nearest park (km)", + "grocery_store_nearest_km": "Distance to nearest grocery store (km)", + "cafe_nearest_km": "Distance to nearest cafe (km)", + "pub_nearest_km": "Distance to nearest pub (km)", + "restaurant_nearest_km": "Distance to nearest restaurant (km)", + } + + assert _STATIC_POI_DISTANCE_RENAMES == expected + assert set(expected.values()).issubset(_AREA_COLUMNS) diff --git a/pipeline/transform/test_poi_proximity.py b/pipeline/transform/test_poi_proximity.py index 68fb9c7..10aee24 100644 --- a/pipeline/transform/test_poi_proximity.py +++ b/pipeline/transform/test_poi_proximity.py @@ -1,6 +1,9 @@ import polars as pl -from pipeline.transform.poi_proximity import _build_poi_category_groups +from pipeline.transform.poi_proximity import ( + _build_poi_category_groups, + _dynamic_poi_metric_renames, +) def test_dynamic_poi_groups_include_requested_categories_only() -> None: @@ -11,6 +14,7 @@ def test_dynamic_poi_groups_include_requested_categories_only() -> None: + ["Leisure"] * 2 + ["Groceries"] * 101 + ["Groceries"] * 100 + + ["Leisure"] * 10 + ["Education"] * 200 + ["Health"] * 200 ), @@ -19,11 +23,12 @@ def test_dynamic_poi_groups_include_requested_categories_only() -> None: + ["Café", "Restaurant"] + ["Tesco"] * 101 + ["Waitrose"] * 100 + + ["Park"] * 10 + ["School"] * 200 + ["Pharmacy"] * 200 ), - "lat": [51.5] * 605, - "lng": [-0.1] * 605, + "lat": [51.5] * 615, + "lng": [-0.1] * 615, } ) @@ -37,5 +42,14 @@ def test_dynamic_poi_groups_include_requested_categories_only() -> None: "Tesco", } assert "poi_waitrose" not in groups + assert "poi_park" not in groups assert "poi_school" not in groups assert "poi_pharmacy" not in groups + + +def test_dynamic_poi_metric_renames_support_park_count_options() -> None: + assert _dynamic_poi_metric_renames({"parks": "Park"}) == { + "parks_nearest_km": "Distance to nearest amenity (Park) (km)", + "parks_2km": "Number of amenities (Park) within 2km", + "parks_5km": "Number of amenities (Park) within 5km", + } diff --git a/pipeline/transform/test_tree_density.py b/pipeline/transform/test_tree_density.py new file mode 100644 index 0000000..a6e1b06 --- /dev/null +++ b/pipeline/transform/test_tree_density.py @@ -0,0 +1,99 @@ +from pathlib import Path + +import polars as pl +import pytest + +from pipeline.transform.tree_density import ( + STREET_TREE_COVERAGE_COL, + STREET_TREE_DENSITY_COL, + _coverage_percentile_expr, + _metric_columns, + _postcode_density_percentile_col, + _with_postcode_density_percentiles, + _write_street_rollups, +) + + +def test_coverage_percentile_expr_ranks_higher_coverage_higher() -> None: + df = pl.DataFrame({"coverage": [0.0, 5.0, 10.0, None]}) + + result = df.lazy().with_columns( + _coverage_percentile_expr("coverage", "percentile") + ).collect() + + assert result["percentile"].to_list() == [0.0, 50.0, 100.0, None] + + +def test_coverage_percentile_expr_uses_exact_scale_endpoints() -> None: + df = pl.DataFrame({"coverage": [0.0, 0.0, 5.0, 10.0, 10.0]}) + + result = df.lazy().with_columns( + _coverage_percentile_expr("coverage", "percentile") + ).collect() + + assert result["percentile"].to_list() == [0.0, 0.0, 50.0, 100.0, 100.0] + + +def test_street_rollup_percentiles_are_ranked_over_raw_street_coverage( + tmp_path: Path, +) -> None: + radius_m = 50 + density_col, area_col, count_col, height_col = _metric_columns(radius_m) + percentile_col = _postcode_density_percentile_col(radius_m) + + postcode_metrics = _with_postcode_density_percentiles( + pl.DataFrame( + { + "postcode": ["AA1 1AA", "AA1 1AB", "AA1 1AC"], + density_col: [10.0, 30.0, 50.0], + area_col: [100.0, 300.0, 500.0], + count_col: [1, 3, 5], + height_col: [4.0, 6.0, 8.0], + } + ), + radius_m, + ) + + price_paid = pl.DataFrame( + { + "postcode": ["AA1 1AA", "AA1 1AA", "AA1 1AB", "AA1 1AC"], + "paon": ["1", "2", "3", "4"], + "saon": ["", "", "", ""], + "street": ["Oak Road", "Oak Road", "Oak Road", "Elm Street"], + "locality": ["", "", "", ""], + "town_city": ["Test Town", "Test Town", "Test Town", "Test Town"], + "district": ["Test District"] * 4, + "county": ["Test County"] * 4, + "date_of_transfer": [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + "2024-01-04", + ], + } + ) + price_paid_path = tmp_path / "price-paid.parquet" + output_streets = tmp_path / "streets.parquet" + output_addresses = tmp_path / "addresses.parquet" + price_paid.write_parquet(price_paid_path) + + _write_street_rollups( + postcode_metrics=postcode_metrics, + price_paid_path=price_paid_path, + output_streets=output_streets, + output_addresses=output_addresses, + radius_m=radius_m, + ) + + streets = pl.read_parquet(output_streets).sort("street") + addresses = pl.read_parquet(output_addresses) + + assert streets["street"].to_list() == ["Elm Street", "Oak Road"] + assert streets[STREET_TREE_COVERAGE_COL].to_list() == pytest.approx([50.0, 16.7]) + assert streets.select("street", STREET_TREE_DENSITY_COL).rows() == [ + ("Elm Street", 100.0), + ("Oak Road", 0.0), + ] + assert percentile_col in addresses.columns + assert STREET_TREE_COVERAGE_COL in addresses.columns + assert STREET_TREE_DENSITY_COL in addresses.columns diff --git a/pipeline/transform/tree_density.py b/pipeline/transform/tree_density.py new file mode 100644 index 0000000..146e7d2 --- /dev/null +++ b/pipeline/transform/tree_density.py @@ -0,0 +1,635 @@ +"""Derive street-scale tree density metrics from Forest Research TOW data. + +The Forest Research Trees Outside Woodland release is an Esri File Geodatabase +inside property-data/FR_TOW_V1_ALL.zip. This transformer computes a compact +postcode-level metric from the tree polygons, then optionally rolls that up to +Price Paid street names so the dashboard can answer "what is this address's +street like?" without loading the full geodatabase at runtime. +""" + +from __future__ import annotations + +import argparse +import math +import shutil +import zipfile +from pathlib import Path + +import numpy as np +import polars as pl +import pyogrio +import shapely +from scipy.spatial import cKDTree + + +DEFAULT_TOW_TYPES = ("Lone Tree", "Group of Trees") +TOW_GDB_NAME = "FR_TOW_V1_ALL.gdb" +STREET_TREE_DENSITY_COL = "Street tree density percentile" +STREET_TREE_COVERAGE_COL = "Street tree coverage (%)" +POSTCODE_DENSITY_COL = "Tree canopy density within {radius}m (%)" +POSTCODE_DENSITY_PERCENTILE_COL = "Tree canopy density percentile within {radius}m" +POSTCODE_AREA_COL = "Tree canopy area within {radius}m (sqm)" +POSTCODE_COUNT_COL = "Tree features within {radius}m" +POSTCODE_HEIGHT_COL = "Mean TOW height within {radius}m (m)" + + +def _safe_extract_zip(zip_path: Path, extract_dir: Path, force: bool) -> Path: + """Extract the TOW zip and return the extracted .gdb path.""" + gdb_path = extract_dir / TOW_GDB_NAME + if gdb_path.exists() and not force: + print(f"Using existing extracted geodatabase: {gdb_path}") + return gdb_path + + if force and extract_dir.exists(): + shutil.rmtree(extract_dir) + elif extract_dir.exists(): + print(f"Removing incomplete extraction directory: {extract_dir}") + shutil.rmtree(extract_dir) + + tmp_dir = extract_dir.with_name(f".{extract_dir.name}.tmp") + if tmp_dir.exists(): + shutil.rmtree(tmp_dir) + tmp_dir.mkdir(parents=True) + + root = tmp_dir.resolve() + print(f"Extracting {zip_path} to {extract_dir}...") + with zipfile.ZipFile(zip_path) as archive: + for member in archive.infolist(): + target = (tmp_dir / member.filename).resolve() + if root != target and root not in target.parents: + raise ValueError(f"Unsafe path in zip archive: {member.filename}") + + if member.is_dir(): + target.mkdir(parents=True, exist_ok=True) + continue + + target.parent.mkdir(parents=True, exist_ok=True) + with archive.open(member) as source, target.open("wb") as dest: + shutil.copyfileobj(source, dest, length=1024 * 1024) + + if not (tmp_dir / TOW_GDB_NAME).exists(): + raise FileNotFoundError(f"{TOW_GDB_NAME} was not found inside {zip_path}") + + tmp_dir.rename(extract_dir) + print(f"Extracted geodatabase: {gdb_path}") + return gdb_path + + +def _tow_dataset_path( + zip_path: Path, extract_dir: Path, force_extract: bool, use_vsizip: bool +) -> str: + if use_vsizip: + return f"/vsizip/{zip_path.resolve()}/{TOW_GDB_NAME}" + return str(_safe_extract_zip(zip_path, extract_dir, force_extract)) + + +def _where_for_tow_types(tow_types: tuple[str, ...] | None) -> str | None: + if not tow_types: + return None + escaped = [tow_type.replace("'", "''") for tow_type in tow_types] + values = ", ".join(f"'{tow_type}'" for tow_type in escaped) + return f"Woodland_Type IN ({values})" + + +def _postcode_points(arcgis_path: Path, max_postcodes: int | None) -> pl.DataFrame: + points = ( + pl.scan_parquet(arcgis_path) + .filter(pl.col("ctry25cd") == "E92000001") + .filter(pl.col("doterm").is_null()) + .select( + pl.col("pcds").alias("postcode"), + pl.col("east1m").cast(pl.Float64).alias("x"), + pl.col("north1m").cast(pl.Float64).alias("y"), + ) + .drop_nulls(["postcode", "x", "y"]) + .unique("postcode") + .sort("postcode") + ) + if max_postcodes is not None: + points = points.head(max_postcodes) + df = points.collect() + print(f"Loaded {df.height:,} active English postcode points") + return df + + +def _layers(dataset_path: str, selected_layers: tuple[str, ...] | None) -> list[str]: + available = [layer for layer, _geometry_type in pyogrio.list_layers(dataset_path)] + if selected_layers is None: + return available + + missing = sorted(set(selected_layers) - set(available)) + if missing: + raise ValueError(f"Unknown TOW layer(s): {', '.join(missing)}") + return [layer for layer in available if layer in selected_layers] + + +def _metric_columns(radius_m: int) -> tuple[str, str, str, str]: + return ( + POSTCODE_DENSITY_COL.format(radius=radius_m), + POSTCODE_AREA_COL.format(radius=radius_m), + POSTCODE_COUNT_COL.format(radius=radius_m), + POSTCODE_HEIGHT_COL.format(radius=radius_m), + ) + + +def _postcode_density_percentile_col(radius_m: int) -> str: + return POSTCODE_DENSITY_PERCENTILE_COL.format(radius=radius_m) + + +def _coverage_percentile_expr(column: str, alias: str) -> pl.Expr: + """Rank higher tree coverage higher on a 0-100 England-wide percentile scale.""" + value = pl.col(column).fill_nan(None) + non_null_count = value.count() + rank = value.rank("average") + return ( + pl.when(value.is_null()) + .then(None) + .when(value == value.min()) + .then(0.0) + .when(value == value.max()) + .then(100.0) + .when(non_null_count > 1) + .then(((rank - 1) / (non_null_count - 1) * 100).round(1)) + .otherwise(100.0) + .cast(pl.Float32) + .alias(alias) + ) + + +def _with_postcode_density_percentiles( + postcode_metrics: pl.DataFrame, radius_m: int +) -> pl.DataFrame: + density_col, _area_col, _count_col, _height_col = _metric_columns(radius_m) + return postcode_metrics.with_columns( + _coverage_percentile_expr( + density_col, + _postcode_density_percentile_col(radius_m), + ) + ) + + +def _accumulate_tree_metrics( + dataset_path: str, + points: pl.DataFrame, + radius_m: int, + tow_types: tuple[str, ...] | None, + batch_size: int, + layer_names: tuple[str, ...] | None, + max_features_per_layer: int | None, + workers: int, +) -> pl.DataFrame: + xy = points.select("x", "y").to_numpy() + tree = cKDTree(xy) + n_points = points.height + + canopy_area = np.zeros(n_points, dtype=np.float64) + feature_count = np.zeros(n_points, dtype=np.uint32) + height_weighted_sum = np.zeros(n_points, dtype=np.float64) + height_weight = np.zeros(n_points, dtype=np.float64) + + where = _where_for_tow_types(tow_types) + layers = _layers(dataset_path, layer_names) + print(f"Processing {len(layers)} TOW layer(s): {', '.join(layers)}") + if where: + print(f"TOW type filter: {where}") + + columns = ["Woodland_Type", "TOW_Area_M", "MEANHT"] + total_features_seen = 0 + total_features_used = 0 + + for layer in layers: + info = pyogrio.read_info(dataset_path, layer=layer) + print(f"\nLayer {layer}: {info.get('features', 0):,} features") + layer_features_seen = 0 + + with pyogrio.open_arrow( + dataset_path, + layer=layer, + columns=columns, + where=where, + batch_size=batch_size, + use_pyarrow=True, + ) as (_meta, reader): + for batch_index, batch in enumerate(reader, start=1): + if max_features_per_layer is not None: + remaining = max_features_per_layer - layer_features_seen + if remaining <= 0: + break + if batch.num_rows > remaining: + batch = batch.slice(0, remaining) + + layer_features_seen += batch.num_rows + total_features_seen += batch.num_rows + names = batch.schema.names + area = np.asarray( + batch.column(names.index("TOW_Area_M")).to_numpy(zero_copy_only=False), + dtype=np.float64, + ) + height = np.asarray( + batch.column(names.index("MEANHT")).to_numpy(zero_copy_only=False), + dtype=np.float64, + ) + geometry = np.asarray( + batch.column(names.index("SHAPE")).to_numpy(zero_copy_only=False), + dtype=object, + ) + + valid = np.isfinite(area) & (area > 0) + if not valid.any(): + continue + + geometry = geometry[valid] + area = area[valid] + height = height[valid] + + centroids = shapely.centroid(shapely.from_wkb(geometry)) + x = shapely.get_x(centroids) + y = shapely.get_y(centroids) + valid_xy = np.isfinite(x) & np.isfinite(y) + if not valid_xy.any(): + continue + + x = x[valid_xy] + y = y[valid_xy] + area = area[valid_xy] + height = height[valid_xy] + + nearby = tree.query_ball_point( + np.column_stack((x, y)), radius_m, workers=workers + ) + lengths = np.fromiter( + (len(postcode_indexes) for postcode_indexes in nearby), + dtype=np.int32, + count=len(nearby), + ) + matching_features = lengths > 0 + if matching_features.any(): + postcode_indexes = np.concatenate( + [indexes for indexes in nearby if indexes] + ).astype(np.int64, copy=False) + feature_indexes = np.repeat( + np.flatnonzero(matching_features), lengths[matching_features] + ) + + np.add.at(canopy_area, postcode_indexes, area[feature_indexes]) + np.add.at(feature_count, postcode_indexes, 1) + + feature_height = height[feature_indexes] + valid_height = np.isfinite(feature_height) + if valid_height.any(): + height_area = area[feature_indexes][valid_height] + np.add.at( + height_weighted_sum, + postcode_indexes[valid_height], + feature_height[valid_height] * height_area, + ) + np.add.at( + height_weight, + postcode_indexes[valid_height], + height_area, + ) + + total_features_used += len(area) + if batch_index == 1 or batch_index % 25 == 0: + print( + f" batch {batch_index:,}: " + f"{total_features_seen:,} rows read, " + f"{total_features_used:,} features with usable centroids" + ) + + density_col, area_col, count_col, height_col = _metric_columns(radius_m) + buffer_area = math.pi * radius_m * radius_m + density_pct = np.minimum(canopy_area / buffer_area * 100.0, 100.0) + mean_height = np.divide( + height_weighted_sum, + height_weight, + out=np.full(n_points, np.nan, dtype=np.float64), + where=height_weight > 0, + ) + + return pl.DataFrame( + { + "postcode": points["postcode"], + area_col: canopy_area.round(1).astype(np.float32), + density_col: density_pct.round(1).astype(np.float32), + count_col: feature_count.astype(np.uint32), + height_col: np.round(mean_height, 1).astype(np.float32), + } + ).with_columns( + pl.col(height_col).fill_nan(None), + ) + + +def _clean_key_expr(column: str) -> pl.Expr: + return ( + pl.col(column) + .fill_null("") + .str.to_uppercase() + .str.replace_all(r"[^A-Z0-9]+", " ") + .str.replace_all(r"\s+", " ") + .str.strip_chars() + ) + + +def _latest_price_paid_addresses(price_paid_path: Path) -> pl.LazyFrame: + return ( + pl.scan_parquet(price_paid_path) + .select( + pl.col("postcode").str.strip_chars().str.to_uppercase().alias("postcode"), + "paon", + "saon", + "street", + "locality", + "town_city", + "district", + "county", + "date_of_transfer", + ) + .filter(pl.col("postcode").is_not_null()) + .filter(pl.col("street").is_not_null()) + .filter(_clean_key_expr("street") != "") + .with_columns( + pl.concat_str( + [pl.col("saon"), pl.col("paon"), pl.col("street")], + separator=" ", + ignore_nulls=True, + ) + .str.replace_all(r"\s+", " ") + .str.strip_chars() + .alias("pp_address"), + ) + .filter(pl.col("pp_address").is_not_null()) + .sort("date_of_transfer") + .group_by("postcode", "pp_address", maintain_order=True) + .agg( + pl.col("street").last(), + pl.col("locality").last(), + pl.col("town_city").last(), + pl.col("district").last(), + pl.col("county").last(), + ) + .with_columns( + pl.concat_str( + [ + _clean_key_expr("street"), + _clean_key_expr("town_city"), + _clean_key_expr("district"), + _clean_key_expr("county"), + ], + separator="|", + ).alias("street_key") + ) + ) + + +def _weighted_mean_expr(column: str, weight: str) -> pl.Expr: + valid = pl.col(column).is_not_null() & ~pl.col(column).is_nan() + numerator = pl.when(valid).then(pl.col(column) * pl.col(weight)).sum() + denominator = pl.when(valid).then(pl.col(weight)).sum() + return pl.when(denominator > 0).then(numerator / denominator).otherwise(None) + + +def _write_street_rollups( + postcode_metrics: pl.DataFrame, + price_paid_path: Path, + output_streets: Path | None, + output_addresses: Path | None, + radius_m: int, +) -> None: + if output_streets is None and output_addresses is None: + return + + density_col, area_col, count_col, height_col = _metric_columns(radius_m) + metrics = postcode_metrics.lazy() + addresses = _latest_price_paid_addresses(price_paid_path).join( + metrics, on="postcode", how="inner" + ) + + per_postcode = ( + addresses.group_by( + "street_key", + "postcode", + "street", + "locality", + "town_city", + "district", + "county", + ) + .agg( + pl.len().alias("address_count"), + pl.col(density_col).first(), + pl.col(area_col).first(), + pl.col(count_col).first(), + pl.col(height_col).first(), + ) + .collect() + ) + + streets = ( + per_postcode.lazy() + .group_by("street_key") + .agg( + pl.col("street").first(), + pl.col("locality").first(), + pl.col("town_city").first(), + pl.col("district").first(), + pl.col("county").first(), + pl.col("postcode").n_unique().alias("postcode_count"), + pl.col("address_count").sum().alias("address_count"), + _weighted_mean_expr(density_col, "address_count") + .round(1) + .cast(pl.Float32) + .alias(STREET_TREE_COVERAGE_COL), + _weighted_mean_expr(area_col, "address_count") + .round(1) + .cast(pl.Float32) + .alias(f"Street average {area_col}"), + _weighted_mean_expr(count_col, "address_count") + .round(1) + .cast(pl.Float32) + .alias(f"Street average {count_col}"), + _weighted_mean_expr(height_col, "address_count") + .round(1) + .cast(pl.Float32) + .alias(f"Street average {height_col}"), + ) + .with_columns( + _coverage_percentile_expr( + STREET_TREE_COVERAGE_COL, + STREET_TREE_DENSITY_COL, + ) + ) + .sort("street_key") + .collect() + ) + + if output_addresses is not None: + output_addresses.parent.mkdir(parents=True, exist_ok=True) + address_output = addresses.join( + streets.lazy().select( + "street_key", + STREET_TREE_COVERAGE_COL, + STREET_TREE_DENSITY_COL, + ), + on="street_key", + how="left", + ) + address_output.sink_parquet(output_addresses, compression="zstd") + print(f"Wrote address tree-density join: {output_addresses}") + + if output_streets is not None: + output_streets.parent.mkdir(parents=True, exist_ok=True) + streets.write_parquet(output_streets, compression="zstd") + print(f"Wrote street tree-density rollup: {output_streets}") + + +def _parse_csv_arg(value: str | None) -> tuple[str, ...] | None: + if value is None: + return None + if value.lower() == "all": + return None + parts = tuple(part.strip() for part in value.split(",") if part.strip()) + return parts or None + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Build postcode and street tree-density metrics from FR_TOW_V1_ALL.zip" + ) + parser.add_argument( + "--tow-zip", + type=Path, + default=Path("property-data/FR_TOW_V1_ALL.zip"), + help="Forest Research TOW zip containing FR_TOW_V1_ALL.gdb", + ) + parser.add_argument( + "--extract-dir", + type=Path, + default=Path("property-data/fr_tow_v1_all"), + help="Directory where the zip is extracted", + ) + parser.add_argument( + "--force-extract", + action="store_true", + help="Re-extract the TOW zip even if the geodatabase already exists", + ) + parser.add_argument( + "--use-vsizip", + action="store_true", + help="Read the geodatabase directly from the zip instead of extracting it", + ) + parser.add_argument( + "--arcgis", + type=Path, + default=Path("property-data/arcgis_data.parquet"), + help="Postcode centroid parquet with east1m/north1m columns", + ) + parser.add_argument( + "--price-paid", + type=Path, + default=None, + help="Optional Price Paid parquet used to roll postcode metrics up to streets", + ) + parser.add_argument( + "--output-postcodes", + type=Path, + required=True, + help="Output postcode-level tree-density parquet", + ) + parser.add_argument( + "--output-streets", + type=Path, + default=None, + help="Optional output street-level tree-density parquet", + ) + parser.add_argument( + "--output-addresses", + type=Path, + default=None, + help="Optional output address/street join parquet keyed by postcode and pp_address", + ) + parser.add_argument( + "--radius-m", + type=int, + default=50, + help="Radius around each postcode centroid used as the street-scale buffer", + ) + parser.add_argument( + "--tow-types", + default=",".join(DEFAULT_TOW_TYPES), + help='Comma-separated Woodland_Type values to include, or "all"', + ) + parser.add_argument( + "--layers", + default=None, + help="Optional comma-separated subset of TOW layers for testing", + ) + parser.add_argument( + "--batch-size", + type=int, + default=65_536, + help="Arrow batch size for reading TOW features", + ) + parser.add_argument( + "--workers", + type=int, + default=-1, + help="Worker count passed to scipy cKDTree.query_ball_point", + ) + parser.add_argument( + "--max-postcodes", + type=int, + default=None, + help="Testing only: process the first N postcode points", + ) + parser.add_argument( + "--max-features-per-layer", + type=int, + default=None, + help="Testing only: process at most N TOW features per layer", + ) + args = parser.parse_args() + + if (args.output_streets or args.output_addresses) and args.price_paid is None: + raise SystemExit("--price-paid is required when writing street/address outputs") + + if args.radius_m <= 0: + raise SystemExit("--radius-m must be greater than zero") + + dataset_path = _tow_dataset_path( + args.tow_zip, args.extract_dir, args.force_extract, args.use_vsizip + ) + points = _postcode_points(args.arcgis, args.max_postcodes) + tow_types = _parse_csv_arg(args.tow_types) + layer_names = _parse_csv_arg(args.layers) + + postcode_metrics = _accumulate_tree_metrics( + dataset_path=dataset_path, + points=points, + radius_m=args.radius_m, + tow_types=tow_types, + batch_size=args.batch_size, + layer_names=layer_names, + max_features_per_layer=args.max_features_per_layer, + workers=args.workers, + ) + postcode_metrics = _with_postcode_density_percentiles( + postcode_metrics, args.radius_m + ) + + args.output_postcodes.parent.mkdir(parents=True, exist_ok=True) + postcode_metrics.write_parquet(args.output_postcodes, compression="zstd") + print(f"\nWrote postcode tree-density metrics: {args.output_postcodes}") + + if args.price_paid is not None: + _write_street_rollups( + postcode_metrics=postcode_metrics, + price_paid_path=args.price_paid, + output_streets=args.output_streets, + output_addresses=args.output_addresses, + radius_m=args.radius_m, + ) + + +if __name__ == "__main__": + main() diff --git a/server-rs/src/og_middleware.rs b/server-rs/src/og_middleware.rs index c987f1d..1220605 100644 --- a/server-rs/src/og_middleware.rs +++ b/server-rs/src/og_middleware.rs @@ -49,8 +49,8 @@ fn seo_page_for_path(path: &str) -> Option { match path { "/" => Some(SeoPage { canonical_path: "/", - title: "Perfect Postcode - Find where to buy before browsing listings", - description: "Search every postcode by budget, commute, schools, safety, noise, broadband, prices and more. Build a better home-buying shortlist before viewings.", + title: "Find the best postcodes and areas to live in England | Perfect Postcode", + description: "Discover where to live by comparing England postcodes by budget, commute, schools, crime, noise, broadband, property prices and local amenities before viewing homes.", indexable: true, }), "/learn" | "/support" => Some(SeoPage { diff --git a/server-rs/src/routes/export.rs b/server-rs/src/routes/export.rs index f60ae09..e4c602f 100644 --- a/server-rs/src/routes/export.rs +++ b/server-rs/src/routes/export.rs @@ -287,7 +287,7 @@ pub async fn get_export( } // Sort by property count descending - postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count)); + postcode_aggs.sort_unstable_by_key(|agg| std::cmp::Reverse(agg.1.count)); // Sample if too many postcodes let was_sampled = postcode_aggs.len() > MAX_EXPORT_POSTCODES; @@ -307,7 +307,7 @@ pub async fn get_export( postcode_aggs.swap(pick, swap_idx); } postcode_aggs.truncate(MAX_EXPORT_POSTCODES); - postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count)); + postcode_aggs.sort_unstable_by_key(|agg| std::cmp::Reverse(agg.1.count)); } // Determine column order: filter features first, then remaining diff --git a/video/render.sh b/video/render.sh index e7c54ba..505c920 100755 --- a/video/render.sh +++ b/video/render.sh @@ -2,10 +2,10 @@ # # End-to-end re-render of the dashboard demo videos. # -# All per-storyboard knobs (aspect, fps, bitrate, prompt text, voice persona, -# poster timestamp, brand strings…) live on the Storyboard objects in -# src/storyboard.ts. To add a vertical cut or change the voice, edit that -# file — this script only handles target/auth/transport concerns. +# All per-storyboard knobs (aspect, fps, bitrate, prompt text, localized +# narration, voice persona, poster timestamp, brand strings…) live in +# src/storyboard.ts. A single visual storyboard can expand into multiple +# language variants there; this script renders every emitted slug. # # Two targets: # local (default) — assumes the docker-compose stack on host.docker.internal, diff --git a/video/src/browser.ts b/video/src/browser.ts index 1204542..d6916b6 100644 --- a/video/src/browser.ts +++ b/video/src/browser.ts @@ -50,6 +50,9 @@ export async function launchRecordingBrowser( deviceScaleFactor: storyboard.video.captureScale, recordVideo: { dir: opts.recordDir, size: viewport }, }); + await context.addInitScript((appLanguage) => { + if (appLanguage) localStorage.setItem('language', appLanguage); + }, storyboard.content.appLanguage ?? 'en'); await suppressDevServerNoise(context); return { browser, context }; } diff --git a/video/src/dom.ts b/video/src/dom.ts index 13e0d5d..6758628 100644 --- a/video/src/dom.ts +++ b/video/src/dom.ts @@ -307,12 +307,19 @@ export async function showOutro( document.getElementById('__demo-caption')?.classList.remove('visible'); const el = document.createElement('div'); el.id = '__demo-outro'; - el.innerHTML = ` -
-
${brand}
-
${tagline}
-
${url}
-
`; + const card = document.createElement('div'); + card.id = '__demo-outro-card'; + const brandEl = document.createElement('div'); + brandEl.id = '__demo-outro-brand'; + brandEl.textContent = brand; + const taglineEl = document.createElement('div'); + taglineEl.id = '__demo-outro-tagline'; + taglineEl.textContent = tagline; + const urlEl = document.createElement('div'); + urlEl.id = '__demo-outro-url'; + urlEl.textContent = url; + card.append(brandEl, taglineEl, urlEl); + el.appendChild(card); document.body.appendChild(el); requestAnimationFrame(() => { requestAnimationFrame(() => el.classList.add('visible')); diff --git a/video/src/preflight.ts b/video/src/preflight.ts index e312b64..8aa3ce8 100644 --- a/video/src/preflight.ts +++ b/video/src/preflight.ts @@ -46,6 +46,7 @@ function emitScript(storyboard: Storyboard): string { voice: { instruct: storyboard.voice.instruct, language: storyboard.voice.language, + referenceText: storyboard.voice.referenceText, temperature: storyboard.voice.temperature ?? 0.6, topP: storyboard.voice.topP ?? 0.9, seed: storyboard.voice.seed ?? 42, @@ -68,6 +69,7 @@ function main(): void { const index = { storyboards: storyboards.map((sb) => ({ name: sb.name, + locale: sb.locale ?? sb.content.appLanguage, aspect: sb.video.aspect, outputFps: sb.video.outputFps, minDurationS: sb.video.minDurationS, diff --git a/video/src/runner.ts b/video/src/runner.ts index 6da145e..b98d25f 100644 --- a/video/src/runner.ts +++ b/video/src/runner.ts @@ -25,6 +25,7 @@ export interface RunnerResult { const MAP_ZOOM_WHEEL_DELTA = -120; const FALLBACK_MS_PER_WORD = 750; const FALLBACK_TAIL_BUFFER_MS = 800; +const CJK_CHARS_PER_FALLBACK_WORD = 2; interface SynthCue { cueIndex: number; @@ -266,10 +267,15 @@ function loadSynthIndex(storyboard: Storyboard): SynthCue[] { return storyboard.cues.map((cue, cueIndex) => ({ cueIndex, text: cue.text, - durationMs: - cue.text.split(/\s+/).filter(Boolean).length * FALLBACK_MS_PER_WORD + - FALLBACK_TAIL_BUFFER_MS, + durationMs: estimateFallbackDurationMs(cue.text), })); } +function estimateFallbackDurationMs(text: string): number { + const wordCount = text.split(/\s+/).filter(Boolean).length; + const cjkCount = text.match(/\p{Script=Han}/gu)?.length ?? 0; + const units = Math.max(wordCount, Math.ceil(cjkCount / CJK_CHARS_PER_FALLBACK_WORD), 1); + return units * FALLBACK_MS_PER_WORD + FALLBACK_TAIL_BUFFER_MS; +} + export type { Page }; diff --git a/video/src/script.ts b/video/src/script.ts index 199ab96..61682f2 100644 --- a/video/src/script.ts +++ b/video/src/script.ts @@ -124,6 +124,8 @@ export interface VoiceConfig { instruct: string; /** Qwen3-TTS language string, e.g. "English". */ language: string; + /** Reference utterance used when minting a generated voice for this language. */ + referenceText?: string; /** Sampling temperature (default 0.6). */ temperature?: number; /** Top-p nucleus sampling (default 0.9). */ @@ -147,6 +149,8 @@ export interface BrandConfig { export interface ContentConfig { /** Prompt text typed into the AI box during the cold open. */ promptText: string; + /** Frontend i18n language code to set before loading the dashboard. */ + appLanguage?: string; /** Cold-open zoom multiplier on the AI card. */ aiZoomScale: number; initialMapView: { lat: number; lon: number; zoom: number }; @@ -177,6 +181,8 @@ export interface TravelTimeFilter { */ export interface Storyboard { name: string; + /** Optional language/variant code, used for manifests and logging. */ + locale?: string; video: VideoConfig; voice: VoiceConfig; content: ContentConfig; diff --git a/video/src/storyboard.ts b/video/src/storyboard.ts index 4ac441d..c56a923 100644 --- a/video/src/storyboard.ts +++ b/video/src/storyboard.ts @@ -6,8 +6,9 @@ import { el, type Storyboard } from './script.js'; * Each entry is a fully self-contained Storyboard: video knobs (aspect, * bitrate, fps), voice persona (Qwen3-TTS instruct + language + sampling), * stubbed AI response, brand strings, AND the cue list. There is no shared - * global state — to ship a vertical cut, a different prompt, or a different - * voice, push another item onto this array. + * global state. The exported array can contain generated variants, so a + * shared visual storyboard can render once per language without repeating + * its activity sequence. * * `name` doubles as the on-disk slug. The pipeline writes per-storyboard * artefacts to `output//` and publishes `.mp4` / `.jpg` @@ -23,15 +24,6 @@ import { el, type Storyboard } from './script.js'; * before the next cue's gap). */ -const PROMPT_TEXT = 'Flats <£300k, 35 min to commute Manchester close to an outstanding school in a quite low crime area'; - -const BRAND = { - name: 'Perfect Postcode', - tagline: 'Your best chance to find your next perfect home.', - url: 'https://perfect-postcode.co.uk', -}; - - const AI_ZOOM_SCALE = 2.4; const TT_CARD_SELECTOR = '[data-filter-name="tt_0"]'; @@ -39,103 +31,238 @@ const TT_SLIDER_MAX = 120; const TT_DRAG_FROM_MIN = 35; const TT_DRAG_TO_MIN = 20; -const BRITISH_MALE_NARRATOR = - 'Calm and cheerful young British male narrator from the North of England with a ' + - 'strong Manchester accent.'; +type RecordingLocale = 'en' | 'de' | 'zh' | 'hi'; -const DEFAULT_CUES: Storyboard['cues'] = [ - { - text: 'Start by describing the type of place you\'re looking for', - gapBeforeMs: 0, - tail: [ - { - kind: 'type', - selector: '[data-tutorial="ai-filters"] textarea', - text: PROMPT_TEXT, - durationMs: 3000, - }, - { kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1700 }, - ], - }, - { - text: 'The dashboard will show you the likeliest places that will meet your expectations', - gapBeforeMs: 400, - during: [{ kind: 'zoomReset', durationMs: 1400 }], - tail: [{ kind: 'wait', durationMs: 500 }], - }, +interface RecordingLocalization { + name: string; + appLanguage: string; + ttsLanguage: string; + voiceInstruct: string; + voiceReferenceText: string; + promptText: string; + travelTimeLabel: string; + exportButtonTitle: string; + brand: { + name: string; + tagline: string; + url: string; + }; + cues: { + describe: string; + dashboard: string; + filters: string; + details: string; + shortlist: string; + }; +} - { - text: `Adjust the filters to narrow down to the best candidates`, - gapBeforeMs: 500, - during: [ - { - kind: 'dragSlider', - thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`, - trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`, - toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX, - durationMs: 1000, - }, - ], - tail: [{ kind: 'wait', durationMs: 400 }], - }, +const BRAND_URL = 'https://perfect-postcode.co.uk'; - { - text: 'And now it\'s time to dig into the details. Looks good to me!', - gapBeforeMs: 500, - during: [ - { kind: 'cursorScale', scale: 1.4, durationMs: 200 }, - { - kind: 'mapZoom', - target: { kind: 'point', x: 1140, y: 605 }, - steps: 18, - durationMs: 1500, - }, - ], - tail: [ - // Wait for the post-zoom /api/postcodes response and a redraw - // before the click — otherwise the click can fire on a stale - // frame and miss the polygon. - { kind: 'wait', durationMs: 500 }, - { - kind: 'click', - target: { kind: 'point', x: 1140, y: 605 }, - durationMs: 700, - }, - { kind: 'cursorScale', scale: 1, durationMs: 280 }, - // Linger so the climax cue lands on the right-pane reveal. - { kind: 'wait', durationMs: 1500 }, - ], +const RECORDING_LOCALIZATIONS: Record = { + en: { + name: 'recording', + appLanguage: 'en', + ttsLanguage: 'English', + voiceInstruct: + 'Calm and cheerful young British male narrator from the North of England with a ' + + 'strong Manchester accent.', + voiceReferenceText: + "Welcome to the demonstration. This is the narrator voice you'll hear throughout the video.", + promptText: + 'Flats <£300k, 35 min to commute Manchester close to an outstanding school in a quite low crime area', + travelTimeLabel: 'Manchester city centre', + exportButtonTitle: 'Export to Excel', + brand: { + name: 'Perfect Postcode', + tagline: 'Your best chance to find your next perfect home.', + url: BRAND_URL, + }, + cues: { + describe: "Start by describing the type of place you're looking for", + dashboard: 'The dashboard will show you the likeliest places that will meet your expectations', + filters: 'Adjust the filters to narrow down to the best candidates', + details: "And now it's time to dig into the details. Looks good to me!", + shortlist: + 'Now you can take your shortlist and start looking for your next home in your perfect postcode.', + }, }, + de: { + name: 'recording-de', + appLanguage: 'de', + ttsLanguage: 'German', + voiceInstruct: + 'Calm and cheerful German male narrator with clear standard German pronunciation ' + + 'and a friendly, practical delivery.', + voiceReferenceText: + 'Willkommen zur Demonstration. Diese Sprecherstimme hören Sie im gesamten Video.', + promptText: + 'Wohnungen unter £300k, 35 Min. Pendelzeit nach Manchester, nahe einer herausragenden Schule in einer sehr kriminalitätsarmen Gegend', + travelTimeLabel: 'Stadtzentrum Manchester', + exportButtonTitle: 'Als Excel exportieren', + brand: { + name: 'Perfect Postcode', + tagline: 'Ihre beste Chance, Ihr nächstes perfektes Zuhause zu finden.', + url: BRAND_URL, + }, + cues: { + describe: 'Beschreiben Sie zuerst, wonach Sie suchen.', + dashboard: 'Das Dashboard zeigt die Orte, die Ihre Erwartungen am ehesten erfüllen.', + filters: 'Passen Sie die Filter an, um die besten Kandidaten einzugrenzen.', + details: 'Jetzt geht es in die Details. Sieht gut aus!', + shortlist: + 'Jetzt können Sie Ihre Auswahl nehmen und Ihr nächstes Zuhause in Ihrem perfekten Postcode suchen.', + }, + }, + zh: { + name: 'recording-zh', + appLanguage: 'zh', + ttsLanguage: 'Chinese', + voiceInstruct: + 'Calm and cheerful Mandarin Chinese male narrator with clear standard Mandarin ' + + 'pronunciation and a friendly, practical delivery.', + voiceReferenceText: '欢迎观看演示。整段视频都会使用这位旁白的声音。', + promptText: '30万英镑以内的公寓,35分钟通勤到曼彻斯特,靠近优秀学校,犯罪率很低的区域', + travelTimeLabel: '曼彻斯特市中心', + exportButtonTitle: '导出为 Excel', + brand: { + name: 'Perfect Postcode', + tagline: '帮你更有把握找到下一个理想家。', + url: BRAND_URL, + }, + cues: { + describe: '先描述你想找什么样的地方', + dashboard: '仪表板会显示最符合你期望的地点', + filters: '调整筛选条件,缩小到最合适的候选区域', + details: '现在深入查看细节。看起来不错!', + shortlist: '现在你可以带着候选清单,开始寻找理想邮编里的下一个家。', + }, + }, + hi: { + name: 'recording-hi', + appLanguage: 'hi', + ttsLanguage: 'English', + voiceInstruct: + 'Calm and cheerful Indian male narrator speaking English with a strong Indian accent ' + + 'and a friendly, practical delivery.', + voiceReferenceText: + "Welcome to the demonstration. This is the narrator voice you'll hear throughout the video.", + promptText: + 'Flats <£300k, 35 min to commute Manchester close to an outstanding school in a quite low crime area', + travelTimeLabel: 'Manchester city centre', + exportButtonTitle: 'Excel में निर्यात करें', + brand: { + name: 'Perfect Postcode', + tagline: 'Your best chance to find your next perfect home.', + url: BRAND_URL, + }, + cues: { + describe: "Start by describing the type of place you're looking for", + dashboard: 'The dashboard will show you the likeliest places that will meet your expectations', + filters: 'Adjust the filters to narrow down to the best candidates', + details: "And now it's time to dig into the details. Looks good to me!", + shortlist: + 'Now you can take your shortlist and start looking for your next home in your perfect postcode.', + }, + }, +}; - { - text: 'Now you can take your shortlist and start looking for your next home in your perfect postcode.', - gapBeforeMs: 500, - during: [ - { kind: 'zoomReset', durationMs: 900 }, - { - kind: 'click', - target: el('button[title="Export to Excel"]'), - durationMs: 800, - }, - ], - tail: [{ kind: 'wait', durationMs: 800 }], - }, +function createCues(locale: RecordingLocale): Storyboard['cues'] { + const copy = RECORDING_LOCALIZATIONS[locale]; - { - text: `${BRAND.name}. ${BRAND.tagline}`, - gapBeforeMs: 600, - during: [ - { - kind: 'showOutro', - brand: BRAND.name, - tagline: BRAND.tagline, - url: BRAND.url, - durationMs: 0, - }, - ], - tail: [{ kind: 'wait', durationMs: 1500 }], - }, -]; + return [ + { + text: copy.cues.describe, + gapBeforeMs: 0, + tail: [ + { + kind: 'type', + selector: '[data-tutorial="ai-filters"] textarea', + text: copy.promptText, + durationMs: 3000, + }, + { kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1700 }, + ], + }, + { + text: copy.cues.dashboard, + gapBeforeMs: 400, + during: [{ kind: 'zoomReset', durationMs: 1400 }], + tail: [{ kind: 'wait', durationMs: 500 }], + }, + + { + text: copy.cues.filters, + gapBeforeMs: 500, + during: [ + { + kind: 'dragSlider', + thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`, + trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`, + toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX, + durationMs: 1000, + }, + ], + tail: [{ kind: 'wait', durationMs: 400 }], + }, + + { + text: copy.cues.details, + gapBeforeMs: 500, + during: [ + { kind: 'cursorScale', scale: 1.4, durationMs: 200 }, + { + kind: 'mapZoom', + target: { kind: 'point', x: 1140, y: 605 }, + steps: 18, + durationMs: 1500, + }, + ], + tail: [ + // Wait for the post-zoom /api/postcodes response and a redraw + // before the click — otherwise the click can fire on a stale + // frame and miss the polygon. + { kind: 'wait', durationMs: 500 }, + { + kind: 'click', + target: { kind: 'point', x: 1140, y: 605 }, + durationMs: 700, + }, + { kind: 'cursorScale', scale: 1, durationMs: 280 }, + // Linger so the climax cue lands on the right-pane reveal. + { kind: 'wait', durationMs: 1500 }, + ], + }, + + { + text: copy.cues.shortlist, + gapBeforeMs: 500, + during: [ + { kind: 'zoomReset', durationMs: 900 }, + { + kind: 'click', + target: el(`button[title="${copy.exportButtonTitle}"]`), + durationMs: 800, + }, + ], + tail: [{ kind: 'wait', durationMs: 800 }], + }, + + { + text: `${copy.brand.name}. ${copy.brand.tagline}`, + gapBeforeMs: 600, + during: [ + { + kind: 'showOutro', + brand: copy.brand.name, + tagline: copy.brand.tagline, + url: copy.brand.url, + durationMs: 0, + }, + ], + tail: [{ kind: 'wait', durationMs: 1500 }], + }, + ]; +} const DEFAULT_PRE: Storyboard['pre'] = [ { kind: 'clearVignette', durationMs: 0 }, @@ -149,9 +276,12 @@ const DEFAULT_PRE: Storyboard['pre'] = [ { kind: 'wait', durationMs: 140 }, ]; -export const storyboards: Storyboard[] = [ - { - name: 'recording', +function createRecordingStoryboard(locale: RecordingLocale): Storyboard { + const copy = RECORDING_LOCALIZATIONS[locale]; + + return { + name: copy.name, + locale, video: { aspect: '16x9', captureScale: 1, @@ -168,23 +298,25 @@ export const storyboards: Storyboard[] = [ posterTimeS: 16, }, voice: { - instruct: BRITISH_MALE_NARRATOR, - language: 'English', + instruct: copy.voiceInstruct, + language: copy.ttsLanguage, + referenceText: copy.voiceReferenceText, temperature: 0.6, topP: 0.9, seed: 42, }, content: { - promptText: PROMPT_TEXT, + promptText: copy.promptText, + appLanguage: copy.appLanguage, aiZoomScale: AI_ZOOM_SCALE, initialMapView: { lat: 53.4795, lon: -2.2451, zoom: 11.5 }, // Filters returned by the AI stub. Keys MUST match real feature names // from /api/features (verified against the running server's schema). stubbedFilters: { - 'Property type': ['Flats/Maisonettes', 'Terraced'], - 'Estimated current price': [175000, 450000], + 'Property type': ['Flats/Maisonettes'], + 'Estimated current price': [0, 300000], 'Serious crime per 1k residents (avg/yr)': [0, 55], - 'Noise (dB)': [50, 68], + 'Outstanding primary schools within 2km': [1, 10], }, // Travel-time filters returned by the AI stub. Slug matches the real // /api/travel-destinations?mode=transit response. @@ -192,7 +324,7 @@ export const storyboards: Storyboard[] = [ { mode: 'transit', slug: 'manchester', - label: 'Manchester city centre', + label: copy.travelTimeLabel, max: TT_DRAG_FROM_MIN, }, ], @@ -200,12 +332,16 @@ export const storyboards: Storyboard[] = [ travelTimeSliderMax: TT_SLIDER_MAX, travelTimeDragFromMin: TT_DRAG_FROM_MIN, travelTimeDragToMin: TT_DRAG_TO_MIN, - brand: BRAND, + brand: copy.brand, }, pre: DEFAULT_PRE, - cues: DEFAULT_CUES, - }, -]; + cues: createCues(locale), + }; +} + +export const storyboards: Storyboard[] = (['en', 'de', 'zh', 'hi'] as const).map((locale) => + createRecordingStoryboard(locale) +); export function getStoryboard(name: string): Storyboard { const sb = storyboards.find((s) => s.name === name); diff --git a/video/tts/synth.py b/video/tts/synth.py index d8460f6..c4b8abe 100644 --- a/video/tts/synth.py +++ b/video/tts/synth.py @@ -116,6 +116,10 @@ def cached_index_matches( cues: list[dict], instruct: str, language: str, + reference_text: str, + design_model: str, + clone_model: str, + reference_audio: str, seed: int, temperature: float, top_p: float, @@ -123,7 +127,8 @@ def cached_index_matches( """Return True iff index_path's cue list lines up with `cues` 1:1. Compared fields: ``cueIndex``, ``text``, ``gapBeforeMs`` plus the synth - settings (``instruct``, ``language``, ``seed``, ``temperature``, ``top_p``). + settings (``instruct``, ``language``, reference text, models, ``seed``, + ``temperature``, ``top_p``). All cue WAV files must also exist on disk. Mismatched length, reordered cues, or a missing WAV invalidate the cache. """ @@ -135,6 +140,12 @@ def cached_index_matches( return False if cached.get("instruct") != instruct or cached.get("language") != language: return False + if cached.get("referenceText") != reference_text: + return False + if cached.get("designModel") != design_model or cached.get("cloneModel") != clone_model: + return False + if cached.get("referenceAudio", "") != reference_audio: + return False if int(cached.get("seed", -1)) != seed: return False if float(cached.get("temperature", -1)) != temperature: @@ -170,6 +181,7 @@ def _resolve_reference( audio_dir: Path, instruct: str, language: str, + reference_text: str, seed: int, temperature: float, top_p: float, @@ -178,8 +190,8 @@ def _resolve_reference( If --reference-audio is supplied, validate and use it directly. Otherwise mint one via VoiceDesign (cached on disk; cache invalidates when the - persona/sampling/seed changes). The design model is unloaded before - returning so the clone model can claim the GPU. + persona/language/reference/sampling/seed changes). The design model is + unloaded before returning so the clone model can claim the GPU. """ if args.reference_audio is not None: if not args.reference_audio.exists(): @@ -201,7 +213,7 @@ def _resolve_reference( "seed": seed, "temperature": temperature, "topP": top_p, - "text": REFERENCE_TEXT, + "text": reference_text, } if ( ref_wav_path.exists() @@ -209,16 +221,16 @@ def _resolve_reference( and _safe_load_json(ref_meta_path) == ref_meta ): print(f"[synth] reusing cached voice reference {ref_wav_path.name}", flush=True) - return ref_wav_path, REFERENCE_TEXT + return ref_wav_path, reference_text print( - f"[synth] minting voice reference via VoiceDesign: «{REFERENCE_TEXT}»", + f"[synth] minting voice reference via VoiceDesign: «{reference_text}»", flush=True, ) design_model = load_model(args.design_model, args.device) seed_everything(seed) ref_wavs, ref_sr = design_model.generate_voice_design( - text=[REFERENCE_TEXT], + text=[reference_text], language=language, instruct=instruct, do_sample=True, @@ -237,7 +249,7 @@ def _resolve_reference( if torch.cuda.is_available(): torch.cuda.empty_cache() - return ref_wav_path, REFERENCE_TEXT + return ref_wav_path, reference_text def main() -> int: @@ -266,21 +278,30 @@ def main() -> int: return 1 instruct = voice["instruct"] language = voice["language"] + reference_text = str(voice.get("referenceText") or REFERENCE_TEXT) temperature = float(voice.get("temperature", 0.6)) top_p = float(voice.get("topP", 0.9)) seed = int(voice.get("seed", 42)) + reference_audio_cache_key = ( + str(args.reference_audio.resolve()) if args.reference_audio is not None else "" + ) audio_dir.mkdir(parents=True, exist_ok=True) # Skip generation when the existing audio matches the script — same cue # texts and same gapBeforeMs values in the same order, AND same synth - # settings (instruct/seed/temperature/top_p). Saves ~30s of GPU time when - # iterating on activity timing without changing narration or persona. + # settings (instruct/language/reference/model/seed/temperature/top_p). + # Saves ~30s of GPU time when iterating on activity timing without + # changing narration or persona. if cached_index_matches( audio_dir / "index.json", cues, instruct, language, + reference_text, + args.design_model, + args.clone_model, + reference_audio_cache_key, seed, temperature, top_p, @@ -308,7 +329,7 @@ def main() -> int: # own voice. The reference WAV is cached so subsequent runs only load # the clone model (saves ~20s + 3.4 GB of disk download). ref_wav_path, ref_text = _resolve_reference( - args, audio_dir, instruct, language, seed, temperature, top_p + args, audio_dir, instruct, language, reference_text, seed, temperature, top_p ) print( @@ -367,6 +388,7 @@ def main() -> int: "language": language, "designModel": args.design_model, "cloneModel": args.clone_model, + "referenceAudio": reference_audio_cache_key, "referenceText": ref_text, "seed": seed, "temperature": temperature,