More FE changes

2026-05-09 09:43:41 +01:00 · 2026-05-09 09:43:41 +01:00 · a48eb945e0
commit a48eb945e0
parent f114ada255
48 changed files with 4127 additions and 1751 deletions
--- a/video/render.sh
+++ b/video/render.sh
@ -10,15 +10,17 @@
 #   ./render.sh                # full pipeline (uses cached auth.json if fresh)
 #   ./render.sh --fresh-auth   # force re-auth even if auth.json exists
 #   ./render.sh --no-encode    # stop at WebM, skip MP4 encode
+#   ./render.sh --no-audio     # skip Qwen3-TTS narration; publish silent MP4
 #   FORCE_AUTH=1 ./render.sh   # same as --fresh-auth
 #   APP_URL=http://localhost:3001 ./render.sh   # override frontend URL
+#   TTS_SPEAKER=aiden ./render.sh               # override CustomVoice speaker

 set -euo pipefail

 # -- config (override via env) -------------------------------------------------
-APP_URL="${APP_URL:-http://host.docker.internal:3001}"
-PB_URL="${PB_URL:-http://host.docker.internal:8090}"
-API_URL="${API_URL:-http://host.docker.internal:8001}"
+export APP_URL="${APP_URL:-http://host.docker.internal:3001}"
+export PB_URL="${PB_URL:-http://host.docker.internal:8090}"
+export API_URL="${API_URL:-http://host.docker.internal:8001}"
 PB_ADMIN_EMAIL="${PB_ADMIN_EMAIL:-admin@propertymap.local}"
 PB_ADMIN_PASSWORD="${PB_ADMIN_PASSWORD:-propertymap-dev-2024}"
 PB_EMAIL="${PB_EMAIL:-demo-video@local.test}"
@ -34,14 +36,28 @@ PUBLISH_DIR="${PUBLISH_DIR:-../frontend/public/video}"
 # caption visible.
 POSTER_TIME_S="${POSTER_TIME_S:-16}"

+# Recorder/encoder knobs read by src/config.ts. config.ts treats these as
+# required, so they live here (the only entry point) rather than as defaults
+# scattered across TS modules. Override per-run via env.
+export ASPECT="${ASPECT:-16x9}"
+export CAPTURE_SCALE="${CAPTURE_SCALE:-1}"
+export WEBM_BITRATE="${WEBM_BITRATE:-$(awk -v s="$CAPTURE_SCALE" 'BEGIN{print (s+0>1)?"18M":"8M"}')}"
+export PROMPT_TEXT="${PROMPT_TEXT:-Flats or terraces <£450k, 35 min to Manchester, low crime}"
+export AI_ZOOM_SCALE="${AI_ZOOM_SCALE:-2.4}"
+export MAX_DURATION_S="${MAX_DURATION_S:-45}"
+export MIN_DURATION_S="${MIN_DURATION_S:-10}"
+export OUTPUT_FPS="${OUTPUT_FPS:-50}"
+
 FRESH_AUTH="${FORCE_AUTH:-0}"
 DO_ENCODE=1
+DO_AUDIO=1
 for arg in "$@"; do
  case "$arg" in
    --fresh-auth) FRESH_AUTH=1 ;;
    --no-encode) DO_ENCODE=0 ;;
+    --no-audio) DO_AUDIO=0 ;;
    -h|--help)
-      sed -n '3,18p' "$0"
+      sed -n '3,20p' "$0"
      exit 0 ;;
    *) echo "Unknown arg: $arg" >&2; exit 2 ;;
  esac
@ -124,12 +140,36 @@ else
  say "Reusing existing auth.json"
 fi

-# -- record -------------------------------------------------------------------
-say "Recording"
+# -- preflight + synth (Qwen3-TTS) -------------------------------------------
+# Synth runs BEFORE recording: one batched generate_custom_voice call across
+# all cues so the voice stays consistent. The recorder reads
+# output/audio/index.json for measured per-cue durations and sizes each
+# cue's wall-clock to fit; --no-audio skips synth, so the recorder reuses a
+# cached output/audio/index.json when one exists and otherwise falls back to
+# a worst-case estimate.
 mkdir -p output
 # Wipe last run's leaking artifacts so the rename step picks up *this* run.
 rm -f output/recording.webm output/recording.mp4 output/page@*.webm output/page@*.webm.untrimmed
+rm -f output/narration-script.json output/narration.json
+# output/audio/ is preserved; tts/synth.py decides whether the cached WAVs
+# still match the script and skips generation when they do.

+say "Preflight: emitting narration script"
+node dist/preflight.js
+
+if [ "$DO_AUDIO" = "1" ]; then
+  if ! command -v uv >/dev/null 2>&1; then
+    fail "uv not on PATH (required for Qwen3-TTS synth). Install uv or rerun with --no-audio."
+  fi
+  say "Synthesising narration with Qwen3-TTS (speaker=${TTS_SPEAKER:-ryan}) — one batched call"
+  uv sync --project tts || fail "uv sync failed in video/tts"
+  uv run --project tts python tts/synth.py || fail "tts/synth.py failed"
+  if [ ! -s output/audio/index.json ]; then
+    fail "synth did not produce output/audio/index.json"
+  fi
+fi
+
+# -- record -------------------------------------------------------------------
+say "Recording"
 APP_URL="$APP_URL" node dist/record.js

 if [ ! -s output/recording.webm ]; then
@ -163,6 +203,20 @@ if [ "$DO_ENCODE" = "1" ]; then
  node dist/verify.js output/recording.mp4 output/poster.jpg
 fi

+# -- mux narration ------------------------------------------------------------
+# Synth already produced per-cue WAVs (in output/audio/); the recorder logged
+# each cue's videoTime against the trimmed timeline. Drop the WAVs onto the
+# mp4 with one ffmpeg adelay+amix and replace the silent recording in place.
+if [ "$DO_ENCODE" = "1" ] && [ "$DO_AUDIO" = "1" ]; then
+  if [ ! -s output/narration.json ]; then
+    fail "narration.json missing — recorder did not log cues"
+  fi
+  say "Muxing narration into output/recording.mp4"
+  uv run --project tts python tts/mux.py --replace \
+    || fail "tts/mux.py failed"
+  node dist/verify.js output/recording.mp4
+fi
+
 # -- publish to homepage ------------------------------------------------------
 # Only publish when we did the encode (otherwise we'd be copying a stale
 # mp4 next to a fresh webm). --no-encode skips this whole block.
--- a/video/src/browser.ts
+++ b/video/src/browser.ts
@ -1,5 +1,16 @@
-import { chromium, type Browser, type BrowserContext, type Page } from 'playwright';
-import { AUTH_STATE_PATH, CAPTURE_SCALE, OUTPUT_DIR, VIDEO_SIZE, VIEWPORT } from './config.js';
+import {
+  chromium,
+  type Browser,
+  type BrowserContext,
+  type Page,
+} from "playwright";
+import {
+  AUTH_STATE_PATH,
+  CAPTURE_SCALE,
+  OUTPUT_DIR,
+  VIDEO_SIZE,
+  VIEWPORT,
+} from "./config.js";

 export interface RecordingBrowser {
  browser: Browser;
@ -10,22 +21,22 @@ export async function launchRecordingBrowser(): Promise<RecordingBrowser> {
  const browser = await chromium.launch({
    headless: true,
    args: [
-      '--disable-blink-features=AutomationControlled',
-      '--enable-gpu',
-      '--use-gl=angle',
-      '--use-angle=gl-egl',
-      '--ignore-gpu-blocklist',
-      '--enable-webgl',
-      '--enable-webgl2',
-      '--enable-gpu-rasterization',
-      '--enable-zero-copy',
-      '--disable-software-rasterizer',
-      '--disable-frame-rate-limit',
-      '--disable-gpu-vsync',
-      '--disable-features=CalculateNativeWinOcclusion,IntensiveWakeUpThrottling',
-      '--disable-renderer-backgrounding',
-      '--disable-background-timer-throttling',
-      '--disable-backgrounding-occluded-windows',
+      "--disable-blink-features=AutomationControlled",
+      "--enable-gpu",
+      "--use-gl=angle",
+      "--use-angle=gl-egl",
+      "--ignore-gpu-blocklist",
+      "--enable-webgl",
+      "--enable-webgl2",
+      "--enable-gpu-rasterization",
+      "--enable-zero-copy",
+      "--disable-software-rasterizer",
+      "--disable-frame-rate-limit",
+      "--disable-gpu-vsync",
+      "--disable-features=CalculateNativeWinOcclusion,IntensiveWakeUpThrottling",
+      "--disable-renderer-backgrounding",
+      "--disable-background-timer-throttling",
+      "--disable-backgrounding-occluded-windows",
    ],
  });

@ -41,27 +52,34 @@ export async function launchRecordingBrowser(): Promise<RecordingBrowser> {

 export async function assertHardwareWebGL(page: Page): Promise<void> {
  const info = await page.evaluate(() => {
-    const canvas = document.createElement('canvas');
-    const gl = canvas.getContext('webgl2') ?? canvas.getContext('webgl');
-    if (!gl) return { webgl: false, vendor: '', renderer: '' };
+    const canvas = document.createElement("canvas");
+    const gl = canvas.getContext("webgl2");
+    if (!gl) return { webgl: false, vendor: "", renderer: "" };

-    const ext = gl.getExtension('WEBGL_debug_renderer_info');
+    const ext = gl.getExtension("WEBGL_debug_renderer_info");
    const vendor = String(
-      ext ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL) : gl.getParameter(gl.VENDOR)
+      ext
+        ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL)
+        : gl.getParameter(gl.VENDOR),
    );
    const renderer = String(
-      ext ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL) : gl.getParameter(gl.RENDERER)
+      ext
+        ? gl.getParameter(ext.UNMASKED_RENDERER_WEBGL)
+        : gl.getParameter(gl.RENDERER),
    );
    return { webgl: true, vendor, renderer };
  });

-  console.log(`[gpu] WebGL renderer: ${info.webgl ? `${info.vendor} / ${info.renderer}` : 'none'}`);
+  console.log(
+    `[gpu] WebGL renderer: ${info.webgl ? `${info.vendor} / ${info.renderer}` : "none"}`,
+  );
  if (
-    process.env.ALLOW_SOFTWARE_GL !== '1' &&
-    (!info.webgl || /SwiftShader|llvmpipe|software/i.test(`${info.vendor} ${info.renderer}`))
+    process.env.ALLOW_SOFTWARE_GL !== "1" &&
+    (!info.webgl ||
+      /SwiftShader|llvmpipe|software/i.test(`${info.vendor} ${info.renderer}`))
  ) {
    throw new Error(
-      'Recording browser did not get hardware WebGL. Set ALLOW_SOFTWARE_GL=1 to bypass this guard.'
+      "Recording browser did not get hardware WebGL. Set ALLOW_SOFTWARE_GL=1 to bypass this guard.",
    );
  }
 }
@ -71,41 +89,45 @@ async function suppressDevServerNoise(context: BrowserContext) {
    const RealWS = window.WebSocket;
    window.WebSocket = new Proxy(RealWS, {
      construct(target, args) {
-        const url = String(args[0] ?? '');
-        const proto = (args[1] as string | string[] | undefined) ?? '';
-        const protoStr = Array.isArray(proto) ? proto.join(',') : proto;
+        const url = String(args[0] ?? "");
+        const proto = (args[1] as string | string[] | undefined) ?? "";
+        const protoStr = Array.isArray(proto) ? proto.join(",") : proto;
        if (
-          protoStr.includes('vite-hmr') ||
-          protoStr.includes('webpack') ||
-          url.includes('/ws') ||
-          url.includes('sockjs-node')
+          protoStr.includes("vite-hmr") ||
+          protoStr.includes("webpack") ||
+          url.includes("/ws") ||
+          url.includes("sockjs-node")
        ) {
          const fake = new EventTarget() as WebSocket;
          Object.defineProperties(fake, {
            readyState: { value: RealWS.CLOSED },
            url: { value: url },
-            protocol: { value: '' },
-            extensions: { value: '' },
+            protocol: { value: "" },
+            extensions: { value: "" },
            bufferedAmount: { value: 0 },
-            binaryType: { value: 'blob', writable: true },
+            binaryType: { value: "blob", writable: true },
          });
          fake.send = () => {};
-          fake.close = () => fake.dispatchEvent(new Event('close'));
-          queueMicrotask(() => fake.dispatchEvent(new Event('close')));
+          fake.close = () => fake.dispatchEvent(new Event("close"));
+          queueMicrotask(() => fake.dispatchEvent(new Event("close")));
          return fake;
        }
        return Reflect.construct(target, args);
      },
    });

-    Object.defineProperty(window.location, 'reload', {
+    Object.defineProperty(window.location, "reload", {
      value: () => {},
      configurable: true,
    });
-    window.addEventListener('error', (e) => e.stopImmediatePropagation(), true);
-    window.addEventListener('unhandledrejection', (e) => e.stopImmediatePropagation(), true);
+    window.addEventListener("error", (e) => e.stopImmediatePropagation(), true);
+    window.addEventListener(
+      "unhandledrejection",
+      (e) => e.stopImmediatePropagation(),
+      true,
+    );

-    const styleEl = document.createElement('style');
+    const styleEl = document.createElement("style");
    styleEl.textContent = `
      vite-error-overlay,
      wds-overlay,
@ -126,12 +148,12 @@ async function suppressDevServerNoise(context: BrowserContext) {

    const killOverlay = (node: Element) => {
      const tag = node.tagName?.toLowerCase();
-      const id = (node as HTMLElement).id?.toLowerCase() ?? '';
+      const id = (node as HTMLElement).id?.toLowerCase() ?? "";
      if (
-        tag === 'vite-error-overlay' ||
-        tag === 'wds-overlay' ||
-        id.includes('webpack-dev-server-client') ||
-        id.includes('webpack-error')
+        tag === "vite-error-overlay" ||
+        tag === "wds-overlay" ||
+        id.includes("webpack-dev-server-client") ||
+        id.includes("webpack-error")
      ) {
        (node as HTMLElement).remove();
      }
@ -143,10 +165,11 @@ async function suppressDevServerNoise(context: BrowserContext) {
        });
      }
    });
-    if (document.body) obs.observe(document.body, { childList: true, subtree: true });
+    if (document.body)
+      obs.observe(document.body, { childList: true, subtree: true });
    else {
-      document.addEventListener('DOMContentLoaded', () =>
-        obs.observe(document.body, { childList: true, subtree: true })
+      document.addEventListener("DOMContentLoaded", () =>
+        obs.observe(document.body, { childList: true, subtree: true }),
      );
    }
  });
--- a/video/src/config.ts
+++ b/video/src/config.ts
@ -1,46 +1,66 @@
-export const APP_URL = process.env.APP_URL ?? 'http://host.docker.internal:3001';
-export const DASHBOARD_PATH = '/dashboard';
+/**
+ * Read a mandatory environment variable.
+ *
+ * @param name - Name of the variable to look up in `process.env`.
+ * @returns The variable's value.
+ * @throws Error when the variable is unset or set to the empty string.
+ */
+function requiredEnv(name: string): string {
+  const raw = process.env[name];
+  if (raw === undefined || raw === "") throw new Error(`${name} is required`);
+  return raw;
+}

-export const AUTH_STATE_PATH = 'auth.json';
-export const OUTPUT_DIR = 'output';
+/**
+ * Read a mandatory environment variable and parse it as a finite number.
+ *
+ * `Number("   ")` evaluates to 0, so a whitespace-only value is rejected
+ * explicitly instead of silently becoming zero.
+ *
+ * @param name - Name of the variable to look up.
+ * @returns The parsed numeric value.
+ * @throws Error when the variable is missing or empty (raised by
+ *   `requiredEnv`), is blank, or does not parse to a finite number.
+ */
+function requiredNumberEnv(name: string): number {
+  const raw = requiredEnv(name);
+  const value = raw.trim() === "" ? Number.NaN : Number(raw);
+  if (!Number.isFinite(value)) {
+    throw new Error(`${name} must be a finite number`);
+  }
+  return value;
+}

-const aspect = process.env.ASPECT ?? '16x9';
+export const APP_URL = requiredEnv("APP_URL");
+export const DASHBOARD_PATH = "/dashboard";
+
+export const AUTH_STATE_PATH = "auth.json";
+export const OUTPUT_DIR = "output";
+
+const aspect = requiredEnv("ASPECT");
+if (aspect !== "16x9" && aspect !== "9x16") {
+  throw new Error("ASPECT must be '16x9' or '9x16'");
+}
 export const VIEWPORT =
-  aspect === '9x16' ? { width: 1080, height: 1920 } : { width: 1920, height: 1080 };
-export const CAPTURE_SCALE = Math.max(1, Number(process.env.CAPTURE_SCALE ?? 1));
+  aspect === "9x16"
+    ? { width: 1080, height: 1920 }
+    : { width: 1920, height: 1080 };
+export const CAPTURE_SCALE = Math.max(1, requiredNumberEnv("CAPTURE_SCALE"));
 export const VIDEO_SIZE = {
  width: VIEWPORT.width,
  height: VIEWPORT.height,
 };
-export const WEBM_BITRATE = process.env.WEBM_BITRATE ?? (CAPTURE_SCALE > 1 ? '18M' : '8M');
+export const WEBM_BITRATE = requiredEnv("WEBM_BITRATE");

 // Cold-open prompt. Punchy version of the user's intent, short enough to type
 // on camera without making the opening scene drag.
-export const PROMPT_TEXT =
-  process.env.PROMPT_TEXT ?? 'Flats or terraces <£450k, 35 min to Manchester, low crime';
+export const PROMPT_TEXT = requiredEnv("PROMPT_TEXT");

 // Filters returned by the AI stub. Keys MUST match real feature names from
 // /api/features (verified against the running server's schema).
 export const STUBBED_FILTERS: Record<string, [number, number] | string[]> = {
-  'Property type': ['Flats/Maisonettes', 'Terraced'],
-  'Estimated current price': [175000, 450000],
-  'Serious crime per 1k residents (avg/yr)': [0, 55],
-  'Noise (dB)': [50, 68],
+  "Property type": ["Flats/Maisonettes", "Terraced"],
+  "Estimated current price": [175000, 450000],
+  "Serious crime per 1k residents (avg/yr)": [0, 55],
+  "Noise (dB)": [50, 68],
 };

 // Travel-time filters returned by the AI stub. Slug matches the real
 // /api/travel-destinations?mode=transit response.
 export const STUBBED_TRAVEL_TIME_FILTERS: {
-  mode: 'transit' | 'car' | 'bicycle' | 'walking';
+  mode: "transit" | "car" | "bicycle" | "walking";
  slug: string;
  label: string;
  min?: number;
  max?: number;
 }[] = [
  {
-    mode: 'transit',
-    slug: 'manchester',
-    label: 'Manchester city centre',
+    mode: "transit",
+    slug: "manchester",
+    label: "Manchester city centre",
    max: 35,
  },
 ];
@ -55,7 +75,7 @@ export const TT_DRAG_TO_MIN = 20;

 // Cold-open zoom: how aggressively to magnify the AI box.
 // 2.4 fills most of the viewport with the prompt card without blowing up text.
-export const AI_ZOOM_SCALE = Number(process.env.AI_ZOOM_SCALE ?? 2.4);
+export const AI_ZOOM_SCALE = requiredNumberEnv("AI_ZOOM_SCALE");

 // Initial map view used while we navigate. The AI scene zooms in on the
 // sidebar so this only matters once we zoom out.
@ -67,13 +87,18 @@ export const INITIAL_MAP_VIEW = {

 // Verification guard only. The renderer does not use this as an editing cap:
 // if the storyboard needs more than 15 seconds to avoid jumps, keep the frames.
-export const MAX_DURATION_S = Number(process.env.MAX_DURATION_S ?? 45);
-export const MIN_DURATION_S = Number(process.env.MIN_DURATION_S ?? 10);
+export const MAX_DURATION_S = requiredNumberEnv("MAX_DURATION_S");
+export const MIN_DURATION_S = requiredNumberEnv("MIN_DURATION_S");

 // Target fps of the FINAL output.
-export const OUTPUT_FPS = Number(process.env.OUTPUT_FPS ?? 50);
+export const OUTPUT_FPS = requiredNumberEnv("OUTPUT_FPS");
+
+// Seconds of head-room kept in front of sceneStart when trimming. Shared by
+// the video trim and the narration manifest so cue offsets line up with the
+// trimmed timeline.
+export const LEAD_IN_S = 0.12;

 // Brand strings for the outro card.
-export const BRAND_NAME = 'Perfect Postcode';
-export const BRAND_TAGLINE = 'Find where you actually want to live.';
-export const BRAND_URL = 'https://perfect-postcode.co.uk';
+export const BRAND_NAME = "Perfect Postcode";
+export const BRAND_TAGLINE = "Find where you actually want to live.";
+export const BRAND_URL = "https://perfect-postcode.co.uk";
--- a/video/src/dom.ts
+++ b/video/src/dom.ts
@ -20,8 +20,10 @@ export async function installCursor(page: Page): Promise<void> {
        pointer-events: none;
        z-index: 2147483646;
        transform: translate(-2px, -2px);
+        transform-origin: 2px 2px;
        transition: transform 60ms linear, scale 120ms ease-out;
-        will-change: transform;
+        will-change: transform, scale;
+        scale: 1;
      }
      #__demo-cursor svg {
        filter: drop-shadow(0 2px 4px rgba(0,0,0,0.35));
@ -225,6 +227,30 @@ export async function showCaption(page: Page, text: string): Promise<void> {
  }, text);
 }

+/**
+ * Resize the injected demo cursor by animating its CSS `scale` property.
+ *
+ * `scale` is written separately from `transform`, and the inline transition
+ * keeps the `transform 60ms linear` entry, so only the scale pace changes.
+ * The scale transition duration is set on every call, letting each caller
+ * pick its own pace. Does nothing when `#__demo-cursor` is not in the page.
+ *
+ * @param page - Page whose injected cursor should be resized.
+ * @param scale - Target value for the CSS `scale` property.
+ * @param durationMs - Transition duration in ms; negative values clamp to 0.
+ */
+export async function setCursorScale(
+  page: Page,
+  scale: number,
+  durationMs: number
+): Promise<void> {
+  const args = { scale, durationMs };
+  await page.evaluate((opts) => {
+    const cursorEl = document.getElementById('__demo-cursor');
+    if (cursorEl === null) return;
+    const scaleMs = Math.max(0, opts.durationMs);
+    cursorEl.style.transition =
+      `transform 60ms linear, scale ${scaleMs}ms cubic-bezier(0.22, 1, 0.36, 1)`;
+    cursorEl.style.scale = String(opts.scale);
+  }, args);
+}
+
 export async function hideCaption(page: Page): Promise<void> {
  await page.evaluate(() => {
    document.getElementById('__demo-caption')?.classList.remove('visible');
--- a/video/src/motion.ts
+++ b/video/src/motion.ts
@ -72,18 +72,31 @@ export async function smoothMove(

 /**
 * "Fake" type: progressively set the textarea value, dispatching
- * React-compatible input events. This stays Node-driven so typing cadence is
- * stable even when the map is busy rendering.
+ * React-compatible input events.
+ *
+ * Cadence is generated as a per-char weight ratio (so spaces and punctuation
+ * read as natural pauses), then **rescaled** so that the sum of delays equals
+ * `totalDurationMs` exactly. The runner depends on this: it budgets a
+ * specific number of ms for the type step, and any divergence would cascade
+ * into narration drift.
 */
 export async function fakeType(
  page: Page,
  selector: string,
  text: string,
-  delayMs: number
+  totalDurationMs: number
 ): Promise<void> {
  const steps = text.length;
+  if (steps === 0) {
+    if (totalDurationMs > 0) await sleep(totalDurationMs);
+    return;
+  }
+
+  const weights = computeTypingWeights(text);
+  const weightSum = weights.reduce((a, b) => a + b, 0);
+  const msPerWeight = totalDurationMs / weightSum;
+
  for (let i = 1; i <= steps; i++) {
-    const end = Math.ceil((text.length * i) / steps);
    await page.evaluate(
      ({ selector, value }) => {
        const ta = document.querySelector(selector) as HTMLTextAreaElement | null;
@ -97,28 +110,25 @@ export async function fakeType(
        setValue.call(ta, value);
        ta.dispatchEvent(new Event('input', { bubbles: true }));
      },
-      { selector, value: text.slice(0, end) }
+      { selector, value: text.slice(0, i) }
    );
-    if (delayMs > 0 && i < steps) {
-      await new Promise((resolve) =>
-        setTimeout(resolve, humanTypingDelay(text[i - 1], text[i], i, delayMs))
-      );
+    if (i < steps) {
+      const ms = Math.max(0, Math.round(weights[i - 1] * msPerWeight));
+      if (ms > 0) await sleep(ms);
    }
  }
 }

-function humanTypingDelay(
-  char: string,
-  nextChar: string | undefined,
-  index: number,
-  baseDelayMs: number
-): number {
+function computeTypingWeights(text: string): number[] {
  const cadence = [0.82, 1.08, 0.94, 1.22, 0.88, 1.14, 0.98, 1.28];
-  let delay = baseDelayMs * cadence[index % cadence.length];
-  if (char === ' ') delay += baseDelayMs * 0.9;
-  if (/[,.!?;:]/.test(char)) delay += baseDelayMs * 1.8;
-  if (nextChar === ' ' && index % 4 === 0) delay += baseDelayMs * 0.55;
-  return Math.round(delay);
+  return Array.from(text, (char, index) => {
+    let weight = cadence[index % cadence.length];
+    if (char === ' ') weight += 0.9;
+    if (/[,.!?;:]/.test(char)) weight += 1.8;
+    const next = text[index + 1];
+    if (next === ' ' && index % 4 === 0) weight += 0.55;
+    return weight;
+  });
 }

 /**
--- a/video/src/narration.ts
+++ b/video/src/narration.ts
@ -0,0 +1,37 @@
+import { writeFileSync } from 'node:fs';
+
+/** One narration block placed on the video timeline. */
+export interface NarrationCue {
+  /** Narration text for this cue. */
+  text: string;
+  /** Offset of the cue on the video timeline, in milliseconds. */
+  videoTimeMs: number;
+  /** Length of the cue, in milliseconds. */
+  durationMs: number;
+}
+
+/**
+ * Collects narration cues and writes them out as a JSON manifest.
+ *
+ * Callers pass an explicit `videoTimeMs` with every cue; nothing in this
+ * class reads a clock. Cues with a negative `videoTimeMs` are dropped by
+ * `add`, and `flush` writes the remaining cues ordered by `videoTimeMs`.
+ */
+class NarrationLog {
+  private cues: NarrationCue[] = [];
+
+  /** Forget every cue collected so far. */
+  reset(): void {
+    this.cues = [];
+  }
+
+  /** Record a cue; a cue with a negative `videoTimeMs` is ignored. */
+  add(cue: NarrationCue): void {
+    const startsBeforeTimeline = cue.videoTimeMs < 0;
+    if (!startsBeforeTimeline) {
+      this.cues.push(cue);
+    }
+  }
+
+  /**
+   * Write `{ totalDurationMs, cues }` to `path` as pretty-printed JSON.
+   *
+   * @param path - Destination file.
+   * @param totalDurationMs - Value stored verbatim in the manifest.
+   * @returns A copy of the cues in ascending `videoTimeMs` order; the
+   *   internal list keeps its insertion order.
+   */
+  flush(path: string, totalDurationMs: number): NarrationCue[] {
+    const ordered = this.cues.slice();
+    ordered.sort((first, second) => first.videoTimeMs - second.videoTimeMs);
+    writeFileSync(
+      path,
+      JSON.stringify({ totalDurationMs, cues: ordered }, null, 2)
+    );
+    return ordered;
+  }
+}
+
+export const narrationLog = new NarrationLog();
--- a/video/src/preflight.ts
+++ b/video/src/preflight.ts
@ -0,0 +1,32 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { OUTPUT_DIR } from './config.js';
+import { storyboard } from './storyboard.js';
+
+/**
+ * Write the narration script consumed by the synth step.
+ *
+ * Walks `storyboard.cues` and writes a flat manifest to
+ * `narration-script.json` inside `OUTPUT_DIR`, creating the directory first
+ * if it does not exist. Each item carries the cue's position, its trimmed
+ * text and its `gapBeforeMs`. Nothing here touches Playwright, the dashboard
+ * or auth.
+ *
+ * The `cueIndex` written here is the key the runner later uses to match
+ * storyboard cues to measured durations.
+ */
+function main(): void {
+  const outputDirMissing = !existsSync(OUTPUT_DIR);
+  if (outputDirMissing) {
+    mkdirSync(OUTPUT_DIR, { recursive: true });
+  }
+
+  const path = join(OUTPUT_DIR, 'narration-script.json');
+  const items = storyboard.cues.map((cue, cueIndex) => {
+    const text = cue.text.trim();
+    return { cueIndex, text, gapBeforeMs: cue.gapBeforeMs };
+  });
+
+  writeFileSync(path, JSON.stringify({ items }, null, 2));
+  console.log(`Wrote ${items.length} narration cues to ${path}`);
+}
+
+main();
--- a/video/src/record.ts
+++ b/video/src/record.ts
@ -1,8 +1,10 @@
 import { existsSync, mkdirSync, statSync } from 'node:fs';
 import { join } from 'node:path';
-import { AUTH_STATE_PATH, OUTPUT_DIR } from './config.js';
+import { AUTH_STATE_PATH, LEAD_IN_S, OUTPUT_DIR } from './config.js';
 import { assertHardwareWebGL, launchRecordingBrowser } from './browser.js';
+import { narrationLog } from './narration.js';
 import { installDemoRoutes } from './routes.js';
+import { storyboard } from './storyboard.js';
 import { prepareTimeline, runTimeline } from './timeline.js';
 import { trimRecording } from './video.js';

@ -37,7 +39,7 @@ async function main() {

  await installDemoRoutes(page);
  const ctx = await prepareTimeline(page);
-  const timeline = await runTimeline(ctx);
+  const timeline = await runTimeline(ctx, storyboard);

  await page.close();
  const rawPath = join(OUTPUT_DIR, 'recording.raw.webm');
@ -54,6 +56,16 @@ async function main() {
    recordStartMs,
    ...timeline,
  });
+
+  const totalDurationMs =
+    timeline.sceneEndMs - timeline.sceneStartMs + LEAD_IN_S * 1000;
+  const cues = narrationLog.flush(
+    join(OUTPUT_DIR, 'narration.json'),
+    totalDurationMs
+  );
+  console.log(
+    `Wrote ${cues.length} narration cues to ${join(OUTPUT_DIR, 'narration.json')}`
+  );
  console.log('Run "npm run encode" to produce output/recording.mp4');
 }

--- a/video/src/runner.ts
+++ b/video/src/runner.ts
@ -0,0 +1,275 @@
+import { existsSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import type { Page } from 'playwright';
+import { LEAD_IN_S, OUTPUT_DIR } from './config.js';
+import {
+  clearVignette,
+  hideCaption,
+  setCursorScale,
+  showCaption,
+  showOutro,
+  zoomReset,
+  zoomTo,
+} from './dom.js';
+import { fakeType, sleep, smoothDragSliderThumb, smoothMove } from './motion.js';
+import { narrationLog } from './narration.js';
+import type { Activity, Cue, ScriptCtx, Storyboard, Target } from './script.js';
+
+/** Scene boundaries reported by ``runStoryboard``, in epoch milliseconds. */
+export interface RunnerResult {
+  /** Wall-clock (``Date.now()``) taken just before the first activity runs. */
+  sceneStartMs: number;
+  /**
+   * End of the scene: ``sceneStartMs`` plus the runner's accumulated
+   * timeline cursor (padding included), not a fresh clock reading.
+   */
+  sceneEndMs: number;
+}
+
+/** Vertical wheel delta sent per ``mapZoom`` step via ``page.mouse.wheel``. */
+const MAP_ZOOM_WHEEL_DELTA = -120;
+/** Per-word duration estimate used when no synth manifest is available. */
+const FALLBACK_MS_PER_WORD = 750;
+/** Fixed padding added to every estimated cue duration in the fallback path. */
+const FALLBACK_TAIL_BUFFER_MS = 800;
+
+/** One entry of the synth manifest read by ``loadSynthIndex``. */
+interface SynthCue {
+  /** Position of the cue in ``storyboard.cues``; used as the lookup key. */
+  cueIndex: number;
+  /** Cue text as stored alongside the duration. */
+  text: string;
+  /** Length of the cue's audio in milliseconds (measured, or estimated in the fallback path). */
+  durationMs: number;
+}
+
+/**
+ * Play a cue-anchored storyboard against the recording page.
+ *
+ * Order of play: ``storyboard.pre`` activities, then every cue in
+ * ``storyboard.cues``, then ``storyboard.post`` activities. Per-cue audio
+ * durations come from ``loadSynthIndex``: the synth manifest when it exists,
+ * otherwise a word-count estimate so the visuals still play without sound.
+ *
+ * A timeline cursor (ms since scene start) is threaded through the run.
+ * ``pre``/``post`` steps advance it by whatever ``runStep`` reports, and
+ * each cue advances it inside ``runCue``. The narration log is reset at the
+ * start, and cues are logged with ``LEAD_IN_S`` (converted to ms) added to
+ * their cursor position.
+ *
+ * @param ctx - Page, dashboard helper and cursor position shared by activities.
+ * @param storyboard - Storyboard to play.
+ * @returns Scene start (wall-clock) and scene end (start + final cursor).
+ */
+export async function runStoryboard(
+  ctx: ScriptCtx,
+  storyboard: Storyboard
+): Promise<RunnerResult> {
+  narrationLog.reset();
+
+  const measured = loadSynthIndex(storyboard);
+  const sceneStartMs = Date.now();
+  const cursor = { ms: 0 };
+  const leadInMs = LEAD_IN_S * 1000;
+
+  const preSteps = storyboard.pre ?? [];
+  for (const activity of preSteps) {
+    cursor.ms += await runStep(ctx, activity);
+  }
+
+  let cueNo = 0;
+  for (const cue of storyboard.cues) {
+    await runCue(ctx, cue, measured[cueNo], cursor, leadInMs);
+    cueNo += 1;
+  }
+
+  const postSteps = storyboard.post ?? [];
+  for (const activity of postSteps) {
+    cursor.ms += await runStep(ctx, activity);
+  }
+
+  const sceneEndMs = sceneStartMs + cursor.ms;
+  return { sceneStartMs, sceneEndMs };
+}
+
+/**
+ * Play one narration cue: optional silent gap, caption plus ``during``
+ * activities padded to the audio length, then ``tail`` activities.
+ *
+ * The cue's position on the timeline is taken from the wall clock, anchored
+ * to ``cursor.ms`` on entry, instead of from summed budgets. That way the
+ * ``showCaption``/``hideCaption`` round-trips and any timer overshoot are
+ * included both in the logged ``videoTimeMs`` and in the cursor handed to
+ * the next cue, so later cues do not drift ahead of the recording.
+ *
+ * @param ctx - Page, dashboard helper and cursor position shared by activities.
+ * @param cue - Storyboard cue to play.
+ * @param synth - Duration entry for this cue (measured or estimated).
+ * @param cursor - Timeline cursor in ms since scene start; advanced in place.
+ * @param leadInMs - Offset added to the logged cue time.
+ * @throws Error when the declared ``during`` budgets exceed the audio length
+ *   by more than 50 ms.
+ */
+async function runCue(
+  ctx: ScriptCtx,
+  cue: Cue,
+  synth: SynthCue,
+  cursor: { ms: number },
+  leadInMs: number
+): Promise<void> {
+  const during = cue.during ?? [];
+  const measuredAudioMs = synth.durationMs;
+
+  // Validate before sleeping, logging or touching the page, so a bad cue
+  // fails without leaving a half-played caption or a stray manifest entry.
+  const declaredSum = during.reduce((s, a) => s + a.durationMs, 0);
+  if (declaredSum > measuredAudioMs + 50) {
+    throw new Error(
+      `Cue ${synth.cueIndex} "${cue.text.slice(0, 40)}…" has ${declaredSum}ms of ` +
+        `during activities but the measured audio is only ${measuredAudioMs}ms. ` +
+        `Trim a during step, lengthen the cue text, or move work into tail.`
+    );
+  }
+
+  // Anchor the timeline to the wall clock for the duration of this cue.
+  const enteredAt = Date.now();
+  const cursorAtEntryMs = cursor.ms;
+  const timelineNowMs = (): number =>
+    cursorAtEntryMs + (Date.now() - enteredAt);
+
+  if (cue.gapBeforeMs > 0) {
+    await sleep(cue.gapBeforeMs);
+  }
+
+  narrationLog.add({
+    text: cue.text,
+    videoTimeMs: timelineNowMs() + leadInMs,
+    durationMs: measuredAudioMs,
+  });
+  await showCaption(ctx.page, cue.text);
+
+  // Time the during block as a whole — individual steps may overrun their
+  // budgets, but what matters at the cue boundary is total wall-clock.
+  const duringStart = Date.now();
+  for (const step of during) {
+    await runStep(ctx, step);
+  }
+  // Keep the caption up until the audio would have finished playing.
+  const remainingMs = measuredAudioMs - (Date.now() - duringStart);
+  if (remainingMs > 0) {
+    await sleep(remainingMs);
+  }
+
+  await hideCaption(ctx.page);
+
+  for (const step of cue.tail ?? []) {
+    await runStep(ctx, step);
+  }
+
+  // Publish the real elapsed time so the next cue starts from where the
+  // recording actually is.
+  cursor.ms = timelineNowMs();
+}
+
+/**
+ * Execute one activity and report how much timeline it consumed.
+ *
+ * A step that finishes inside its ``durationMs`` budget is padded with a
+ * sleep and reports the budget. A step that takes at least its budget
+ * reports the real elapsed time, and an overrun of more than 50 ms is
+ * logged.
+ *
+ * @returns ``max(declared, actual)`` in milliseconds.
+ */
+async function runStep(ctx: ScriptCtx, step: Activity): Promise<number> {
+  const budgetMs = step.durationMs;
+  const t0 = Date.now();
+  await runActivity(ctx, step);
+  const spentMs = Date.now() - t0;
+
+  const finishedEarly = spentMs < budgetMs;
+  if (finishedEarly) {
+    await sleep(budgetMs - spentMs);
+    return budgetMs;
+  }
+
+  if (spentMs > budgetMs + 50) {
+    console.log(
+      `[runner] step ${step.kind} ran ${spentMs}ms over a ${budgetMs}ms budget (drift +${spentMs - budgetMs}ms)`
+    );
+  }
+  return spentMs;
+}
+
+/**
+ * Perform the side effect of a single activity, dispatching on
+ * ``step.kind``. Padding a short step up to its budget is the caller's job
+ * (``runStep``); this function only does the work.
+ *
+ * Every activity that moves the real mouse also stores the new position in
+ * ``ctx.cursor``, so the next smooth move starts where the pointer is.
+ *
+ * @param ctx - Page, dashboard helper and tracked cursor position.
+ * @param step - Activity to execute.
+ */
+async function runActivity(ctx: ScriptCtx, step: Activity): Promise<void> {
+  switch (step.kind) {
+    case 'wait':
+      // Nothing to do: runStep sleeps for the declared duration.
+      return;
+    case 'clearVignette':
+      await clearVignette(ctx.page);
+      return;
+    case 'zoomTo': {
+      const focus = await resolveTarget(ctx, step.target);
+      await zoomTo(ctx.page, {
+        scale: step.scale,
+        focusX: focus.x,
+        focusY: focus.y,
+        durationMs: step.durationMs,
+      });
+      return;
+    }
+    case 'zoomReset':
+      await zoomReset(ctx.page, step.durationMs);
+      return;
+    case 'cursorScale':
+      await setCursorScale(ctx.page, step.scale, step.durationMs);
+      return;
+    case 'moveCursor': {
+      const to = await resolveTarget(ctx, step.target);
+      await smoothMove(ctx.page, ctx.cursor, to, { durationMs: step.durationMs });
+      ctx.cursor = to;
+      return;
+    }
+    case 'click': {
+      const to = await resolveTarget(ctx, step.target);
+      // Spend 70% of the budget (at least 120ms) travelling, then click.
+      const moveMs = Math.max(120, Math.round(step.durationMs * 0.7));
+      await smoothMove(ctx.page, ctx.cursor, to, { durationMs: moveMs });
+      ctx.cursor = to;
+      await ctx.page.mouse.click(to.x, to.y);
+      return;
+    }
+    case 'type':
+      await fakeType(ctx.page, step.selector, step.text, step.durationMs);
+      return;
+    case 'mapZoom': {
+      const point = await resolveTarget(ctx, step.target);
+      await ctx.page.mouse.move(point.x, point.y);
+      // The pointer is now at `point`; keep the tracked position in sync so
+      // the next smoothMove does not start from a stale location.
+      ctx.cursor = point;
+      // Spread the budget evenly across the wheel events.
+      const perStepMs = Math.floor(step.durationMs / Math.max(1, step.steps));
+      for (let i = 0; i < step.steps; i++) {
+        await ctx.page.mouse.wheel(0, MAP_ZOOM_WHEEL_DELTA);
+        if (perStepMs > 0) await sleep(perStepMs);
+      }
+      return;
+    }
+    case 'dragSlider':
+      ctx.cursor = await smoothDragSliderThumb(
+        ctx.page,
+        step.thumbSelector,
+        step.trackSelector,
+        ctx.cursor,
+        step.toFraction,
+        step.durationMs
+      );
+      return;
+    case 'submitForm':
+      // No-op when the selector matches nothing (optional chaining).
+      await ctx.page.evaluate((selector) => {
+        document.querySelector<HTMLFormElement>(selector)?.requestSubmit();
+      }, step.formSelector);
+      return;
+    case 'showOutro':
+      await showOutro(ctx.page, step.brand, step.tagline, step.url);
+      return;
+  }
+}
+
+/**
+ * Turn a declarative ``Target`` into viewport coordinates.
+ *
+ * - ``point``: returned as given.
+ * - ``hexagon``: the first entry from ``ctx.dashboard.visibleHexagonTargets(1)``.
+ * - anything else (``element``): centre of the locator's bounding box.
+ *
+ * @throws Error when no hexagon is visible or the element has no bounding box.
+ */
+async function resolveTarget(
+  ctx: ScriptCtx,
+  target: Target
+): Promise<{ x: number; y: number }> {
+  switch (target.kind) {
+    case 'point':
+      return { x: target.x, y: target.y };
+    case 'hexagon': {
+      const candidates = await ctx.dashboard.visibleHexagonTargets(1);
+      if (candidates.length === 0) throw new Error('No visible hexagon to target');
+      const first = candidates[0];
+      return { x: first.x, y: first.y };
+    }
+    default: {
+      const rect = await ctx.page.locator(target.selector).boundingBox();
+      if (!rect) throw new Error(`No bounding box for selector: ${target.selector}`);
+      const centreX = rect.x + rect.width / 2;
+      const centreY = rect.y + rect.height / 2;
+      return { x: centreX, y: centreY };
+    }
+  }
+}
+
+/**
+ * Load per-cue audio durations for the storyboard.
+ *
+ * When ``audio/index.json`` exists under ``OUTPUT_DIR`` it is parsed and
+ * validated: it must contain an ``items`` array, every storyboard cue must
+ * have an entry keyed by its index, and each entry's ``durationMs`` must be
+ * a finite, non-negative number. A malformed manifest is reported with a
+ * descriptive error rather than surfacing later as a TypeError or NaN timing.
+ *
+ * When the file is missing, a worst-case estimate is returned instead
+ * (``FALLBACK_MS_PER_WORD`` per whitespace-separated word plus
+ * ``FALLBACK_TAIL_BUFFER_MS``) so the visual flow can still play.
+ *
+ * @param storyboard - Storyboard whose cues need durations.
+ * @returns One entry per storyboard cue, in storyboard order.
+ * @throws Error when the manifest is malformed or lacks a cue.
+ */
+function loadSynthIndex(storyboard: Storyboard): SynthCue[] {
+  const path = join(OUTPUT_DIR, 'audio', 'index.json');
+
+  if (!existsSync(path)) {
+    console.log(
+      `[runner] no ${path} found — using worst-case fallback durations (${FALLBACK_MS_PER_WORD}ms/word + ${FALLBACK_TAIL_BUFFER_MS}ms buffer). Audio will be missing.`
+    );
+    return storyboard.cues.map((cue, cueIndex) => ({
+      cueIndex,
+      text: cue.text,
+      durationMs:
+        cue.text.split(/\s+/).filter(Boolean).length * FALLBACK_MS_PER_WORD +
+        FALLBACK_TAIL_BUFFER_MS,
+    }));
+  }
+
+  // JSON.parse returns unchecked data, so narrow it before trusting it.
+  const raw: unknown = JSON.parse(readFileSync(path, 'utf-8'));
+  const items = (raw as { items?: unknown } | null)?.items;
+  if (!Array.isArray(items)) {
+    throw new Error(
+      `Synth manifest ${path} has no "items" array. ` +
+        `Re-run preflight + synth so the audio matches the storyboard.`
+    );
+  }
+
+  // Later duplicates win, matching Map construction from an entry list.
+  const byIndex = new Map<number, Partial<SynthCue>>();
+  for (const entry of items as Array<Partial<SynthCue> | null>) {
+    if (entry && typeof entry.cueIndex === 'number') {
+      byIndex.set(entry.cueIndex, entry);
+    }
+  }
+
+  return storyboard.cues.map((cue, i) => {
+    const m = byIndex.get(i);
+    if (!m) {
+      throw new Error(
+        `Synth manifest is missing cue ${i} ("${cue.text.slice(0, 40)}…"). ` +
+          `Re-run preflight + synth so the audio matches the storyboard.`
+      );
+    }
+    const { durationMs } = m;
+    if (
+      typeof durationMs !== 'number' ||
+      !Number.isFinite(durationMs) ||
+      durationMs < 0
+    ) {
+      throw new Error(
+        `Synth manifest has an invalid durationMs (${String(durationMs)}) for cue ${i}. ` +
+          `Re-run preflight + synth so the audio matches the storyboard.`
+      );
+    }
+    return m as SynthCue;
+  });
+}
+
+export type { Page };
--- a/video/src/script.ts
+++ b/video/src/script.ts
@ -0,0 +1,109 @@
+import type { Page } from 'playwright';
+import type { DashboardRecorder } from './dashboard.js';
+
+/**
+ * Public scripting API for the demo video.
+ *
+ * The storyboard is a `Storyboard` — an ordered list of narration cues, each
+ * carrying the activities that play alongside it. Audio is generated FIRST
+ * (one batched Qwen call so the voice stays consistent across cues); the
+ * runner then reads the measured per-cue durations and slots `during`
+ * activities inside each cue's audio window.
+ *
+ * Why cue-anchored: the audio drives pacing. Re-running synth produces a new
+ * set of measured durations and the storyboard self-aligns — you don't have
+ * to retune activity numbers. Author intent stays declarative ("zoom + type
+ * happen during this cue, dwell 4s after, then next cue starts").
+ */
+
+/** Shared state passed to the storyboard runner and its helpers. */
+export interface ScriptCtx {
+  /** Playwright page being driven and recorded. */
+  page: Page;
+  /** Dashboard helper; see ./dashboard.js for what it exposes. */
+  dashboard: DashboardRecorder;
+  /**
+   * Tracked cursor position as `{ x, y }`. `prepareTimeline` seeds it and
+   * moves the real mouse to the same point.
+   */
+  cursor: { x: number; y: number };
+}
+
+/**
+ * A point on screen: absolute pixel coords, the centre of an element, or the
+ * centre of a visible map polygon resolved at runtime.
+ */
+export type Target =
+  | { kind: 'point'; x: number; y: number }
+  | { kind: 'element'; selector: string }
+  /**
+   * Resolved at runtime to the centre of a visible hexagon/postcode polygon,
+   * picked from the dashboard's most recent map response. Robust to any zoom
+   * level — use this when the click MUST land on a polygon and a fixed pixel
+   * coordinate would risk landing on a road or river at deep zoom.
+   */
+  | { kind: 'hexagon' };
+
+/** Build a `point` target from absolute pixel coordinates. */
+export const at = (x: number, y: number): Target => {
+  return { kind: 'point', x, y };
+};
+/** Build an `element` target for the given selector. */
+export const el = (selector: string): Target => {
+  return { kind: 'element', selector };
+};
+/** Build a `hexagon` target, resolved to a visible polygon at runtime. */
+export const hex = (): Target => {
+  return { kind: 'hexagon' };
+};
+
+/**
+ * Activities are the runner's atomic operations. Each one has a fixed
+ * `durationMs` budget; the runner pads short overruns and warns on long ones.
+ *
+ * This is a discriminated union: `kind` selects the variant and every
+ * variant carries its own `durationMs`.
+ */
+export type Activity =
+  /** Pure pause. Useful for spacing. */
+  | { kind: 'wait'; durationMs: number }
+  /** Smoothly zoom the dashboard wrapper so `target` lands at viewport centre. */
+  | { kind: 'zoomTo'; target: Target; scale: number; durationMs: number }
+  /** Animate the wrapper back to identity. */
+  | { kind: 'zoomReset'; durationMs: number }
+  /** Slide the cursor from its current position to `target`. */
+  | { kind: 'moveCursor'; target: Target; durationMs: number }
+  /** Move + click + ripple. `durationMs` is the whole gesture, including settle. */
+  | { kind: 'click'; target: Target; durationMs: number }
+  /** Type into a textarea/input over exactly `durationMs`. */
+  | { kind: 'type'; selector: string; text: string; durationMs: number }
+  /** Grow or shrink the visible cursor (CSS scale). */
+  | { kind: 'cursorScale'; scale: number; durationMs: number }
+  /**
+   * Wheel-zoom the underlying map at `target`. `steps` controls intensity
+   * (each step is one ~120px wheel notch).
+   */
+  | { kind: 'mapZoom'; target: Target; steps: number; durationMs: number }
+  /** Drag the right thumb of a Radix slider to a fraction in [0,1]. */
+  | {
+      kind: 'dragSlider';
+      thumbSelector: string;
+      trackSelector: string;
+      toFraction: number;
+      durationMs: number;
+    }
+  /** Submit a form found by selector and wait `durationMs`. */
+  | { kind: 'submitForm'; formSelector: string; durationMs: number }
+  /** Reveal the closing brand card. */
+  | { kind: 'showOutro'; brand: string; tagline: string; url: string; durationMs: number }
+  /** Fade away the opening vignette. */
+  | { kind: 'clearVignette'; durationMs: number };
+
+/**
+ * A narration cue + the activities that play alongside it.
+ *
+ *   text        : the narration line for this cue.
+ *   gapBeforeMs : silent wall-time before the caption appears (= silence in
+ *                 audio between the previous cue ending and this one).
+ *   during      : activities that play WHILE the caption is on screen. The
+ *                 sum of declared durations must be ≤ the measured audio
+ *                 duration; the runner pads short blocks so the caption stays
+ *                 on for the full cue. Sum > measured is a hard error.
+ *   tail        : activities that run AFTER the caption hides, before the
+ *                 next cue's gapBefore starts. Use it for dwells/transitions
+ *                 that aren't tied to spoken words.
+ *
+ * `during` and `tail` are optional; presumably an omitted list is treated as
+ * empty — verify against the runner.
+ */
+export interface Cue {
+  text: string;
+  gapBeforeMs: number;
+  during?: Activity[];
+  tail?: Activity[];
+}
+
+/**
+ * Top-level storyboard. `pre` runs once before the first cue's gapBefore;
+ * `post` runs once after the last cue's tail finishes. The cue list is what
+ * gets handed to the synth step.
+ */
+export interface Storyboard {
+  /** Optional setup activities played before the first cue. */
+  pre?: Activity[];
+  /** Ordered narration cues. */
+  cues: Cue[];
+  /** Optional closing activities played after the last cue. */
+  post?: Activity[];
+}
--- a/video/src/storyboard.ts
+++ b/video/src/storyboard.ts
@ -0,0 +1,170 @@
+import {
+  AI_ZOOM_SCALE,
+  BRAND_NAME,
+  BRAND_TAGLINE,
+  BRAND_URL,
+  PROMPT_TEXT,
+  TT_CARD_SELECTOR,
+  TT_DRAG_TO_MIN,
+  TT_SLIDER_MAX,
+} from './config.js';
+import { el, type Storyboard } from './script.js';
+
+/**
+ * The demo video, top to bottom.
+ *
+ * Audio is generated first (one batched Qwen call), so each cue's actual
+ * duration is known before recording. The runner sizes each cue's wall-time
+ * to the measured audio length, padding short `during` blocks with a
+ * trailing wait. Inter-cue spacing is controlled here via `gapBeforeMs`
+ * (silence in audio) plus optional `tail` activities (visual movement after
+ * the caption hides, before the next cue's gap).
+ *
+ * Sum of `during` declared durations MUST be ≤ measured cue duration. If
+ * synth comes back tighter than the activities can fit, the runner throws
+ * with a pointer to the offending cue — bump that cue's text, lengthen its
+ * gapBefore, or trim a during step.
+ *
+ * Reference durations (Qwen3-TTS / speaker=ryan, 2026-05-09 measured):
+ *   cue 0  1920ms   "Describe the life you want."
+ *   cue 1  2720ms   "Every matching neighbourhood, side by side."
+ *   cue 2  2160ms   "Tighten the commute to 20 minutes."
+ *   cue 3  1840ms   "Drill into a single block."
+ *   cue 4  4480ms   "Stats, listings, Street View, price history…"
+ *   cue 5  1760ms   "Take the shortlist into Excel."
+ *   cue 6  4400ms   "Perfect Postcode. Find where you actually want to live."
+ */
+export const storyboard: Storyboard = {
+  // Camera push-in to the AI box happens before the first caption — silent
+  // setup keeps the cold open from feeling rushed.
+  pre: [
+    { kind: 'clearVignette', durationMs: 0 },
+    { kind: 'wait', durationMs: 200 },
+    {
+      kind: 'zoomTo',
+      target: el('[data-tutorial="ai-filters"]'),
+      scale: AI_ZOOM_SCALE,
+      durationMs: 1300,
+    },
+    { kind: 'wait', durationMs: 140 },
+  ],
+
+  cues: [
+    // -- Scene 1: AI prompt ----------------------------------------------
+    // Cue 0 is short (1920ms) — caption shows alone, then typing + submit
+    // happen silently in the tail. The natural beat is: viewer hears the
+    // brief, then watches the prompt being typed.
+    {
+      text: 'Describe the life you want.',
+      gapBeforeMs: 0,
+      tail: [
+        { kind: 'wait', durationMs: 140 },
+        {
+          kind: 'type',
+          selector: '[data-tutorial="ai-filters"] textarea',
+          text: PROMPT_TEXT,
+          durationMs: 3000,
+        },
+        { kind: 'wait', durationMs: 140 },
+        { kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1700 },
+        { kind: 'wait', durationMs: 700 },
+      ],
+    },
+
+    // -- Scene 2: zoom out reveal ---------------------------------------
+    // The 1400ms zoomReset is shorter than the 2720ms reference cue; the
+    // remainder of the cue is padding.
+    {
+      text: 'Every matching neighbourhood, side by side.',
+      gapBeforeMs: 400,
+      during: [{ kind: 'zoomReset', durationMs: 1400 }],
+      tail: [{ kind: 'wait', durationMs: 1200 }],
+    },
+
+    // -- Scene 3: travel-time slider ------------------------------------
+    // The spoken number and the drag fraction both derive from
+    // TT_DRAG_TO_MIN, so narration and slider position stay in step.
+    {
+      text: `Tighten the commute to ${TT_DRAG_TO_MIN} minutes.`,
+      gapBeforeMs: 500,
+      during: [
+        {
+          kind: 'dragSlider',
+          // `nth=1` picks the second matching slider thumb (0-based).
+          thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`,
+          trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`,
+          toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX,
+          durationMs: 1400,
+        },
+      ],
+      tail: [{ kind: 'wait', durationMs: 1200 }],
+    },
+
+    // -- Scene 4a: deep zoom into a hexagon -----------------------------
+    // The mapZoom barely fits (1500ms vs cue 1840ms); cursor prep happens
+    // earlier in this cue's during, the click + payoff dwell are in tail.
+    // Declared during total: 200 + 1500 = 1700ms.
+    {
+      text: 'Drill into a single block.',
+      gapBeforeMs: 500,
+      during: [
+        { kind: 'cursorScale', scale: 1.4, durationMs: 200 },
+        {
+          kind: 'mapZoom',
+          target: { kind: 'point', x: 1140, y: 605 },
+          steps: 18,
+          durationMs: 1500,
+        },
+      ],
+      tail: [
+        // Wait for the post-zoom /api/postcodes response and a redraw
+        // before the click — otherwise the click can fire on a stale
+        // frame and miss the polygon.
+        { kind: 'wait', durationMs: 1200 },
+        // NOTE(review): this clicks the same fixed pixel the zoom was
+        // centred on. script.ts documents the `hexagon` target for clicks
+        // that must land on a polygon at deep zoom — confirm the fixed
+        // point is intentional.
+        {
+          kind: 'click',
+          target: { kind: 'point', x: 1140, y: 605 },
+          durationMs: 700,
+        },
+        { kind: 'cursorScale', scale: 1, durationMs: 280 },
+        // Linger so the climax cue lands on the right-pane reveal.
+        { kind: 'wait', durationMs: 1500 },
+      ],
+    },
+
+    // -- Scene 4b: right-pane payoff -----------------------------------
+    // 4480ms cue, no during — the camera holds on the populated right pane
+    // for the whole climax line. Tail dwells before the export beat.
+    {
+      text: 'Stats, listings, Street View, price history — all in one pane.',
+      gapBeforeMs: 0,
+      tail: [{ kind: 'wait', durationMs: 1200 }],
+    },
+
+    // -- Scene 5: export ------------------------------------------------
+    // 1760ms cue. zoomReset + click together fit (1700ms); 60ms padding.
+    {
+      text: 'Take the shortlist into Excel.',
+      gapBeforeMs: 500,
+      during: [
+        { kind: 'zoomReset', durationMs: 900 },
+        {
+          kind: 'click',
+          target: el('button[title="Export to Excel"]'),
+          durationMs: 800,
+        },
+      ],
+      tail: [{ kind: 'wait', durationMs: 800 }],
+    },
+
+    // -- Scene 6: outro -------------------------------------------------
+    // showOutro declares 0ms, so the whole cue is padding with the brand
+    // card already on screen; the tail adds a final dwell after the
+    // caption hides.
+    {
+      text: `${BRAND_NAME}. ${BRAND_TAGLINE}`,
+      gapBeforeMs: 600,
+      during: [
+        {
+          kind: 'showOutro',
+          brand: BRAND_NAME,
+          tagline: BRAND_TAGLINE,
+          url: BRAND_URL,
+          durationMs: 0,
+        },
+      ],
+      tail: [{ kind: 'wait', durationMs: 1500 }],
+    },
+  ],
+};
--- a/video/src/timeline.ts
+++ b/video/src/timeline.ts
@ -1,24 +1,19 @@
 import type { Page } from 'playwright';
-import { installCursor, installZoomWrapper } from './dom.js';
 import { DashboardRecorder } from './dashboard.js';
+import { installCursor, installZoomWrapper } from './dom.js';
 import { sleep } from './motion.js';
 import { dashboardUrl } from './routes.js';
-import {
-  prepareAiBox,
-  sceneAiCloseUp,
-  sceneClusterClick,
-  sceneExportAndOutro,
-  sceneTravelTimeSlider,
-  sceneZoomOutResults,
-  type SceneCtx,
-} from './scenes.js';
+import { runStoryboard, type RunnerResult } from './runner.js';
+import type { ScriptCtx, Storyboard } from './script.js';

-export interface TimelineResult {
-  sceneStartMs: number;
-  sceneEndMs: number;
-}
+/** Result of a timeline run; an alias of the runner's `RunnerResult`. */
+export type TimelineResult = RunnerResult;

-export async function prepareTimeline(page: Page): Promise<SceneCtx> {
+/**
+ * Boot the dashboard, wait for the first map response, and inject the
+ * recording chrome (cursor, zoom wrapper, caption layer). Also opens the
+ * AI prompt textarea so the storyboard can begin typing immediately.
+ */
+export async function prepareTimeline(page: Page): Promise<ScriptCtx> {
  const dashboard = new DashboardRecorder(page);
  const initialMapVersion = dashboard.getMapDataVersion();
  await page.goto(dashboardUrl(), { waitUntil: 'domcontentloaded' });
@ -29,33 +24,46 @@ export async function prepareTimeline(page: Page): Promise<SceneCtx> {
  await page.locator('canvas').first().waitFor({ state: 'attached', timeout: 15000 });
  await dashboard.waitForMapSettled(initialMapVersion, 15000);

-  await new Promise((r) => setTimeout(r, 400));
+  await sleep(400);
  await installZoomWrapper(page);
  await installCursor(page);

-  const ctx: SceneCtx = { page, dashboard, cursor: { x: 200, y: 240 } };
+  const ctx: ScriptCtx = { page, dashboard, cursor: { x: 200, y: 240 } };
  await page.mouse.move(ctx.cursor.x, ctx.cursor.y);
  await prepareAiBox(ctx);
  await sleep(80);
  return ctx;
 }

-export async function runTimeline(ctx: SceneCtx): Promise<TimelineResult> {
-  const sceneStartMs = Date.now();
-  let mark = sceneStartMs;
-
-  mark = await runScene('AI close-up', mark, () => sceneAiCloseUp(ctx));
-  mark = await runScene('Zoom out', mark, () => sceneZoomOutResults(ctx));
-  mark = await runScene('TT slider', mark, () => sceneTravelTimeSlider(ctx));
-  mark = await runScene('Cluster click', mark, () => sceneClusterClick(ctx));
-  mark = await runScene('Export + outro', mark, () => sceneExportAndOutro(ctx));
-
-  return { sceneStartMs, sceneEndMs: mark };
+/**
+ * Play `storyboard` against a prepared context. Delegates to
+ * `runStoryboard` and returns its result unchanged.
+ */
+export async function runTimeline(
+  ctx: ScriptCtx,
+  storyboard: Storyboard
+): Promise<TimelineResult> {
+  return runStoryboard(ctx, storyboard);
 }

-async function runScene(label: string, prev: number, scene: () => Promise<void>): Promise<number> {
-  await scene();
-  const now = Date.now();
-  console.log(`[scene] ${label}: ${((now - prev) / 1000).toFixed(2)}s wall`);
-  return now;
+/**
+ * Open the AI prompt before the timed scene starts. This is preparation
+ * work, not part of the storyboard, because waiting for the textarea to
+ * appear has indeterminate duration.
+ *
+ * Makes up to two attempts to reveal the textarea (a real mouse click, then
+ * a DOM-level click), then waits up to 15s for it to become visible; that
+ * final wait rejects if the textarea never shows.
+ */
+async function prepareAiBox(ctx: ScriptCtx): Promise<void> {
+  const { page } = ctx;
+  const aiRoot = page.locator('[data-tutorial="ai-filters"]').first();
+  await aiRoot.waitFor({ state: 'visible', timeout: 15000 });
+
+  const textarea = page.locator('[data-tutorial="ai-filters"] textarea');
+  // Attempt 1: click the centre of the first button inside the AI box with
+  // the real mouse. A failing visibility probe is treated as "not visible".
+  if (!(await textarea.isVisible().catch(() => false))) {
+    const aiButton = aiRoot.locator('button').first();
+    await aiButton.waitFor({ state: 'visible', timeout: 8000 });
+    const btnBox = await aiButton.boundingBox();
+    if (btnBox) await page.mouse.click(btnBox.x + btnBox.width / 2, btnBox.y + btnBox.height / 2);
+  }
+  // Attempt 2: fall back to a programmatic click on the first matching
+  // button in the page.
+  // NOTE(review): the visibility re-check is immediate. If the button is a
+  // toggle and the textarea renders slightly after attempt 1, this second
+  // click could close it again — confirm.
+  if (!(await textarea.isVisible().catch(() => false))) {
+    await page.evaluate(() => {
+      document.querySelector<HTMLElement>('[data-tutorial="ai-filters"] button')?.click();
+    });
+  }
+  await textarea.waitFor({ state: 'visible', timeout: 15000 });
+  await sleep(100);
 }
--- a/video/src/video.ts
+++ b/video/src/video.ts
@ -1,8 +1,6 @@
 import { execSync } from 'node:child_process';
 import { renameSync, statSync } from 'node:fs';
-import { MAX_DURATION_S, OUTPUT_FPS, VIDEO_SIZE, WEBM_BITRATE } from './config.js';
-
-const LEAD_IN_S = 0.12;
+import { LEAD_IN_S, MAX_DURATION_S, OUTPUT_FPS, VIDEO_SIZE, WEBM_BITRATE } from './config.js';

 export function trimRecording(
  rawPath: string,
--- a/video/tsconfig.json
+++ b/video/tsconfig.json
@ -10,6 +10,7 @@
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
+    "types": ["node"],
    "declaration": false,
    "sourceMap": true
  },
--- a/video/tts/mux.py
+++ b/video/tts/mux.py
@ -0,0 +1,188 @@
+"""Mux per-cue WAVs into recording.mp4 at their narration offsets.
+
+Reads two manifests:
+
+* ``output/audio/index.json`` (synth output) — per-cue WAV filename + measured
+  duration. Generated BEFORE recording in one batched Qwen3-TTS call.
+* ``output/narration.json`` (recorder output) — per-cue ``videoTimeMs`` against
+  the trimmed video. Generated DURING recording.
+
+Joins them by ``cueIndex`` (index in the cue list, 1:1 between manifests),
+runs ffmpeg with one ``adelay`` per cue plus a single ``amix``, copies the
+video stream, and writes ``output/recording.narrated.mp4``.
+
+Run from the ``video/`` directory after recording:
+
+    uv run --project tts python tts/mux.py
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--audio-dir", type=Path, default=Path("output/audio"))
+    parser.add_argument(
+        "--narration",
+        type=Path,
+        default=Path("output/narration.json"),
+        help="Per-cue videoTimeMs manifest written by the recorder.",
+    )
+    parser.add_argument("--video", type=Path, default=Path("output/recording.mp4"))
+    parser.add_argument(
+        "--out",
+        type=Path,
+        default=Path("output/recording.narrated.mp4"),
+    )
+    parser.add_argument(
+        "--replace",
+        action="store_true",
+        help="After muxing, atomically replace --video with --out.",
+    )
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = parse_args()
+
+    if not shutil.which("ffmpeg"):
+        print("[mux] ffmpeg not on PATH", file=sys.stderr)
+        return 1
+
+    audio_index_path = args.audio_dir / "index.json"
+    if not audio_index_path.exists():
+        print(
+            f"[mux] {audio_index_path} not found; run tts/synth.py first",
+            file=sys.stderr,
+        )
+        return 1
+
+    if not args.narration.exists():
+        print(
+            f"[mux] {args.narration} not found; the recorder must run before mux",
+            file=sys.stderr,
+        )
+        return 1
+
+    if not args.video.exists():
+        print(f"[mux] video not found: {args.video}", file=sys.stderr)
+        return 1
+
+    audio_index = json.loads(audio_index_path.read_text())
+    audio_items = [it for it in audio_index.get("items", []) if it.get("wav")]
+    if not audio_items:
+        print("[mux] synth produced no cues; copying video unchanged", file=sys.stderr)
+        shutil.copyfile(args.video, args.out)
+        return 0
+
+    narration = json.loads(args.narration.read_text())
+    nar_cues = list(narration.get("cues", []))
+    if len(nar_cues) != len(audio_items):
+        print(
+            f"[mux] cue count mismatch: synth has {len(audio_items)} cues, "
+            f"recorder logged {len(nar_cues)}. Re-run preflight + synth + record.",
+            file=sys.stderr,
+        )
+        return 1
+
+    # Sort audio items by cueIndex so list-order matches the recorder's
+    # cue list (which is also in cue order). Then pair 1:1.
+    audio_by_index = {int(it["cueIndex"]): it for it in audio_items}
+    items = []
+    for i, nar in enumerate(nar_cues):
+        audio = audio_by_index.get(i)
+        if audio is None:
+            print(f"[mux] no synth wav for cue {i}", file=sys.stderr)
+            return 1
+        items.append(
+            {
+                "cueIndex": i,
+                "wav": audio["wav"],
+                "durationMs": int(audio["durationMs"]),
+                "videoTimeMs": int(nar["videoTimeMs"]),
+                "text": nar.get("text", ""),
+            }
+        )
+
+    # Refuse to mux overlapping cues — amix would silently mash voices on top
+    # of each other. Sort by start so the order matches what we'll actually
+    # play, then check that each cue ends before the next one starts.
+    ordered = sorted(items, key=lambda it: it["videoTimeMs"])
+    overlaps: list[str] = []
+    for prev, nxt in zip(ordered, ordered[1:]):
+        prev_end = prev["videoTimeMs"] + prev["durationMs"]
+        nxt_start = nxt["videoTimeMs"]
+        if prev_end > nxt_start:
+            overlaps.append(
+                f"cue {prev['cueIndex']} ends at {prev_end}ms but cue {nxt['cueIndex']} "
+                f"starts at {nxt_start}ms (overlap {prev_end - nxt_start}ms)"
+            )
+    if overlaps:
+        raise SystemExit(
+            "[mux] refusing to produce overlapping narration:\n  - "
+            + "\n  - ".join(overlaps)
+        )
+
+    cmd: list[str] = ["ffmpeg", "-y", "-loglevel", "warning", "-i", str(args.video)]
+    for it in items:
+        cmd += ["-i", str(args.audio_dir / it["wav"])]
+
+    filter_parts: list[str] = []
+    mix_inputs: list[str] = []
+    for n, it in enumerate(items, start=1):
+        delay_ms = max(0, it["videoTimeMs"])
+        label = f"a{n}"
+        # adelay needs one delay per channel; "all=1" applies the same delay
+        # to every channel, which is what we want for mono narration.
+        filter_parts.append(
+            f"[{n}:a]aresample=async=1,adelay={delay_ms}|{delay_ms}:all=1[{label}]"
+        )
+        mix_inputs.append(f"[{label}]")
+
+    mix = (
+        f"{''.join(mix_inputs)}amix=inputs={len(items)}"
+        f":duration=longest:dropout_transition=0:normalize=0[aout]"
+    )
+    filter_complex = ";".join(filter_parts + [mix])
+
+    cmd += [
+        "-filter_complex",
+        filter_complex,
+        "-map",
+        "0:v:0",
+        "-map",
+        "[aout]",
+        "-c:v",
+        "copy",
+        "-c:a",
+        "aac",
+        "-b:a",
+        "192k",
+        "-shortest",
+        "-movflags",
+        "+faststart",
+        str(args.out),
+    ]
+
+    print(f"[mux] muxing {len(items)} narration cues into {args.out}", flush=True)
+    result = subprocess.run(cmd)
+    if result.returncode != 0:
+        print(f"[mux] ffmpeg exited {result.returncode}", file=sys.stderr)
+        return result.returncode
+
+    if args.replace:
+        args.out.replace(args.video)
+        print(f"[mux] replaced {args.video} with narrated copy", flush=True)
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/video/tts/synth.py
+++ b/video/tts/synth.py
@ -0,0 +1,208 @@
+"""Synthesize the full narration in ONE batched Qwen3-TTS call.
+
+Reads ``output/narration-script.json`` (emitted by ``dist/preflight.js``) and
+runs ``Qwen3TTSModel.generate_custom_voice`` with all cue texts as a single
+batched list — that way every cue shares the same model state, which keeps
+prosody and timbre consistent across cues. Per-cue WAVs and an index manifest
+go to ``output/audio/`` for the recording step (which reads measured cue
+durations) and the mux step (which drops each WAV at its videoTime).
+
+Run from the ``video/`` directory:
+
+    uv run --project tts python tts/synth.py
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+import soundfile as sf
+import torch
+from qwen_tts import Qwen3TTSModel
+
+
+DEFAULT_MODEL = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
+DEFAULT_SPEAKER = "ryan"
+DEFAULT_LANGUAGE = "English"
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--script",
+        type=Path,
+        default=Path("output/narration-script.json"),
+        help="Narration script emitted by dist/preflight.js.",
+    )
+    parser.add_argument(
+        "--out-dir",
+        type=Path,
+        default=Path("output/audio"),
+        help="Directory to write WAV files and index.json into.",
+    )
+    parser.add_argument(
+        "--model",
+        default=os.environ.get("TTS_MODEL", DEFAULT_MODEL),
+    )
+    parser.add_argument(
+        "--speaker",
+        default=os.environ.get("TTS_SPEAKER", DEFAULT_SPEAKER),
+        help="CustomVoice preset speaker name (use --list-speakers to enumerate).",
+    )
+    parser.add_argument(
+        "--language",
+        default=os.environ.get("TTS_LANGUAGE", DEFAULT_LANGUAGE),
+    )
+    parser.add_argument(
+        "--device",
+        default=os.environ.get("TTS_DEVICE", "cuda:0"),
+    )
+    parser.add_argument(
+        "--list-speakers",
+        action="store_true",
+        help="Load the model, print available speaker names, and exit.",
+    )
+    return parser.parse_args()
+
+
+def load_model(model_id: str, device: str) -> Qwen3TTSModel:
+    dtype = torch.bfloat16 if device.startswith("cuda") else torch.float32
+    print(f"[synth] loading {model_id} on {device} ({dtype})", flush=True)
+    return Qwen3TTSModel.from_pretrained(model_id, device_map=device, dtype=dtype)
+
+
+def cached_index_matches(
+    index_path: Path,
+    cues: list[dict],
+    speaker: str,
+    language: str,
+) -> bool:
+    """Return True iff index_path's cue list lines up with `cues` 1:1.
+
+    Compared fields: ``cueIndex``, ``text``, ``gapBeforeMs`` plus the synth
+    settings (``speaker``, ``language``). All cue WAV files must also exist
+    on disk. Mismatched length, reordered cues, or a missing WAV invalidate
+    the cache.
+    """
+    if not index_path.exists():
+        return False
+    try:
+        cached = json.loads(index_path.read_text())
+    except json.JSONDecodeError:
+        return False
+    if cached.get("speaker") != speaker or cached.get("language") != language:
+        return False
+    cached_items = cached.get("items", [])
+    if len(cached_items) != len(cues):
+        return False
+    for live, prev in zip(cues, cached_items):
+        if int(live["cueIndex"]) != int(prev.get("cueIndex", -1)):
+            return False
+        if live["text"].strip() != str(prev.get("text", "")).strip():
+            return False
+        if int(live.get("gapBeforeMs", 0)) != int(prev.get("gapBeforeMs", -1)):
+            return False
+        wav = prev.get("wav")
+        if not wav or not (index_path.parent / wav).exists():
+            return False
+    return True
+
+
+def main() -> int:
+    args = parse_args()
+
+    if args.list_speakers:
+        model = load_model(args.model, args.device)
+        speakers = model.get_supported_speakers()
+        print(json.dumps(speakers, indent=2, ensure_ascii=False))
+        return 0
+
+    if not args.script.exists():
+        print(f"[synth] script not found: {args.script}", file=sys.stderr)
+        return 1
+
+    script = json.loads(args.script.read_text())
+    cues = [c for c in script.get("items", []) if c.get("text", "").strip()]
+    if not cues:
+        print("[synth] script has no cues; nothing to generate.", file=sys.stderr)
+        return 1
+
+    args.out_dir.mkdir(parents=True, exist_ok=True)
+
+    # Skip generation when the existing audio matches the script — same cue
+    # texts and same gapBeforeMs values in the same order. Saves ~30s of GPU
+    # time when iterating on activity timing without changing narration.
+    if cached_index_matches(args.out_dir / "index.json", cues, args.speaker, args.language):
+        print(
+            f"[synth] cached audio in {args.out_dir} matches the current script — skipping generation",
+            flush=True,
+        )
+        return 0
+
+    model = load_model(args.model, args.device)
+
+    texts = [c["text"].strip() for c in cues]
+    print(f"[synth] generating {len(texts)} cues in one batched call", flush=True)
+    for i, t in enumerate(texts):
+        print(f"[synth]   {i:2d}: {t}", flush=True)
+
+    # ONE batched call. generate_custom_voice handles text=List[str] natively
+    # and broadcasts the speaker/language across all items, so the entire
+    # narration is decoded in one model pass — same RNG state, same batch,
+    # consistent voice from cue to cue.
+    wavs, sr = model.generate_custom_voice(
+        text=texts,
+        language=args.language,
+        speaker=args.speaker,
+    )
+    if len(wavs) != len(texts):
+        print(
+            f"[synth] model returned {len(wavs)} wavs for {len(texts)} cues",
+            file=sys.stderr,
+        )
+        return 1
+
+    items = []
+    for cue, audio in zip(cues, wavs):
+        if hasattr(audio, "cpu"):
+            audio = audio.cpu().float().numpy()
+        wav_name = f"cue_{cue['cueIndex']:03d}.wav"
+        wav_path = args.out_dir / wav_name
+        sf.write(str(wav_path), audio, sr)
+        duration_ms = int(round(len(audio) * 1000 / sr))
+        items.append(
+            {
+                "cueIndex": cue["cueIndex"],
+                "text": cue["text"],
+                "gapBeforeMs": int(cue.get("gapBeforeMs", 0)),
+                "wav": wav_name,
+                "sampleRate": sr,
+                "durationMs": duration_ms,
+            }
+        )
+        print(
+            f"[synth] wrote {wav_name}  {duration_ms:>5d}ms  «{cue['text']}»",
+            flush=True,
+        )
+
+    out_index = {
+        "speaker": args.speaker,
+        "language": args.language,
+        "model": args.model,
+        "items": items,
+    }
+    (args.out_dir / "index.json").write_text(json.dumps(out_index, indent=2))
+    total_ms = sum(it["gapBeforeMs"] + it["durationMs"] for it in items)
+    print(
+        f"[synth] {len(items)} cues, {total_ms}ms of audio (incl. gaps) -> {args.out_dir}",
+        flush=True,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())