LGTM

2026-05-11 21:38:26 +01:00 · 2026-05-11 21:38:26 +01:00 · f2a2651b8a
commit f2a2651b8a
parent 9248e26af2
95 changed files with 3993 additions and 1471 deletions
--- a/video/package.json
+++ b/video/package.json
@ -9,8 +9,6 @@
    "bootstrap-admin": "tsc && node dist/pb-admin.js",
    "setup-auth": "tsc && node dist/auth.js",
    "record": "tsc && node dist/record.js",
-    "record:vertical": "tsc && ASPECT=9x16 node dist/record.js",
-    "encode": "ffmpeg -y -i output/recording.webm -c:v libx264 -pix_fmt yuv420p -crf 14 -preset fast -movflags +faststart output/recording.mp4",
    "verify-output": "tsc && node dist/verify.js",
    "render": "./render.sh"
  },
--- a/video/render.sh
+++ b/video/render.sh
@ -1,6 +1,11 @@
 #!/usr/bin/env bash
 #
-# End-to-end re-render of the dashboard demo video.
+# End-to-end re-render of the dashboard demo videos.
+#
+# All per-storyboard knobs (aspect, fps, bitrate, prompt text, voice persona,
+# poster timestamp, brand strings…) live on the Storyboard objects in
+# src/storyboard.ts. To add a vertical cut or change the voice, edit that
+# file — this script only handles target/auth/transport concerns.
 #
 # Two targets:
 #   local (default) — assumes the docker-compose stack on host.docker.internal,
@ -17,7 +22,6 @@
 #   ./render.sh --no-audio            # skip Qwen3-TTS narration; silent MP4
 #   FORCE_AUTH=1 ./render.sh          # same as --fresh-auth
 #   APP_URL=http://localhost:3001 ./render.sh   # override frontend URL
-#   TTS_SPEAKER=aiden ./render.sh               # override CustomVoice speaker
 #
 # Cred env vars (read for both targets, but prod has no fallback defaults):
 #   LOGIN_EMAIL, LOGIN_PASSWORD       — the dashboard account to record as
@ -48,7 +52,7 @@ case "$TARGET" in
  *) echo "Unknown --target: $TARGET (expected: local, prod)" >&2; exit 2 ;;
 esac

-# -- config (override via env) -------------------------------------------------
+# -- environment (target-specific URLs and credentials) ----------------------
 if [ "$TARGET" = "prod" ]; then
  # Prod serves frontend, /api/*, and /pb/* off the same domain.
  export APP_URL="${APP_URL:-https://perfect-postcode.co.uk}"
@ -81,23 +85,6 @@ AUTH_TTL_HOURS="${AUTH_TTL_HOURS:-24}"  # re-auth if cache older than this
 # the built bundle, so updating this path is what makes the new clip appear
 # on the homepage. Override if the dashboard ever moves.
 PUBLISH_DIR="${PUBLISH_DIR:-../frontend/public/video}"
-# When in the output timeline to grab the poster frame.
-# Right-pane inspection (~16s output) is the clearest paused-state preview:
-# Manchester map, filters applied, right pane populated, larger narration
-# caption visible.
-POSTER_TIME_S="${POSTER_TIME_S:-16}"
-
-# Recorder/encoder knobs read by src/config.ts. config.ts treats these as
-# required, so they live here (the only entry point) rather than as defaults
-# scattered across TS modules. Override per-run via env.
-export ASPECT="${ASPECT:-16x9}"
-export CAPTURE_SCALE="${CAPTURE_SCALE:-1}"
-export WEBM_BITRATE="${WEBM_BITRATE:-$(awk -v s="$CAPTURE_SCALE" 'BEGIN{print (s+0>1)?"18M":"8M"}')}"
-export PROMPT_TEXT="${PROMPT_TEXT:-Flats or terraces <£450k, 35 min to Manchester, low crime}"
-export AI_ZOOM_SCALE="${AI_ZOOM_SCALE:-2.4}"
-export MAX_DURATION_S="${MAX_DURATION_S:-60}"
-export MIN_DURATION_S="${MIN_DURATION_S:-10}"
-export OUTPUT_FPS="${OUTPUT_FPS:-50}"

 FRESH_AUTH="${FORCE_AUTH:-0}"
 DO_ENCODE=1
@ -109,7 +96,7 @@ for arg in "${@:-}"; do
    --no-encode) DO_ENCODE=0 ;;
    --no-audio) DO_AUDIO=0 ;;
    -h|--help)
-      sed -n '3,30p' "$0"
+      sed -n '3,32p' "$0"
      exit 0 ;;
    *) echo "Unknown arg: $arg" >&2; exit 2 ;;
  esac
@ -207,22 +194,57 @@ else
  say "Reusing existing $AUTH_STATE_FILE"
 fi

-# -- preflight + synth (Qwen3-TTS) -------------------------------------------
-# Synth runs BEFORE recording: one batched generate_custom_voice call across
-# all cues so the voice stays consistent. The recorder reads
-# output/audio/index.json for measured per-cue durations and sizes each
-# cue's wall-clock to fit; --no-audio skips synth and the recorder falls
-# back to a worst-case estimate.
+# -- preflight ---------------------------------------------------------------
+# preflight emits per-storyboard narration scripts AND output/storyboards.json
+# (the index this script loops over below). Run it BEFORE wiping per-storyboard
+# files so we know what slugs to target.
 mkdir -p output
-# Wipe last run's leaking artifacts so the rename step picks up *this* run.
-rm -f output/recording.webm output/recording.mp4 output/page@*.webm output/page@*.webm.untrimmed
-rm -f output/narration-script.json output/narration.json
-# output/audio/ is preserved; tts/synth.py decides whether the cached WAVs
-# still match the script and skips generation when they do.
-
-say "Preflight: emitting narration script"
+say "Preflight: emitting narration scripts and storyboard index"
 node dist/preflight.js

+if [ ! -s output/storyboards.json ]; then
+  fail "preflight did not produce output/storyboards.json"
+fi
+
+# Pull the storyboard slugs out of the index. Use Node so we don't grow a jq
+# dependency just for reading this index.
+mapfile -t STORYBOARDS < <(node -e '
+  const idx = JSON.parse(require("fs").readFileSync("output/storyboards.json","utf8"));
+  for (const s of idx.storyboards) console.log(s.name);
+')
+if [ "${#STORYBOARDS[@]}" -eq 0 ]; then
+  fail "storyboards.json contains no storyboards"
+fi
+say "Storyboards to render: ${STORYBOARDS[*]}"
+
+# Per-storyboard poster timestamp lookup (slug → seconds). Each call re-reads
+# output/storyboards.json; prints posterTimeS, or exits 1 for an unknown slug.
+poster_time_for() {
+  node -e '
+    const idx = JSON.parse(require("fs").readFileSync("output/storyboards.json","utf8"));
+    const sb = idx.storyboards.find(s => s.name === process.argv[1]);
+    if (!sb) { process.exit(1); }
+    process.stdout.write(String(sb.posterTimeS));
+  ' "$1"
+}
+
+# -- per-storyboard wipe of leaking artefacts --------------------------------
+# output/<sb>/audio/ is preserved; tts/synth.py decides whether the cached
+# WAVs still match the script and skips generation when they do.
+for sb in "${STORYBOARDS[@]}"; do
+  rm -f "output/$sb/recording.webm" "output/$sb/recording.mp4" \
+        "output/$sb/page@"*.webm "output/$sb/page@"*.webm.untrimmed \
+        "output/$sb/recording.raw.webm" "output/$sb/recording.raw.webm.untrimmed" \
+        "output/$sb/recording.narrated.mp4" "output/$sb/poster.jpg" \
+        "output/$sb/narration.json"
+done
+
+# -- synth (Qwen3-TTS) -------------------------------------------------------
+# Synth runs BEFORE recording: one batched generate_voice_clone call per
+# storyboard so the voice stays consistent within each video. The recorder
+# reads output/<sb>/audio/index.json for measured per-cue durations and
+# sizes each cue's wall-clock to fit; --no-audio skips synth and the recorder
+# falls back to a worst-case estimate.
 if [ "$DO_AUDIO" = "1" ]; then
  if ! command -v uv >/dev/null 2>&1; then
    fail "uv not on PATH (required for Qwen3-TTS synth). Install uv or rerun with --no-audio."
@ -236,95 +258,103 @@ if [ "$DO_AUDIO" = "1" ]; then
  if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L >/dev/null 2>&1; then
    uv_sync_extras+=(--extra gpu)
  fi
-  say "Synthesising narration with Qwen3-TTS (speaker=${TTS_SPEAKER:-ryan}) — one batched call"
+  say "Synchronising tts/ Python deps"
  uv sync --project tts ${uv_sync_extras[@]+"${uv_sync_extras[@]}"} || fail "uv sync failed in video/tts"
-  uv run --project tts python tts/synth.py || fail "tts/synth.py failed"
-  if [ ! -s output/audio/index.json ]; then
-    fail "synth did not produce output/audio/index.json"
-  fi
+
+  for sb in "${STORYBOARDS[@]}"; do
+    say "Synthesising narration for [$sb] — one batched call"
+    uv run --project tts python tts/synth.py --storyboard "$sb" \
+      || fail "tts/synth.py failed for $sb"
+    if [ ! -s "output/$sb/audio/index.json" ]; then
+      fail "synth did not produce output/$sb/audio/index.json"
+    fi
+  done
 fi

-# -- record -------------------------------------------------------------------
-say "Recording"
+# -- record ------------------------------------------------------------------
+# record.ts iterates over storyboards in-process and writes per-storyboard
+# recording.webm + narration.json. One Node invocation handles all of them
+# so we don't spin up Playwright + GPU/WebGL + auth more than necessary.
+say "Recording all storyboards"
 APP_URL="$APP_URL" node dist/record.js

-if [ ! -s output/recording.webm ]; then
-  fail "recording.webm missing or empty"
-fi
-node dist/verify.js output/recording.webm
-
-# -- encode -------------------------------------------------------------------
-if [ "$DO_ENCODE" = "1" ]; then
-  if ! command -v ffmpeg >/dev/null 2>&1; then
-    fail "ffmpeg not on PATH; rerun with --no-encode if you only need the WebM"
+for sb in "${STORYBOARDS[@]}"; do
+  if [ ! -s "output/$sb/recording.webm" ]; then
+    fail "[$sb] recording.webm missing or empty"
  fi
-  say "Encoding to MP4"
-  ffmpeg -y -loglevel warning -i output/recording.webm \
-    -c:v libx264 -pix_fmt yuv420p -crf 14 -preset fast \
-    -movflags +faststart \
-    output/recording.mp4
+  node dist/verify.js "$sb" "output/$sb/recording.webm"
+done

-  # Poster: a single high-quality JPEG extracted from a representative
-  # moment in the output timeline. Used as the homepage <video poster=...>,
-  # which is what the visitor sees before pressing play.
-  #   - -ss AFTER -i = output-side seek, frame-accurate (input-side seek
-  #     would land on the nearest keyframe, drifting back up to ~2s).
-  #   - -update 1 tells ffmpeg the output is a single image, not a sequence.
-  #   - -q:v 2 = high JPEG quality (~95%); poster file is ~120KB at 1080p.
-  say "Extracting poster frame at ${POSTER_TIME_S}s"
-  ffmpeg -y -loglevel warning -i output/recording.mp4 -ss "$POSTER_TIME_S" \
-    -frames:v 1 -update 1 -q:v 2 \
-    output/poster.jpg
-
-  node dist/verify.js output/recording.mp4 output/poster.jpg
+# -- encode + mux + publish (per storyboard) ---------------------------------
+if [ "$DO_ENCODE" = "1" ] && ! command -v ffmpeg >/dev/null 2>&1; then
+  fail "ffmpeg not on PATH; rerun with --no-encode if you only need the WebM"
 fi

-# -- mux narration ------------------------------------------------------------
-# Synth already produced per-cue WAVs (in output/audio/); the recorder logged
-# each cue's videoTime against the trimmed timeline. Drop the WAVs onto the
-# mp4 with one ffmpeg adelay+amix and replace the silent recording in place.
-if [ "$DO_ENCODE" = "1" ] && [ "$DO_AUDIO" = "1" ]; then
-  if [ ! -s output/narration.json ]; then
-    fail "narration.json missing — recorder did not log cues"
+for sb in "${STORYBOARDS[@]}"; do
+  if [ "$DO_ENCODE" = "1" ]; then
+    say "[$sb] Encoding to MP4"
+    ffmpeg -y -loglevel warning -i "output/$sb/recording.webm" \
+      -c:v libx264 -pix_fmt yuv420p -crf 14 -preset fast \
+      -movflags +faststart \
+      "output/$sb/recording.mp4"
+
+    # Poster: a single high-quality JPEG extracted from a representative
+    # moment in the output timeline. Used as the homepage <video poster=...>.
+    #   - -ss AFTER -i = output-side seek, frame-accurate (input-side seek
+    #     would land on the nearest keyframe, drifting back up to ~2s).
+    #   - -update 1 tells ffmpeg the output is a single image, not a sequence.
+    #   - -q:v 2 = high JPEG quality (~95%); poster file is ~120KB at 1080p.
+    poster_t="$(poster_time_for "$sb")"
+    say "[$sb] Extracting poster frame at ${poster_t}s"
+    ffmpeg -y -loglevel warning -i "output/$sb/recording.mp4" -ss "$poster_t" \
+      -frames:v 1 -update 1 -q:v 2 \
+      "output/$sb/poster.jpg"
+
+    node dist/verify.js "$sb" "output/$sb/recording.mp4" "output/$sb/poster.jpg"
  fi
-  say "Muxing narration into output/recording.mp4"
-  uv run --project tts python tts/mux.py --replace \
-    || fail "tts/mux.py failed"
-  node dist/verify.js output/recording.mp4
-fi

-# -- publish to homepage ------------------------------------------------------
-# Only publish when we did the encode (otherwise we'd be copying a stale
-# mp4 next to a fresh webm). --no-encode skips this whole block.
-if [ "$DO_ENCODE" = "1" ]; then
-  if [ ! -d "$PUBLISH_DIR" ]; then
-    say "Creating $PUBLISH_DIR"
-    mkdir -p "$PUBLISH_DIR"
+  if [ "$DO_ENCODE" = "1" ] && [ "$DO_AUDIO" = "1" ]; then
+    if [ ! -s "output/$sb/narration.json" ]; then
+      fail "[$sb] narration.json missing — recorder did not log cues"
+    fi
+    say "[$sb] Muxing narration into output/$sb/recording.mp4"
+    uv run --project tts python tts/mux.py --storyboard "$sb" --replace \
+      || fail "tts/mux.py failed for $sb"
+    node dist/verify.js "$sb" "output/$sb/recording.mp4"
  fi
-  say "Publishing to $PUBLISH_DIR"
-  cp output/recording.mp4 "$PUBLISH_DIR/recording.mp4"
-  cp output/poster.jpg    "$PUBLISH_DIR/poster.jpg"
-  node dist/verify.js "$PUBLISH_DIR/recording.mp4" "$PUBLISH_DIR/poster.jpg"
-fi

-# -- report -------------------------------------------------------------------
+  # Only publish when we did the encode (otherwise we'd be copying a stale
+  # mp4 next to a fresh webm). --no-encode skips publish.
+  if [ "$DO_ENCODE" = "1" ]; then
+    if [ ! -d "$PUBLISH_DIR" ]; then
+      say "Creating $PUBLISH_DIR"
+      mkdir -p "$PUBLISH_DIR"
+    fi
+    say "[$sb] Publishing to $PUBLISH_DIR/$sb.{mp4,jpg}"
+    cp "output/$sb/recording.mp4" "$PUBLISH_DIR/$sb.mp4"
+    cp "output/$sb/poster.jpg"    "$PUBLISH_DIR/$sb.jpg"
+    node dist/verify.js "$sb" "$PUBLISH_DIR/$sb.mp4" "$PUBLISH_DIR/$sb.jpg"
+  fi
+done
+
+# -- report ------------------------------------------------------------------
 say "Done"
 if command -v ffprobe >/dev/null 2>&1; then
-  for f in output/recording.webm output/recording.mp4 output/poster.jpg \
-           "$PUBLISH_DIR/recording.mp4" "$PUBLISH_DIR/poster.jpg"; do
-    [ -f "$f" ] || continue
-    size=$(stat -c '%s' "$f" 2>/dev/null || stat -f '%z' "$f")
-    case "$f" in
-      *.mp4|*.webm)
-        dur=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$f")
-        printf '  %s  %ss  %s bytes\n' "$f" "$(printf '%.2f' "$dur")" "$size"
-        ;;
-      *)
-        printf '  %s  %s bytes\n' "$f" "$size"
-        ;;
-    esac
+  for sb in "${STORYBOARDS[@]}"; do
+    for f in "output/$sb/recording.webm" "output/$sb/recording.mp4" \
+             "output/$sb/poster.jpg" \
+             "$PUBLISH_DIR/$sb.mp4" "$PUBLISH_DIR/$sb.jpg"; do
+      [ -f "$f" ] || continue
+      size=$(stat -c '%s' "$f" 2>/dev/null || stat -f '%z' "$f")
+      case "$f" in
+        *.mp4|*.webm)
+          dur=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$f")
+          printf '  %s  %ss  %s bytes\n' "$f" "$(printf '%.2f' "$dur")" "$size"
+          ;;
+        *)
+          printf '  %s  %s bytes\n' "$f" "$size"
+          ;;
+      esac
+    done
  done
-else
-  ls -la output/recording.* output/poster.jpg \
-        "$PUBLISH_DIR/recording.mp4" "$PUBLISH_DIR/poster.jpg" 2>/dev/null || true
 fi
--- a/video/src/browser.ts
+++ b/video/src/browser.ts
@ -3,48 +3,52 @@ import {
  type Browser,
  type BrowserContext,
  type Page,
-} from "playwright";
-import {
-  AUTH_STATE_PATH,
-  CAPTURE_SCALE,
-  OUTPUT_DIR,
-  VIDEO_SIZE,
-  VIEWPORT,
-} from "./config.js";
+} from 'playwright';
+import { AUTH_STATE_PATH } from './config.js';
+import { viewportFor, type Storyboard } from './script.js';

 export interface RecordingBrowser {
  browser: Browser;
  context: BrowserContext;
 }

-export async function launchRecordingBrowser(): Promise<RecordingBrowser> {
+export interface LaunchOptions {
+  /** Directory the playwright recorder writes the raw .webm into. */
+  recordDir: string;
+}
+
+export async function launchRecordingBrowser(
+  storyboard: Storyboard,
+  opts: LaunchOptions
+): Promise<RecordingBrowser> {
  const browser = await chromium.launch({
    headless: true,
    args: [
-      "--disable-blink-features=AutomationControlled",
-      "--enable-gpu",
-      "--use-gl=angle",
-      "--use-angle=gl-egl",
-      "--ignore-gpu-blocklist",
-      "--enable-webgl",
-      "--enable-webgl2",
-      "--enable-gpu-rasterization",
-      "--enable-zero-copy",
-      "--disable-software-rasterizer",
-      "--disable-frame-rate-limit",
-      "--disable-gpu-vsync",
-      "--disable-features=CalculateNativeWinOcclusion,IntensiveWakeUpThrottling",
-      "--disable-renderer-backgrounding",
-      "--disable-background-timer-throttling",
-      "--disable-backgrounding-occluded-windows",
+      '--disable-blink-features=AutomationControlled',
+      '--enable-gpu',
+      '--use-gl=angle',
+      '--use-angle=gl-egl',
+      '--ignore-gpu-blocklist',
+      '--enable-webgl',
+      '--enable-webgl2',
+      '--enable-gpu-rasterization',
+      '--enable-zero-copy',
+      '--disable-software-rasterizer',
+      '--disable-frame-rate-limit',
+      '--disable-gpu-vsync',
+      '--disable-features=CalculateNativeWinOcclusion,IntensiveWakeUpThrottling',
+      '--disable-renderer-backgrounding',
+      '--disable-background-timer-throttling',
+      '--disable-backgrounding-occluded-windows',
    ],
  });

+  const viewport = viewportFor(storyboard.video);
  const context = await browser.newContext({
    storageState: AUTH_STATE_PATH,
-    viewport: VIEWPORT,
-    deviceScaleFactor: CAPTURE_SCALE,
-    recordVideo: { dir: OUTPUT_DIR, size: VIDEO_SIZE },
+    viewport,
+    deviceScaleFactor: storyboard.video.captureScale,
+    recordVideo: { dir: opts.recordDir, size: viewport },
  });
  await suppressDevServerNoise(context);
  return { browser, context };
@ -52,11 +56,11 @@ export async function launchRecordingBrowser(): Promise<RecordingBrowser> {

 export async function assertHardwareWebGL(page: Page): Promise<void> {
  const info = await page.evaluate(() => {
-    const canvas = document.createElement("canvas");
-    const gl = canvas.getContext("webgl2");
-    if (!gl) return { webgl: false, vendor: "", renderer: "" };
+    const canvas = document.createElement('canvas');
+    const gl = canvas.getContext('webgl2');
+    if (!gl) return { webgl: false, vendor: '', renderer: '' };

-    const ext = gl.getExtension("WEBGL_debug_renderer_info");
+    const ext = gl.getExtension('WEBGL_debug_renderer_info');
    const vendor = String(
      ext
        ? gl.getParameter(ext.UNMASKED_VENDOR_WEBGL)
@ -71,15 +75,15 @@ export async function assertHardwareWebGL(page: Page): Promise<void> {
  });

  console.log(
-    `[gpu] WebGL renderer: ${info.webgl ? `${info.vendor} / ${info.renderer}` : "none"}`,
+    `[gpu] WebGL renderer: ${info.webgl ? `${info.vendor} / ${info.renderer}` : 'none'}`,
  );
  if (
-    process.env.ALLOW_SOFTWARE_GL !== "1" &&
+    process.env.ALLOW_SOFTWARE_GL !== '1' &&
    (!info.webgl ||
      /SwiftShader|llvmpipe|software/i.test(`${info.vendor} ${info.renderer}`))
  ) {
    throw new Error(
-      "Recording browser did not get hardware WebGL. Set ALLOW_SOFTWARE_GL=1 to bypass this guard.",
+      'Recording browser did not get hardware WebGL. Set ALLOW_SOFTWARE_GL=1 to bypass this guard.',
    );
  }
 }
@ -89,45 +93,45 @@ async function suppressDevServerNoise(context: BrowserContext) {
    const RealWS = window.WebSocket;
    window.WebSocket = new Proxy(RealWS, {
      construct(target, args) {
-        const url = String(args[0] ?? "");
-        const proto = (args[1] as string | string[] | undefined) ?? "";
-        const protoStr = Array.isArray(proto) ? proto.join(",") : proto;
+        const url = String(args[0] ?? '');
+        const proto = (args[1] as string | string[] | undefined) ?? '';
+        const protoStr = Array.isArray(proto) ? proto.join(',') : proto;
        if (
-          protoStr.includes("vite-hmr") ||
-          protoStr.includes("webpack") ||
-          url.includes("/ws") ||
-          url.includes("sockjs-node")
+          protoStr.includes('vite-hmr') ||
+          protoStr.includes('webpack') ||
+          url.includes('/ws') ||
+          url.includes('sockjs-node')
        ) {
          const fake = new EventTarget() as WebSocket;
          Object.defineProperties(fake, {
            readyState: { value: RealWS.CLOSED },
            url: { value: url },
-            protocol: { value: "" },
-            extensions: { value: "" },
+            protocol: { value: '' },
+            extensions: { value: '' },
            bufferedAmount: { value: 0 },
-            binaryType: { value: "blob", writable: true },
+            binaryType: { value: 'blob', writable: true },
          });
          fake.send = () => {};
-          fake.close = () => fake.dispatchEvent(new Event("close"));
-          queueMicrotask(() => fake.dispatchEvent(new Event("close")));
+          fake.close = () => fake.dispatchEvent(new Event('close'));
+          queueMicrotask(() => fake.dispatchEvent(new Event('close')));
          return fake;
        }
        return Reflect.construct(target, args);
      },
    });

-    Object.defineProperty(window.location, "reload", {
+    Object.defineProperty(window.location, 'reload', {
      value: () => {},
      configurable: true,
    });
-    window.addEventListener("error", (e) => e.stopImmediatePropagation(), true);
+    window.addEventListener('error', (e) => e.stopImmediatePropagation(), true);
    window.addEventListener(
-      "unhandledrejection",
+      'unhandledrejection',
      (e) => e.stopImmediatePropagation(),
      true,
    );

-    const styleEl = document.createElement("style");
+    const styleEl = document.createElement('style');
    styleEl.textContent = `
      vite-error-overlay,
      wds-overlay,
@ -148,12 +152,12 @@ async function suppressDevServerNoise(context: BrowserContext) {

    const killOverlay = (node: Element) => {
      const tag = node.tagName?.toLowerCase();
-      const id = (node as HTMLElement).id?.toLowerCase() ?? "";
+      const id = (node as HTMLElement).id?.toLowerCase() ?? '';
      if (
-        tag === "vite-error-overlay" ||
-        tag === "wds-overlay" ||
-        id.includes("webpack-dev-server-client") ||
-        id.includes("webpack-error")
+        tag === 'vite-error-overlay' ||
+        tag === 'wds-overlay' ||
+        id.includes('webpack-dev-server-client') ||
+        id.includes('webpack-error')
      ) {
        (node as HTMLElement).remove();
      }
@ -168,7 +172,7 @@ async function suppressDevServerNoise(context: BrowserContext) {
    if (document.body)
      obs.observe(document.body, { childList: true, subtree: true });
    else {
-      document.addEventListener("DOMContentLoaded", () =>
+      document.addEventListener('DOMContentLoaded', () =>
        obs.observe(document.body, { childList: true, subtree: true }),
      );
    }
--- a/video/src/config.ts
+++ b/video/src/config.ts
@ -6,101 +6,19 @@ function requiredEnv(name: string): string {
  return value;
 }

-function requiredNumberEnv(name: string): number {
-  const value = Number(requiredEnv(name));
-  if (!Number.isFinite(value)) {
-    throw new Error(`${name} must be a finite number`);
-  }
-  return value;
-}
+// Environment-only knobs. Per-storyboard tuning (aspect, fps, bitrate,
+// voice, prompts, brand…) lives on the Storyboard object itself — see
+// src/storyboard.ts.

-export const APP_URL = requiredEnv("APP_URL");
-export const DASHBOARD_PATH = "/dashboard";
+export const APP_URL = requiredEnv('APP_URL');
+export const DASHBOARD_PATH = '/dashboard';

 // Per-target storage state. render.sh sets AUTH_STATE_FILE to auth.local.json
 // or auth.prod.json so a stale local token can't be reused against prod.
-export const AUTH_STATE_PATH = process.env.AUTH_STATE_FILE ?? "auth.json";
-export const OUTPUT_DIR = "output";
-
-const aspect = requiredEnv("ASPECT");
-if (aspect !== "16x9" && aspect !== "9x16") {
-  throw new Error("ASPECT must be '16x9' or '9x16'");
-}
-export const VIEWPORT =
-  aspect === "9x16"
-    ? { width: 1080, height: 1920 }
-    : { width: 1920, height: 1080 };
-export const CAPTURE_SCALE = Math.max(1, requiredNumberEnv("CAPTURE_SCALE"));
-export const VIDEO_SIZE = {
-  width: VIEWPORT.width,
-  height: VIEWPORT.height,
-};
-export const WEBM_BITRATE = requiredEnv("WEBM_BITRATE");
-
-// Cold-open prompt. Punchy version of the user's intent, short enough to type
-// on camera without making the opening scene drag.
-export const PROMPT_TEXT = requiredEnv("PROMPT_TEXT");
-
-// Filters returned by the AI stub. Keys MUST match real feature names from
-// /api/features (verified against the running server's schema).
-export const STUBBED_FILTERS: Record<string, [number, number] | string[]> = {
-  "Property type": ["Flats/Maisonettes", "Terraced"],
-  "Estimated current price": [175000, 450000],
-  "Serious crime per 1k residents (avg/yr)": [0, 55],
-  "Noise (dB)": [50, 68],
-};
-
-// Travel-time filters returned by the AI stub. Slug matches the real
-// /api/travel-destinations?mode=transit response.
-export const STUBBED_TRAVEL_TIME_FILTERS: {
-  mode: "transit" | "car" | "bicycle" | "walking";
-  slug: string;
-  label: string;
-  min?: number;
-  max?: number;
-}[] = [
-  {
-    mode: "transit",
-    slug: "manchester",
-    label: "Manchester city centre",
-    max: 35,
-  },
-];
-
-// The travel-time card we'll drag manually after AI applies. The Filters
-// component renders each travel-time entry with `data-filter-name="tt_${i}"`,
-// and our stub only sets one entry, so it's tt_0.
-export const TT_CARD_SELECTOR = '[data-filter-name="tt_0"]';
-export const TT_SLIDER_MAX = 120;
-export const TT_DRAG_FROM_MIN = 35; // matches AI stub max above
-export const TT_DRAG_TO_MIN = 20;
-
-// Cold-open zoom: how aggressively to magnify the AI box.
-// 2.4 fills most of the viewport with the prompt card without blowing up text.
-export const AI_ZOOM_SCALE = requiredNumberEnv("AI_ZOOM_SCALE");
-
-// Initial map view used while we navigate. The AI scene zooms in on the
-// sidebar so this only matters once we zoom out.
-export const INITIAL_MAP_VIEW = {
-  lat: 53.4795,
-  lon: -2.2451,
-  zoom: 11.5,
-};
-
-// Verification guard only. The renderer does not use this as an editing cap:
-// if the storyboard needs more than 15 seconds to avoid jumps, keep the frames.
-export const MAX_DURATION_S = requiredNumberEnv("MAX_DURATION_S");
-export const MIN_DURATION_S = requiredNumberEnv("MIN_DURATION_S");
-
-// Target fps of the FINAL output.
-export const OUTPUT_FPS = requiredNumberEnv("OUTPUT_FPS");
+export const AUTH_STATE_PATH = process.env.AUTH_STATE_FILE ?? 'auth.json';
+export const OUTPUT_DIR = 'output';

 // Frames of head-room kept in front of sceneStart when trimming. Shared by
 // the video trim and the narration manifest so cue offsets line up with the
-// trimmed timeline.
+// trimmed timeline. Not tuned per storyboard — same lead-in for any cut.
 export const LEAD_IN_S = 0.12;
-
-// Brand strings for the outro card.
-export const BRAND_NAME = "Perfect Postcode";
-export const BRAND_TAGLINE = "Find where you actually want to live.";
-export const BRAND_URL = "https://perfect-postcode.co.uk";
--- a/video/src/preflight.ts
+++ b/video/src/preflight.ts
@ -1,32 +1,83 @@
 import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { OUTPUT_DIR } from './config.js';
-import { storyboard } from './storyboard.js';
+import type { Storyboard } from './script.js';
+import { storyboards } from './storyboard.js';

 /**
- * Emit the narration script for the synth step.
+ * Emit per-storyboard narration scripts for the synth step.
 *
 * Synth (tts/synth.py) runs BEFORE recording, so it needs the full ordered
- * narration list — text + per-cue gaps — without depending on Playwright,
- * the dashboard, or auth. Walk the storyboard cues, write a flat manifest,
- * exit.
+ * narration list — text + per-cue gaps + voice config — without depending
+ * on Playwright, the dashboard, or auth. Walk each storyboard's cues, write
+ * a flat manifest under `output/<name>/narration-script.json`, then write
+ * an index manifest at `output/storyboards.json` so render.sh knows which
+ * storyboard slugs to loop over.
 *
- * The cue index in this manifest is the source of truth: the runner later
+ * The cue index in each manifest is the source of truth: the runner later
 * matches storyboard cues to measured durations by index.
 */
-function main(): void {
-  if (!existsSync(OUTPUT_DIR)) mkdirSync(OUTPUT_DIR, { recursive: true });
+// Em/en-dashes and ellipses make Qwen3-TTS produce dramatic pauses, sighs,
+// or audible breaths — the captions still render the original (unicode-rich)
+// text from the storyboard; only the synth input is sanitised.
+function normalizeForTts(text: string): string {
+  return text
+    .replace(/\s*[—–]\s*/g, ', ')
+    .replace(/…/g, '.')
+    .replace(/\.{3,}/g, '.')
+    .replace(/\s{2,}/g, ' ')
+    .trim();
+}
+
+function emitScript(storyboard: Storyboard): string {
+  const dir = join(OUTPUT_DIR, storyboard.name);
+  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

  const items = storyboard.cues.map((cue, cueIndex) => ({
    cueIndex,
-    text: cue.text.trim(),
+    text: normalizeForTts(cue.text),
    gapBeforeMs: cue.gapBeforeMs,
  }));

-  const manifest = { items };
-  const path = join(OUTPUT_DIR, 'narration-script.json');
+  // The voice block is consumed by tts/synth.py — see _resolve_reference and
+  // the cache check there for which fields invalidate cached audio.
+  const manifest = {
+    storyboard: storyboard.name,
+    voice: {
+      instruct: storyboard.voice.instruct,
+      language: storyboard.voice.language,
+      temperature: storyboard.voice.temperature ?? 0.6,
+      topP: storyboard.voice.topP ?? 0.9,
+      seed: storyboard.voice.seed ?? 42,
+    },
+    items,
+  };
+  const path = join(dir, 'narration-script.json');
  writeFileSync(path, JSON.stringify(manifest, null, 2));
-  console.log(`Wrote ${items.length} narration cues to ${path}`);
+  console.log(`[preflight] [${storyboard.name}] wrote ${items.length} cues → ${path}`);
+  return path;
+}
+
+function main(): void {
+  if (!existsSync(OUTPUT_DIR)) mkdirSync(OUTPUT_DIR, { recursive: true });
+
+  for (const sb of storyboards) emitScript(sb);
+
+  // Index for shell loops — each entry has every field render.sh needs to
+  // address per-storyboard outputs without re-parsing the TS source.
+  const index = {
+    storyboards: storyboards.map((sb) => ({
+      name: sb.name,
+      aspect: sb.video.aspect,
+      outputFps: sb.video.outputFps,
+      minDurationS: sb.video.minDurationS,
+      maxDurationS: sb.video.maxDurationS,
+      posterTimeS: sb.video.posterTimeS,
+    })),
+  };
+  const indexPath = join(OUTPUT_DIR, 'storyboards.json');
+  writeFileSync(indexPath, JSON.stringify(index, null, 2));
+  console.log(`[preflight] wrote storyboard index → ${indexPath}`);
 }

 main();
--- a/video/src/probe.ts
+++ b/video/src/probe.ts
@ -1,11 +1,15 @@
 import { chromium } from 'playwright';
-import { APP_URL, AUTH_STATE_PATH, DASHBOARD_PATH, VIEWPORT } from './config.js';
+import { APP_URL, AUTH_STATE_PATH, DASHBOARD_PATH } from './config.js';
+import { viewportFor } from './script.js';
+import { storyboards } from './storyboard.js';

 async function main() {
+  // probe is a debug utility — pin it to the first storyboard's viewport.
+  const viewport = viewportFor(storyboards[0].video);
  const browser = await chromium.launch({ headless: true });
  const context = await browser.newContext({
    storageState: AUTH_STATE_PATH,
-    viewport: VIEWPORT,
+    viewport,
  });
  const page = await context.newPage();
  page.on('request', (r) => {
--- a/video/src/record.ts
+++ b/video/src/record.ts
@ -4,18 +4,20 @@ import { AUTH_STATE_PATH, LEAD_IN_S, OUTPUT_DIR } from './config.js';
 import { assertHardwareWebGL, launchRecordingBrowser } from './browser.js';
 import { narrationLog } from './narration.js';
 import { installDemoRoutes } from './routes.js';
-import { storyboard } from './storyboard.js';
+import type { Storyboard } from './script.js';
+import { storyboards } from './storyboard.js';
 import { prepareTimeline, runTimeline } from './timeline.js';
 import { trimRecording } from './video.js';

-async function main() {
-  if (!existsSync(AUTH_STATE_PATH)) {
-    console.error(`No ${AUTH_STATE_PATH} found. Run "npm run setup-auth" first.`);
-    process.exit(1);
-  }
-  if (!existsSync(OUTPUT_DIR)) mkdirSync(OUTPUT_DIR, { recursive: true });
+async function recordOne(storyboard: Storyboard): Promise<void> {
+  const dir = join(OUTPUT_DIR, storyboard.name);
+  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

-  const { browser, context } = await launchRecordingBrowser();
+  console.log(`\n=== [${storyboard.name}] recording ===`);
+
+  const { browser, context } = await launchRecordingBrowser(storyboard, {
+    recordDir: dir,
+  });
  const page = await context.newPage();
  await assertHardwareWebGL(page);
  const recordedVideo = page.video();
@ -37,22 +39,21 @@ async function main() {
    if (u.includes('ai-filters')) console.log(`[req] ${r.method()} ${u}`);
  });

-  await installDemoRoutes(page);
-  const ctx = await prepareTimeline(page);
+  await installDemoRoutes(page, storyboard);
+  const ctx = await prepareTimeline(page, storyboard);
  const timeline = await runTimeline(ctx, storyboard);

  await page.close();
-  const rawPath = join(OUTPUT_DIR, 'recording.raw.webm');
+  const rawPath = join(dir, 'recording.raw.webm');
  if (recordedVideo) await recordedVideo.saveAs(rawPath);
  await context.close();
  await browser.close();

  if (!recordedVideo || !statSync(rawPath).size) {
-    console.error('no recorded webm found');
-    process.exit(1);
+    throw new Error(`[${storyboard.name}] no recorded webm found`);
  }

-  trimRecording(rawPath, join(OUTPUT_DIR, 'recording.webm'), {
+  trimRecording(rawPath, join(dir, 'recording.webm'), storyboard, {
    recordStartMs,
    ...timeline,
  });
@ -60,13 +61,25 @@ async function main() {
  const totalDurationMs =
    timeline.sceneEndMs - timeline.sceneStartMs + LEAD_IN_S * 1000;
  const cues = narrationLog.flush(
-    join(OUTPUT_DIR, 'narration.json'),
+    join(dir, 'narration.json'),
    totalDurationMs
  );
  console.log(
-    `Wrote ${cues.length} narration cues to ${join(OUTPUT_DIR, 'narration.json')}`
+    `[${storyboard.name}] wrote ${cues.length} narration cues → ${join(dir, 'narration.json')}`
  );
-  console.log('Run "npm run encode" to produce output/recording.mp4');
+}
+
+async function main(): Promise<void> { // records every storyboard in declaration order
+  if (!existsSync(AUTH_STATE_PATH)) {
+    console.error(`No ${AUTH_STATE_PATH} found. Run "npm run setup-auth" first.`);
+    process.exit(1); // no saved auth state: bail out before launching any browser
+  }
+  if (!existsSync(OUTPUT_DIR)) mkdirSync(OUTPUT_DIR, { recursive: true });
+
+  for (const sb of storyboards) {
+    await recordOne(sb); // strictly sequential; a throw here rejects main() and skips the rest
+  }
+  console.log(`\n=== recorded ${storyboards.length} storyboard(s) ===`);
 }

 main().catch((err) => {
--- a/video/src/routes.ts
+++ b/video/src/routes.ts
@ -1,35 +1,33 @@
 import type { Page } from 'playwright';
-import {
-  APP_URL,
-  DASHBOARD_PATH,
-  INITIAL_MAP_VIEW,
-  STUBBED_FILTERS,
-  STUBBED_TRAVEL_TIME_FILTERS,
-} from './config.js';
+import { APP_URL, DASHBOARD_PATH } from './config.js';
+import type { Storyboard } from './script.js';

-export async function installDemoRoutes(page: Page) {
-  await Promise.all([stubAiFilters(page), stubExport(page)]);
+export async function installDemoRoutes(page: Page, storyboard: Storyboard) { // registers both route stubs on the page
+  await Promise.all([stubAiFilters(page, storyboard), stubExport(page)]); // only the AI-filters stub is per-storyboard
 }

-export function dashboardUrl(): string {
+export function dashboardUrl(storyboard: Storyboard): string { // builds the dashboard URL the recorder navigates to
+  const view = storyboard.content.initialMapView; // supplies the lat/lon/zoom query params
  const params = new URLSearchParams({
-    lat: String(INITIAL_MAP_VIEW.lat),
-    lon: String(INITIAL_MAP_VIEW.lon),
-    zoom: String(INITIAL_MAP_VIEW.zoom),
+    lat: String(view.lat),
+    lon: String(view.lon),
+    zoom: String(view.zoom),
  });
-  addInitialTravelTimeParams(params);
+  for (const tt of storyboard.content.stubbedTravelTimeFilters) { // one repeated "tt" param per filter: mode:slug:label:min:max
+    params.append('tt', `${tt.mode}:${tt.slug}:${tt.label}:${tt.min ?? 0}:${tt.max ?? 120}`); // NOTE(review): ':'-joined without per-field escaping — confirm labels never contain ':'
+  }
  return `${APP_URL}${DASHBOARD_PATH}?${params}`;
 }

-async function stubAiFilters(page: Page) {
+async function stubAiFilters(page: Page, storyboard: Storyboard) {
  await page.route('**/api/ai-filters', async (route) => {
    await new Promise((r) => setTimeout(r, 120));
    await route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({
-        filters: STUBBED_FILTERS,
-        travel_time_filters: STUBBED_TRAVEL_TIME_FILTERS,
+        filters: storyboard.content.stubbedFilters,
+        travel_time_filters: storyboard.content.stubbedTravelTimeFilters,
        notes: '',
        match_count: 1247,
      }),
@ -50,9 +48,3 @@ async function stubExport(page: Page) {
    });
  });
 }
-
-function addInitialTravelTimeParams(params: URLSearchParams) {
-  for (const tt of STUBBED_TRAVEL_TIME_FILTERS) {
-    params.append('tt', `${tt.mode}:${tt.slug}:${tt.label}:${tt.min ?? 0}:${tt.max ?? 120}`);
-  }
-}
--- a/video/src/runner.ts
+++ b/video/src/runner.ts
@ -243,7 +243,7 @@ async function resolveTarget(
 * against.
 */
 function loadSynthIndex(storyboard: Storyboard): SynthCue[] {
-  const path = join(OUTPUT_DIR, 'audio', 'index.json');
+  const path = join(OUTPUT_DIR, storyboard.name, 'audio', 'index.json');
  if (existsSync(path)) {
    const raw = JSON.parse(readFileSync(path, 'utf-8')) as {
      items: SynthCue[];
--- a/video/src/script.ts
+++ b/video/src/script.ts
@ -97,13 +97,97 @@ export interface Cue {
  tail?: Activity[];
 }

+/** Recorder + encoder knobs. Set per storyboard so vertical/horizontal cuts
+ *  can coexist without env-var juggling. */
+export interface VideoConfig {
+  /** "16x9" → 1920x1080, "9x16" → 1080x1920. */
+  aspect: '16x9' | '9x16';
+  /** Browser deviceScaleFactor. >1 supersamples for sharper text. */
+  captureScale: number;
+  /** WebM bitrate passed to libvpx, e.g. "8M" or "18M". */
+  webmBitrate: string;
+  /** Final fps after the trim/resample pass. */
+  outputFps: number;
+  /** verify.ts duration window. */
+  minDurationS: number;
+  maxDurationS: number;
+  /** Timestamp (seconds, in the trimmed mp4) used to extract the homepage
+   *  poster JPEG. Pick a frame that previews well on a paused player. */
+  posterTimeS: number;
+}
+
+/** Qwen3-TTS voice + language settings, sent to synth.py via the narration
+ *  script. Per storyboard so we can ship a British male narrator on one cut
+ *  and a different persona on another. */
+export interface VoiceConfig {
+  /** VoiceDesign persona prompt (accent, register, anti-filler directives). */
+  instruct: string;
+  /** Qwen3-TTS language string, e.g. "English". */
+  language: string;
+  /** Sampling temperature (default 0.6). */
+  temperature?: number;
+  /** Top-p nucleus sampling (default 0.9). */
+  topP?: number;
+  /** Reproducibility seed (default 42). */
+  seed?: number;
+}
+
+/** Brand strings rendered by the outro card. */
+export interface BrandConfig {
+  name: string;
+  tagline: string;
+  url: string;
+}
+
+/** Story-specific content: the AI prompt typed on camera, the stubbed AI
+ *  response, the initial map view, and the travel-time slider tuning. Held
+ *  on the Storyboard rather than in config globals so multiple storyboards
+ *  can declare different prompts / filters / drag targets without colliding.
+ *  NOTE(review): DEFAULT_CUES in storyboard.ts bakes in its own constants — keep both in sync. */
+export interface ContentConfig {
+  /** Prompt text typed into the AI box during the cold open. */
+  promptText: string;
+  /** Cold-open zoom multiplier on the AI card. */
+  aiZoomScale: number;
+  initialMapView: { lat: number; lon: number; zoom: number }; // becomes the lat/lon/zoom params of the dashboard URL
+  stubbedFilters: Record<string, [number, number] | string[]>; // "filters" payload of the stubbed /api/ai-filters response
+  stubbedTravelTimeFilters: TravelTimeFilter[]; // "travel_time_filters" payload; also seeds the URL "tt" params
+  travelTimeCardSelector: string; // selector of the travel-time filter card whose slider gets dragged
+  travelTimeSliderMax: number; // full-scale slider value; a drag target is expressed as target / max
+  travelTimeDragFromMin: number; // slider start, in minutes; expected to equal the stubbed filter's max
+  travelTimeDragToMin: number; // minute value the slider is dragged down to
+  brand: BrandConfig; // strings for the outro card
+}
+
+export interface TravelTimeFilter { // one travel-time entry of the stubbed AI response
+  mode: 'transit' | 'car' | 'bicycle' | 'walking';
+  slug: string; // destination identifier, e.g. 'manchester'
+  label: string; // display name of the destination, e.g. 'Manchester city centre'
+  min?: number; // lower bound (presumably minutes); dashboardUrl() substitutes 0 when omitted
+  max?: number; // upper bound (presumably minutes); dashboardUrl() substitutes 120 when omitted
+}
+
 /**
 * Top-level storyboard. `pre` runs once before the first cue's gapBefore;
 * `post` runs once after the last cue's tail finishes. The cue list is what
 * gets handed to the synth step.
+ *
+ * `name` doubles as the on-disk slug — outputs go to `output/<name>/` and
+ * publish as `<name>.mp4` + `<name>.jpg`. Keep names URL/path-safe.
 */
 export interface Storyboard {
+  name: string;
+  video: VideoConfig;
+  voice: VoiceConfig;
+  content: ContentConfig;
  pre?: Activity[];
  cues: Cue[];
  post?: Activity[];
 }
+
+/** Map a video's aspect to capture dimensions: portrait for "9x16", landscape otherwise. */
+export function viewportFor(video: VideoConfig): { width: number; height: number } {
+  const portrait = video.aspect === '9x16';
+  const [width, height] = portrait ? [1080, 1920] : [1920, 1080];
+  return { width, height };
+}
--- a/video/src/storyboard.ts
+++ b/video/src/storyboard.ts
@ -1,31 +1,33 @@
-import {
-  AI_ZOOM_SCALE,
-  BRAND_NAME,
-  BRAND_TAGLINE,
-  BRAND_URL,
-  PROMPT_TEXT,
-  TT_CARD_SELECTOR,
-  TT_DRAG_TO_MIN,
-  TT_SLIDER_MAX,
-} from './config.js';
 import { el, type Storyboard } from './script.js';

 /**
- * The demo video, top to bottom.
+ * The list of demo videos to render, in order.
 *
- * Audio is generated first (one batched Qwen call), so each cue's actual
- * duration is known before recording. The runner sizes each cue's wall-time
- * to the measured audio length, padding short `during` blocks with a
- * trailing wait. Inter-cue spacing is controlled here via `gapBeforeMs`
- * (silence in audio) plus optional `tail` activities (visual movement after
- * the caption hides, before the next cue's gap).
+ * Each entry is a self-contained Storyboard: video knobs (aspect, bitrate,
+ * fps), voice persona (Qwen3-TTS instruct + language + sampling), stubbed
+ * AI response, brand strings, AND the cue list. Nothing is read from
+ * config.ts globals; the module-level constants below only seed the default
+ * entry. To ship a vertical cut, a new prompt, or a new voice, push another item.
+ *
+ * `name` doubles as the on-disk slug. The pipeline writes per-storyboard
+ * artefacts to `output/<name>/` and publishes `<name>.mp4` / `<name>.jpg`
+ * to the homepage. The default storyboard is named `recording` so the
+ * existing homepage `/video/recording.mp4` keeps working unchanged.
+ *
+ * Audio is generated first (one batched Qwen call per storyboard, using
+ * its own voice config), so each cue's actual duration is known before
+ * recording. The runner sizes each cue's wall-time to the measured audio
+ * length, padding short `during` blocks with a trailing wait. Inter-cue
+ * spacing is controlled here via `gapBeforeMs` (silence in audio) plus
+ * optional `tail` activities (visual movement after the caption hides,
+ * before the next cue's gap).
 *
 * Sum of `during` declared durations MUST be ≤ measured cue duration. If
 * synth comes back tighter than the activities can fit, the runner throws
 * with a pointer to the offending cue — bump that cue's text, lengthen its
 * gapBefore, or trim a during step.
 *
- * Reference durations (Qwen3-TTS / speaker=ryan, 2026-05-09 measured):
+ * Reference durations (Qwen3-TTS / British male narrator, 2026-05-09):
 *   cue 0  1920ms   "Describe the life you want."
 *   cue 1  2720ms   "Every matching neighbourhood, side by side."
 *   cue 2  2160ms   "Tighten the commute to 20 minutes."
@ -34,137 +36,238 @@ import { el, type Storyboard } from './script.js';
 *   cue 5  1760ms   "Take the shortlist into Excel."
 *   cue 6  4400ms   "Perfect Postcode. Find where you actually want to live."
 */
-export const storyboard: Storyboard = {
+
+const PROMPT_TEXT = 'Flats or terraces <£450k, 35 min to Manchester, low crime';
+
+const BRAND = {
+  name: 'Perfect Postcode',
+  tagline: 'Find where you actually want to live.',
+  url: 'https://perfect-postcode.co.uk',
+};
+
+// Cold-open zoom: how aggressively to magnify the AI box.
+// 2.4 fills most of the viewport with the prompt card without blowing up text.
+const AI_ZOOM_SCALE = 2.4;
+
+// The travel-time card we'll drag manually after AI applies. The Filters
+// component renders each travel-time entry with `data-filter-name="tt_${i}"`,
+// and our stub only sets one entry, so it's tt_0.
+const TT_CARD_SELECTOR = '[data-filter-name="tt_0"]';
+const TT_SLIDER_MAX = 120;
+const TT_DRAG_FROM_MIN = 35; // matches AI stub max below
+const TT_DRAG_TO_MIN = 20;
+
+// Calm British male narrator. NOTE(review): confirm this matches the old
+// tts/synth.py default verbatim, or existing audio caches regenerate once.
+const BRITISH_MALE_NARRATOR =
+  'Calm, professional middle-aged British male narrator with a ' +
+  'neutral British accent. Even, measured pace; warm but ' +
+  'understated; product-demo register. Do not laugh, sigh, gasp, or add ' +
+  'filler sounds; no audible breaths between sentences.';
+
+const DEFAULT_CUES: Storyboard['cues'] = [
+  // -- Scene 1: AI prompt ----------------------------------------------
+  // Cue 0 is short (1920ms) — caption shows alone, then typing + submit
+  // happen silently in the tail. The natural beat is: viewer hears the
+  // brief, then watches the prompt being typed.
+  {
+    text: 'Describe the life you want.',
+    gapBeforeMs: 0,
+    tail: [
+      { kind: 'wait', durationMs: 140 },
+      {
+        kind: 'type',
+        selector: '[data-tutorial="ai-filters"] textarea',
+        text: PROMPT_TEXT,
+        durationMs: 3000,
+      },
+      { kind: 'wait', durationMs: 140 },
+      { kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1700 },
+      { kind: 'wait', durationMs: 700 },
+    ],
+  },
+
+  // -- Scene 2: zoom out reveal ---------------------------------------
+  {
+    text: 'Every matching neighbourhood, side by side.',
+    gapBeforeMs: 400,
+    during: [{ kind: 'zoomReset', durationMs: 1400 }],
+    tail: [{ kind: 'wait', durationMs: 1200 }],
+  },
+
+  // -- Scene 3: travel-time slider ------------------------------------
+  {
+    text: `Tighten the commute to ${TT_DRAG_TO_MIN} minutes.`,
+    gapBeforeMs: 500,
+    during: [
+      {
+        kind: 'dragSlider',
+        thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`,
+        trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`,
+        toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX,
+        durationMs: 1400,
+      },
+    ],
+    tail: [{ kind: 'wait', durationMs: 1200 }],
+  },
+
+  // -- Scene 4a: deep zoom into a hexagon -----------------------------
+  // The mapZoom barely fits (1500ms vs cue 1840ms); cursor prep happens
+  // earlier in this cue's during, the click + payoff dwell are in tail.
+  {
+    text: 'Drill into a single block.',
+    gapBeforeMs: 500,
+    during: [
+      { kind: 'cursorScale', scale: 1.4, durationMs: 200 },
+      {
+        kind: 'mapZoom',
+        target: { kind: 'point', x: 1140, y: 605 },
+        steps: 18,
+        durationMs: 1500,
+      },
+    ],
+    tail: [
+      // Wait for the post-zoom /api/postcodes response and a redraw
+      // before the click — otherwise the click can fire on a stale
+      // frame and miss the polygon.
+      { kind: 'wait', durationMs: 1200 },
+      {
+        kind: 'click',
+        target: { kind: 'point', x: 1140, y: 605 },
+        durationMs: 700,
+      },
+      { kind: 'cursorScale', scale: 1, durationMs: 280 },
+      // Linger so the climax cue lands on the right-pane reveal.
+      { kind: 'wait', durationMs: 1500 },
+    ],
+  },
+
+  // -- Scene 4b: right-pane payoff -----------------------------------
+  // 4480ms cue, no during — the camera holds on the populated right pane
+  // for the whole climax line. Tail dwells before the export beat.
+  {
+    text: 'Stats, listings, Street View, price history — all in one pane.',
+    gapBeforeMs: 0,
+    tail: [{ kind: 'wait', durationMs: 1200 }],
+  },
+
+  // -- Scene 5: export ------------------------------------------------
+  // 1760ms cue. zoomReset + click together fit (1700ms); 60ms padding.
+  {
+    text: 'Take the shortlist into Excel.',
+    gapBeforeMs: 500,
+    during: [
+      { kind: 'zoomReset', durationMs: 900 },
+      {
+        kind: 'click',
+        target: el('button[title="Export to Excel"]'),
+        durationMs: 800,
+      },
+    ],
+    tail: [{ kind: 'wait', durationMs: 800 }],
+  },
+
+  // -- Scene 6: outro -------------------------------------------------
+  {
+    text: `${BRAND.name}. ${BRAND.tagline}`,
+    gapBeforeMs: 600,
+    during: [
+      {
+        kind: 'showOutro',
+        brand: BRAND.name,
+        tagline: BRAND.tagline,
+        url: BRAND.url,
+        durationMs: 0,
+      },
+    ],
+    tail: [{ kind: 'wait', durationMs: 1500 }],
+  },
+];
+
+const DEFAULT_PRE: Storyboard['pre'] = [
  // Camera push-in to the AI box happens before the first caption — silent
  // setup keeps the cold open from feeling rushed.
-  pre: [
-    { kind: 'clearVignette', durationMs: 0 },
-    { kind: 'wait', durationMs: 200 },
-    {
-      kind: 'zoomTo',
-      target: el('[data-tutorial="ai-filters"]'),
-      scale: AI_ZOOM_SCALE,
-      durationMs: 1300,
-    },
-    { kind: 'wait', durationMs: 140 },
-  ],
+  { kind: 'clearVignette', durationMs: 0 },
+  { kind: 'wait', durationMs: 200 },
+  {
+    kind: 'zoomTo',
+    target: el('[data-tutorial="ai-filters"]'),
+    scale: AI_ZOOM_SCALE,
+    durationMs: 1300,
+  },
+  { kind: 'wait', durationMs: 140 },
+];

-  cues: [
-    // -- Scene 1: AI prompt ----------------------------------------------
-    // Cue 0 is short (1920ms) — caption shows alone, then typing + submit
-    // happen silently in the tail. The natural beat is: viewer hears the
-    // brief, then watches the prompt being typed.
-    {
-      text: 'Describe the life you want.',
-      gapBeforeMs: 0,
-      tail: [
-        { kind: 'wait', durationMs: 140 },
+export const storyboards: Storyboard[] = [
+  {
+    name: 'recording',
+    video: {
+      aspect: '16x9',
+      captureScale: 1,
+      // 8M is enough for 1920x1080 at captureScale=1; bump to 18M when
+      // captureScale > 1 (supersampled) — see render.sh history if reviving
+      // higher-quality cuts.
+      webmBitrate: '8M',
+      outputFps: 50,
+      minDurationS: 10,
+      maxDurationS: 60,
+      // Right-pane inspection (~16s into the trimmed timeline) is the
+      // clearest paused-state preview: Manchester map, filters applied,
+      // right pane populated, larger narration caption visible.
+      posterTimeS: 16,
+    },
+    voice: {
+      instruct: BRITISH_MALE_NARRATOR,
+      language: 'English',
+      // Sampling pinned for cue-to-cue consistency. Lower temp/top_p make
+      // the decoder less likely to sample non-speech tokens (laughter,
+      // random noise) at the cost of slightly flatter intonation. Seed
+      // makes runs reproducible.
+      temperature: 0.6,
+      topP: 0.9,
+      seed: 42,
+    },
+    content: {
+      promptText: PROMPT_TEXT,
+      aiZoomScale: AI_ZOOM_SCALE,
+      // Initial map view used while we navigate. The AI scene zooms in on
+      // the sidebar so this only matters once we zoom out.
+      initialMapView: { lat: 53.4795, lon: -2.2451, zoom: 11.5 },
+      // Filters returned by the AI stub. Keys MUST match real feature names
+      // from /api/features (verified against the running server's schema).
+      stubbedFilters: {
+        'Property type': ['Flats/Maisonettes', 'Terraced'],
+        'Estimated current price': [175000, 450000],
+        'Serious crime per 1k residents (avg/yr)': [0, 55],
+        'Noise (dB)': [50, 68],
+      },
+      // Travel-time filters returned by the AI stub. Slug matches the real
+      // /api/travel-destinations?mode=transit response.
+      stubbedTravelTimeFilters: [
        {
-          kind: 'type',
-          selector: '[data-tutorial="ai-filters"] textarea',
-          text: PROMPT_TEXT,
-          durationMs: 3000,
-        },
-        { kind: 'wait', durationMs: 140 },
-        { kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1700 },
-        { kind: 'wait', durationMs: 700 },
-      ],
-    },
-
-    // -- Scene 2: zoom out reveal ---------------------------------------
-    {
-      text: 'Every matching neighbourhood, side by side.',
-      gapBeforeMs: 400,
-      during: [{ kind: 'zoomReset', durationMs: 1400 }],
-      tail: [{ kind: 'wait', durationMs: 1200 }],
-    },
-
-    // -- Scene 3: travel-time slider ------------------------------------
-    {
-      text: `Tighten the commute to ${TT_DRAG_TO_MIN} minutes.`,
-      gapBeforeMs: 500,
-      during: [
-        {
-          kind: 'dragSlider',
-          thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`,
-          trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`,
-          toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX,
-          durationMs: 1400,
+          mode: 'transit',
+          slug: 'manchester',
+          label: 'Manchester city centre',
+          max: TT_DRAG_FROM_MIN,
        },
      ],
-      tail: [{ kind: 'wait', durationMs: 1200 }],
+      travelTimeCardSelector: TT_CARD_SELECTOR,
+      travelTimeSliderMax: TT_SLIDER_MAX,
+      travelTimeDragFromMin: TT_DRAG_FROM_MIN,
+      travelTimeDragToMin: TT_DRAG_TO_MIN,
+      brand: BRAND,
    },
+    pre: DEFAULT_PRE,
+    cues: DEFAULT_CUES,
+  },
+];

-    // -- Scene 4a: deep zoom into a hexagon -----------------------------
-    // The mapZoom barely fits (1500ms vs cue 1840ms); cursor prep happens
-    // earlier in this cue's during, the click + payoff dwell are in tail.
-    {
-      text: 'Drill into a single block.',
-      gapBeforeMs: 500,
-      during: [
-        { kind: 'cursorScale', scale: 1.4, durationMs: 200 },
-        {
-          kind: 'mapZoom',
-          target: { kind: 'point', x: 1140, y: 605 },
-          steps: 18,
-          durationMs: 1500,
-        },
-      ],
-      tail: [
-        // Wait for the post-zoom /api/postcodes response and a redraw
-        // before the click — otherwise the click can fire on a stale
-        // frame and miss the polygon.
-        { kind: 'wait', durationMs: 1200 },
-        {
-          kind: 'click',
-          target: { kind: 'point', x: 1140, y: 605 },
-          durationMs: 700,
-        },
-        { kind: 'cursorScale', scale: 1, durationMs: 280 },
-        // Linger so the climax cue lands on the right-pane reveal.
-        { kind: 'wait', durationMs: 1500 },
-      ],
-    },
-
-    // -- Scene 4b: right-pane payoff -----------------------------------
-    // 4480ms cue, no during — the camera holds on the populated right pane
-    // for the whole climax line. Tail dwells before the export beat.
-    {
-      text: 'Stats, listings, Street View, price history — all in one pane.',
-      gapBeforeMs: 0,
-      tail: [{ kind: 'wait', durationMs: 1200 }],
-    },
-
-    // -- Scene 5: export ------------------------------------------------
-    // 1760ms cue. zoomReset + click together fit (1700ms); 60ms padding.
-    {
-      text: 'Take the shortlist into Excel.',
-      gapBeforeMs: 500,
-      during: [
-        { kind: 'zoomReset', durationMs: 900 },
-        {
-          kind: 'click',
-          target: el('button[title="Export to Excel"]'),
-          durationMs: 800,
-        },
-      ],
-      tail: [{ kind: 'wait', durationMs: 800 }],
-    },
-
-    // -- Scene 6: outro -------------------------------------------------
-    {
-      text: `${BRAND_NAME}. ${BRAND_TAGLINE}`,
-      gapBeforeMs: 600,
-      during: [
-        {
-          kind: 'showOutro',
-          brand: BRAND_NAME,
-          tagline: BRAND_TAGLINE,
-          url: BRAND_URL,
-          durationMs: 0,
-        },
-      ],
-      tail: [{ kind: 'wait', durationMs: 1500 }],
-    },
-  ],
-};
+/** Look up a storyboard by its `name` slug; throws if no entry matches. */
+export function getStoryboard(name: string): Storyboard {
+  for (const candidate of storyboards) {
+    if (candidate.name === name) return candidate;
+  }
+  const known = storyboards.map((entry) => entry.name).join(', ');
+  // Listing the known slugs makes a mistyped CLI argument self-diagnosing.
+  throw new Error(`Unknown storyboard "${name}". Known: ${known}`);
+}
--- a/video/src/timeline.ts
+++ b/video/src/timeline.ts
@ -13,10 +13,13 @@ export type TimelineResult = RunnerResult;
 * recording chrome (cursor, zoom wrapper, caption layer). Also opens the
 * AI prompt textarea so the storyboard can begin typing immediately.
 */
-export async function prepareTimeline(page: Page): Promise<ScriptCtx> {
+export async function prepareTimeline(
+  page: Page,
+  storyboard: Storyboard
+): Promise<ScriptCtx> {
  const dashboard = new DashboardRecorder(page);
  const initialMapVersion = dashboard.getMapDataVersion();
-  await page.goto(dashboardUrl(), { waitUntil: 'domcontentloaded' });
+  await page.goto(dashboardUrl(storyboard), { waitUntil: 'domcontentloaded' });
  await page.waitForLoadState('load', { timeout: 15000 }).catch(() => {});
  await page
    .locator('[data-tutorial="ai-filters"]')
--- a/video/src/verify.ts
+++ b/video/src/verify.ts
@ -1,6 +1,8 @@
 import { execFileSync } from 'node:child_process';
 import { existsSync, statSync } from 'node:fs';
-import { MAX_DURATION_S, MIN_DURATION_S, OUTPUT_FPS, OUTPUT_DIR, VIDEO_SIZE } from './config.js';
+import { OUTPUT_DIR } from './config.js';
+import { viewportFor, type Storyboard } from './script.js';
+import { getStoryboard } from './storyboard.js';

 interface Probe {
  streams?: {
@ -48,7 +50,7 @@ function probe(path: string): Probe {
  return JSON.parse(raw) as Probe;
 }

-function verifyVideo(path: string) {
+function verifyVideo(path: string, storyboard: Storyboard) {
  if (!existsSync(path)) fail(`${path} is missing`);
  if (statSync(path).size === 0) fail(`${path} is empty`);

@ -56,18 +58,23 @@ function verifyVideo(path: string) {
  const stream = data.streams?.[0];
  if (!stream) fail(`${path} has no video stream`);

+  const expectedSize = viewportFor(storyboard.video);
+  const { minDurationS, maxDurationS, outputFps } = storyboard.video;
+
  const duration = Number(data.format?.duration ?? 0);
  const fps = parseRate(stream.avg_frame_rate || stream.r_frame_rate);
-  if (stream.width !== VIDEO_SIZE.width || stream.height !== VIDEO_SIZE.height) {
-    fail(`${path} is ${stream.width}x${stream.height}, expected ${VIDEO_SIZE.width}x${VIDEO_SIZE.height}`);
-  }
-  if (duration < MIN_DURATION_S || duration > MAX_DURATION_S) {
+  if (stream.width !== expectedSize.width || stream.height !== expectedSize.height) {
    fail(
-      `${path} duration is ${duration.toFixed(2)}s, expected ${MIN_DURATION_S}-${MAX_DURATION_S}s`
+      `${path} is ${stream.width}x${stream.height}, expected ${expectedSize.width}x${expectedSize.height}`
    );
  }
-  if (Math.abs(fps - OUTPUT_FPS) > 0.1) {
-    fail(`${path} is ${fps.toFixed(2)}fps, expected ${OUTPUT_FPS}fps`);
+  if (duration < minDurationS || duration > maxDurationS) {
+    fail(
+      `${path} duration is ${duration.toFixed(2)}s, expected ${minDurationS}-${maxDurationS}s`
+    );
+  }
+  if (Math.abs(fps - outputFps) > 0.1) {
+    fail(`${path} is ${fps.toFixed(2)}fps, expected ${outputFps}fps`);
  }

  console.log(
@ -81,8 +88,20 @@ function verifyImage(path: string) {
  console.log(`[verify] ${path}: ${statSync(path).size} bytes`);
 }

-const videoPath = process.argv[2] ?? `${OUTPUT_DIR}/recording.mp4`;
-const posterPath = process.argv[3] ?? (process.argv[2] ? undefined : `${OUTPUT_DIR}/poster.jpg`);
+// Usage:
+//   node dist/verify.js <storyboard> [videoPath] [posterPath]
+// Defaults: videoPath=output/<storyboard>/recording.mp4,
+//           posterPath=output/<storyboard>/poster.jpg.
+// If videoPath is given but posterPath is not, the poster check is skipped.
+const storyboardName = process.argv[2];
+if (!storyboardName) {
+  fail('verify: missing <storyboard> argument (e.g. `node dist/verify.js recording`)');
+}
+const storyboard = getStoryboard(storyboardName);

-verifyVideo(videoPath);
+const videoPath = process.argv[3] ?? `${OUTPUT_DIR}/${storyboard.name}/recording.mp4`;
+const posterPath =
+  process.argv[4] ?? (process.argv[3] ? undefined : `${OUTPUT_DIR}/${storyboard.name}/poster.jpg`);
+
+verifyVideo(videoPath, storyboard);
 if (posterPath) verifyImage(posterPath);
--- a/video/src/video.ts
+++ b/video/src/video.ts
@ -1,10 +1,12 @@
 import { execSync } from 'node:child_process';
 import { renameSync, statSync } from 'node:fs';
-import { LEAD_IN_S, MAX_DURATION_S, OUTPUT_FPS, VIDEO_SIZE, WEBM_BITRATE } from './config.js';
+import { LEAD_IN_S } from './config.js';
+import { viewportFor, type Storyboard } from './script.js';

 export function trimRecording(
  rawPath: string,
  trimmedPath: string,
+  storyboard: Storyboard,
  times: { recordStartMs: number; sceneStartMs: number; sceneEndMs: number }
 ) {
  const sceneSpan = (times.sceneEndMs - times.sceneStartMs) / 1000;
@ -16,22 +18,26 @@ export function trimRecording(
  const wallDuration = trimEnd - trimStart;
  const finalDuration = wallDuration;

-  if (finalDuration > MAX_DURATION_S) {
+  const { outputFps, webmBitrate, maxDurationS } = storyboard.video;
+  const viewport = viewportFor(storyboard.video);
+
+  if (finalDuration > maxDurationS) {
    console.log(
-      `Scene output duration is ${finalDuration.toFixed(2)}s (guard ${MAX_DURATION_S.toFixed(2)}s); keeping the full take.`
+      `[${storyboard.name}] Scene output duration is ${finalDuration.toFixed(2)}s ` +
+        `(guard ${maxDurationS.toFixed(2)}s); keeping the full take.`
    );
  }

  const filter =
    `trim=start=${trimStart.toFixed(3)}:duration=${wallDuration.toFixed(3)},` +
-    `setpts=PTS-STARTPTS,fps=${OUTPUT_FPS},` +
+    `setpts=PTS-STARTPTS,fps=${outputFps},` +
    `trim=duration=${finalDuration.toFixed(3)},setpts=PTS-STARTPTS`;

  // Keep trimming inside the filter graph: it is frame-accurate for WebM
  // without the keyframe leakage of input seeking.
  execSync(
    `ffmpeg -y -i "${rawPath}" -vf "${filter}" ` +
-      `-fps_mode cfr -r ${OUTPUT_FPS} -c:v libvpx -b:v ${WEBM_BITRATE} -deadline good -cpu-used 5 ` +
+      `-fps_mode cfr -r ${outputFps} -c:v libvpx -b:v ${webmBitrate} -deadline good -cpu-used 5 ` +
      `"${trimmedPath}"`,
    { stdio: 'inherit' }
  );
@ -44,6 +50,6 @@ export function trimRecording(
  }

  console.log(
-    `Wrote ${trimmedPath} (${finalDuration.toFixed(2)}s, scene=${sceneSpan.toFixed(2)}s, capture=${VIDEO_SIZE.width}x${VIDEO_SIZE.height})`
+    `[${storyboard.name}] Wrote ${trimmedPath} (${finalDuration.toFixed(2)}s, scene=${sceneSpan.toFixed(2)}s, capture=${viewport.width}x${viewport.height})`
  );
 }
--- a/video/tts/mux.py
+++ b/video/tts/mux.py
@ -1,19 +1,19 @@
-"""Mux per-cue WAVs into recording.mp4 at their narration offsets.
+"""Mux per-cue WAVs into one storyboard's recording.mp4 at narration offsets.

-Reads two manifests:
+Reads two manifests inside ``output/<storyboard>/``:

-* ``output/audio/index.json`` (synth output) — per-cue WAV filename + measured
+* ``audio/index.json`` (synth output) — per-cue WAV filename + measured
  duration. Generated BEFORE recording in one batched Qwen3-TTS call.
-* ``output/narration.json`` (recorder output) — per-cue ``videoTimeMs`` against
+* ``narration.json`` (recorder output) — per-cue ``videoTimeMs`` against
  the trimmed video. Generated DURING recording.

 Joins them by ``cueIndex`` (index in the cue list, 1:1 between manifests),
 runs ffmpeg with one ``adelay`` per cue plus a single ``amix``, copies the
-video stream, and writes ``output/recording.narrated.mp4``.
+video stream, and writes ``output/<storyboard>/recording.narrated.mp4``.

 Run from the ``video/`` directory after recording:

-    uv run --project tts python tts/mux.py
+    uv run --project tts python tts/mux.py --storyboard recording
 """

 from __future__ import annotations
@ -28,23 +28,21 @@ from pathlib import Path

 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--audio-dir", type=Path, default=Path("output/audio"))
    parser.add_argument(
-        "--narration",
-        type=Path,
-        default=Path("output/narration.json"),
-        help="Per-cue videoTimeMs manifest written by the recorder.",
+        "--storyboard",
+        required=True,
+        help="Storyboard slug (matches Storyboard.name in src/storyboard.ts).",
    )
-    parser.add_argument("--video", type=Path, default=Path("output/recording.mp4"))
    parser.add_argument(
-        "--out",
+        "--output-dir",
        type=Path,
-        default=Path("output/recording.narrated.mp4"),
+        default=Path("output"),
+        help="Root output directory; per-storyboard files live in <root>/<storyboard>/.",
    )
    parser.add_argument(
        "--replace",
        action="store_true",
-        help="After muxing, atomically replace --video with --out.",
+        help="After muxing, atomically replace the storyboard's recording.mp4.",
    )
    return parser.parse_args()

@ -56,7 +54,13 @@ def main() -> int:
        print("[mux] ffmpeg not on PATH", file=sys.stderr)
        return 1

-    audio_index_path = args.audio_dir / "index.json"
+    storyboard_dir = args.output_dir / args.storyboard
+    audio_dir = storyboard_dir / "audio"
+    narration_path = storyboard_dir / "narration.json"
+    video_path = storyboard_dir / "recording.mp4"
+    out_path = storyboard_dir / "recording.narrated.mp4"
+
+    audio_index_path = audio_dir / "index.json"
    if not audio_index_path.exists():
        print(
            f"[mux] {audio_index_path} not found; run tts/synth.py first",
@ -64,25 +68,25 @@ def main() -> int:
        )
        return 1

-    if not args.narration.exists():
+    if not narration_path.exists():
        print(
-            f"[mux] {args.narration} not found; the recorder must run before mux",
+            f"[mux] {narration_path} not found; the recorder must run before mux",
            file=sys.stderr,
        )
        return 1

-    if not args.video.exists():
-        print(f"[mux] video not found: {args.video}", file=sys.stderr)
+    if not video_path.exists():
+        print(f"[mux] video not found: {video_path}", file=sys.stderr)
        return 1

    audio_index = json.loads(audio_index_path.read_text())
    audio_items = [it for it in audio_index.get("items", []) if it.get("wav")]
    if not audio_items:
        print("[mux] synth produced no cues; copying video unchanged", file=sys.stderr)
-        shutil.copyfile(args.video, args.out)
+        shutil.copyfile(video_path, out_path)
        return 0

-    narration = json.loads(args.narration.read_text())
+    narration = json.loads(narration_path.read_text())
    nar_cues = list(narration.get("cues", []))
    if len(nar_cues) != len(audio_items):
        print(
@ -130,9 +134,9 @@ def main() -> int:
            + "\n  - ".join(overlaps)
        )

-    cmd: list[str] = ["ffmpeg", "-y", "-loglevel", "warning", "-i", str(args.video)]
+    cmd: list[str] = ["ffmpeg", "-y", "-loglevel", "warning", "-i", str(video_path)]
    for it in items:
-        cmd += ["-i", str(args.audio_dir / it["wav"])]
+        cmd += ["-i", str(audio_dir / it["wav"])]

    filter_parts: list[str] = []
    mix_inputs: list[str] = []
@ -168,18 +172,21 @@ def main() -> int:
        "-shortest",
        "-movflags",
        "+faststart",
-        str(args.out),
+        str(out_path),
    ]

-    print(f"[mux] muxing {len(items)} narration cues into {args.out}", flush=True)
+    print(
+        f"[mux] [{args.storyboard}] muxing {len(items)} narration cues into {out_path}",
+        flush=True,
+    )
    result = subprocess.run(cmd)
    if result.returncode != 0:
        print(f"[mux] ffmpeg exited {result.returncode}", file=sys.stderr)
        return result.returncode

    if args.replace:
-        args.out.replace(args.video)
-        print(f"[mux] replaced {args.video} with narrated copy", flush=True)
+        out_path.replace(video_path)
+        print(f"[mux] replaced {video_path} with narrated copy", flush=True)

    return 0

--- a/video/tts/synth.py
+++ b/video/tts/synth.py
@ -1,15 +1,28 @@
-"""Synthesize the full narration in ONE batched Qwen3-TTS call.
+"""Synthesize one storyboard's narration in ONE batched Qwen3-TTS call.

-Reads ``output/narration-script.json`` (emitted by ``dist/preflight.js``) and
-runs ``Qwen3TTSModel.generate_custom_voice`` with all cue texts as a single
-batched list — that way every cue shares the same model state, which keeps
-prosody and timbre consistent across cues. Per-cue WAVs and an index manifest
-go to ``output/audio/`` for the recording step (which reads measured cue
-durations) and the mux step (which drops each WAV at its videoTime).
+Reads ``output/<storyboard>/narration-script.json`` (emitted by
+``dist/preflight.js``), mints one reference clip with ``generate_voice_design``
+(or takes ``--reference-audio``), then clones all cue texts in one batched
+``generate_voice_clone`` call so every cue shares one speaker embedding. Per-cue
+WAVs and an index manifest go to ``output/<storyboard>/audio/`` for the
+recording step (which reads measured cue durations) and the mux step (which
+drops each WAV at its videoTime).
+
+Voice persona, language, and sampling come from the storyboard via the
+``voice`` block of the narration script. CLI flags and ``TTS_*`` env vars only
+override the checkpoints, device, and reference clip for ad-hoc
+experimentation; storyboards remain the source of truth for production runs.
+
+We use the VoiceDesign sibling of CustomVoice because it accepts a free-form
+voice persona (British accent, narrator register, "no laughter") via the
+``instruct`` parameter. CustomVoice's preset speakers are all American or
+non-English, and its ``instruct`` is documented for emotion only — it
+ignored accent directives and bled non-speech tokens (laughter, sighs)
+between cues.

 Run from the ``video/`` directory:

-    uv run --project tts python tts/synth.py
+    uv run --project tts python tts/synth.py --storyboard recording
 """

 from __future__ import annotations
@ -17,55 +30,78 @@ from __future__ import annotations
 import argparse
 import json
 import os
+import random
 import sys
 from pathlib import Path

+import numpy as np
 import soundfile as sf
 import torch
 from qwen_tts import Qwen3TTSModel


-DEFAULT_MODEL = "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
-DEFAULT_SPEAKER = "ryan"
-DEFAULT_LANGUAGE = "English"
+# Two checkpoints: the design model mints the reference clip in the desired
+# persona; the clone model conditions every cue on that reference's x-vector.
+# Neither CustomVoice nor VoiceDesign supports generate_voice_clone — only the
+# Base checkpoint does.
+DEFAULT_DESIGN_MODEL = "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign"
+DEFAULT_CLONE_MODEL = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
+
+# Fixed reference utterance used to anchor the speaker timbre. The reference
+# is generated once per (model, instruct, sampling, seed) tuple and reused
+# for every cue, so all narration shares the same x-vector. Two short
+# sentences exercise enough phonemes for a stable embedding without bloating
+# generation time.
+REFERENCE_TEXT = (
+    "Welcome to the demonstration. This is the narrator voice you'll hear throughout the video."
+)
+
+
+def _safe_load_json(path: Path) -> object | None:
+    try:
+        return json.loads(path.read_text())
+    except (FileNotFoundError, json.JSONDecodeError):
+        return None


 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
-        "--script",
+        "--storyboard",
+        required=True,
+        help="Storyboard slug (matches Storyboard.name in src/storyboard.ts).",
+    )
+    parser.add_argument(
+        "--output-dir",
        type=Path,
-        default=Path("output/narration-script.json"),
-        help="Narration script emitted by dist/preflight.js.",
+        default=Path("output"),
+        help="Root output directory; per-storyboard files live in <root>/<storyboard>/.",
    )
    parser.add_argument(
-        "--out-dir",
+        "--design-model",
+        default=os.environ.get("TTS_DESIGN_MODEL", DEFAULT_DESIGN_MODEL),
+        help="Checkpoint used to mint the voice reference (VoiceDesign by default).",
+    )
+    parser.add_argument(
+        "--clone-model",
+        default=os.environ.get("TTS_CLONE_MODEL", DEFAULT_CLONE_MODEL),
+        help="Checkpoint used to clone the cue audio from the reference (Base by default).",
+    )
+    parser.add_argument(
+        "--reference-audio",
        type=Path,
-        default=Path("output/audio"),
-        help="Directory to write WAV files and index.json into.",
+        default=(Path(os.environ["TTS_REFERENCE_AUDIO"]) if os.environ.get("TTS_REFERENCE_AUDIO") else None),
+        help="Path to an existing reference WAV. If set, skip VoiceDesign and clone from this.",
    )
    parser.add_argument(
-        "--model",
-        default=os.environ.get("TTS_MODEL", DEFAULT_MODEL),
-    )
-    parser.add_argument(
-        "--speaker",
-        default=os.environ.get("TTS_SPEAKER", DEFAULT_SPEAKER),
-        help="CustomVoice preset speaker name (use --list-speakers to enumerate).",
-    )
-    parser.add_argument(
-        "--language",
-        default=os.environ.get("TTS_LANGUAGE", DEFAULT_LANGUAGE),
+        "--reference-text",
+        default=os.environ.get("TTS_REFERENCE_TEXT"),
+        help="Transcript of --reference-audio. Required if --reference-audio is set.",
    )
    parser.add_argument(
        "--device",
        default=os.environ.get("TTS_DEVICE", "cuda:0"),
    )
-    parser.add_argument(
-        "--list-speakers",
-        action="store_true",
-        help="Load the model, print available speaker names, and exit.",
-    )
    return parser.parse_args()


@ -78,15 +114,18 @@ def load_model(model_id: str, device: str) -> Qwen3TTSModel:
 def cached_index_matches(
    index_path: Path,
    cues: list[dict],
-    speaker: str,
+    instruct: str,
    language: str,
+    seed: int,
+    temperature: float,
+    top_p: float,
 ) -> bool:
    """Return True iff index_path's cue list lines up with `cues` 1:1.

    Compared fields: ``cueIndex``, ``text``, ``gapBeforeMs`` plus the synth
-    settings (``speaker``, ``language``). All cue WAV files must also exist
-    on disk. Mismatched length, reordered cues, or a missing WAV invalidate
-    the cache.
+    settings (``instruct``, ``language``, ``seed``, ``temperature``, ``top_p``).
+    All cue WAV files must also exist on disk. Mismatched length, reordered
+    cues, or a missing WAV invalidate the cache.
    """
    if not index_path.exists():
        return False
@ -94,7 +133,13 @@ def cached_index_matches(
        cached = json.loads(index_path.read_text())
    except json.JSONDecodeError:
        return False
-    if cached.get("speaker") != speaker or cached.get("language") != language:
+    if cached.get("instruct") != instruct or cached.get("language") != language:
+        return False
+    if int(cached.get("seed", -1)) != seed:
+        return False
+    if float(cached.get("temperature", -1)) != temperature:
+        return False
+    if float(cached.get("topP", -1)) != top_p:
        return False
    cached_items = cached.get("items", [])
    if len(cached_items) != len(cues):
@ -112,52 +157,179 @@ def cached_index_matches(
    return True


+def seed_everything(seed: int) -> None:
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def _resolve_reference(
+    args: argparse.Namespace,
+    audio_dir: Path,
+    instruct: str,
+    language: str,
+    seed: int,
+    temperature: float,
+    top_p: float,
+) -> tuple[Path, str]:
+    """Return (ref_wav_path, ref_text) for the clone step.
+
+    If --reference-audio is supplied, validate and use it directly. Otherwise
+    mint one via VoiceDesign (cached on disk; cache invalidates when the
+    persona/sampling/seed changes). The design model is unloaded before
+    returning so the clone model can claim the GPU.
+    """
+    if args.reference_audio is not None:
+        if not args.reference_audio.exists():
+            raise SystemExit(f"[synth] --reference-audio does not exist: {args.reference_audio}")
+        if not args.reference_text:
+            raise SystemExit("[synth] --reference-text is required when --reference-audio is set")
+        print(
+            f"[synth] using user-supplied reference {args.reference_audio} «{args.reference_text}»",
+            flush=True,
+        )
+        return args.reference_audio, args.reference_text
+
+    ref_wav_path = audio_dir / "_reference.wav"
+    ref_meta_path = audio_dir / "_reference.meta.json"
+    ref_meta = {
+        "model": args.design_model,
+        "instruct": instruct,
+        "language": language,
+        "seed": seed,
+        "temperature": temperature,
+        "topP": top_p,
+        "text": REFERENCE_TEXT,
+    }
+    if (
+        ref_wav_path.exists()
+        and ref_meta_path.exists()
+        and _safe_load_json(ref_meta_path) == ref_meta
+    ):
+        print(f"[synth] reusing cached voice reference {ref_wav_path.name}", flush=True)
+        return ref_wav_path, REFERENCE_TEXT
+
+    print(
+        f"[synth] minting voice reference via VoiceDesign: «{REFERENCE_TEXT}»",
+        flush=True,
+    )
+    design_model = load_model(args.design_model, args.device)
+    seed_everything(seed)
+    ref_wavs, ref_sr = design_model.generate_voice_design(
+        text=[REFERENCE_TEXT],
+        language=language,
+        instruct=instruct,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+    )
+    ref_audio = ref_wavs[0]
+    if hasattr(ref_audio, "cpu"):
+        ref_audio = ref_audio.cpu().float().numpy()
+    sf.write(str(ref_wav_path), ref_audio, ref_sr)
+    ref_meta_path.write_text(json.dumps(ref_meta, indent=2))
+
+    # Free the design model before loading the clone model — both are 1.7B,
+    # we don't want them resident at the same time.
+    del design_model
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
+    return ref_wav_path, REFERENCE_TEXT
+
+
 def main() -> int:
    args = parse_args()

-    if args.list_speakers:
-        model = load_model(args.model, args.device)
-        speakers = model.get_supported_speakers()
-        print(json.dumps(speakers, indent=2, ensure_ascii=False))
-        return 0
+    storyboard_dir = args.output_dir / args.storyboard
+    script_path = storyboard_dir / "narration-script.json"
+    audio_dir = storyboard_dir / "audio"

-    if not args.script.exists():
-        print(f"[synth] script not found: {args.script}", file=sys.stderr)
+    if not script_path.exists():
+        print(f"[synth] script not found: {script_path}", file=sys.stderr)
        return 1

-    script = json.loads(args.script.read_text())
+    script = json.loads(script_path.read_text())
    cues = [c for c in script.get("items", []) if c.get("text", "").strip()]
    if not cues:
        print("[synth] script has no cues; nothing to generate.", file=sys.stderr)
        return 1

-    args.out_dir.mkdir(parents=True, exist_ok=True)
+    voice = script.get("voice")
+    if not voice:
+        print(
+            f"[synth] {script_path} has no `voice` block — re-run preflight.",
+            file=sys.stderr,
+        )
+        return 1
+    instruct = voice["instruct"]
+    language = voice["language"]
+    temperature = float(voice.get("temperature", 0.6))
+    top_p = float(voice.get("topP", 0.9))
+    seed = int(voice.get("seed", 42))
+
+    audio_dir.mkdir(parents=True, exist_ok=True)

    # Skip generation when the existing audio matches the script — same cue
-    # texts and same gapBeforeMs values in the same order. Saves ~30s of GPU
-    # time when iterating on activity timing without changing narration.
-    if cached_index_matches(args.out_dir / "index.json", cues, args.speaker, args.language):
+    # texts and same gapBeforeMs values in the same order, AND same synth
+    # settings (instruct/seed/temperature/top_p). Saves ~30s of GPU time when
+    # iterating on activity timing without changing narration or persona.
+    if cached_index_matches(
+        audio_dir / "index.json",
+        cues,
+        instruct,
+        language,
+        seed,
+        temperature,
+        top_p,
+    ):
        print(
-            f"[synth] cached audio in {args.out_dir} matches the current script — skipping generation",
+            f"[synth] [{args.storyboard}] cached audio matches the current script — skipping generation",
            flush=True,
        )
        return 0

-    model = load_model(args.model, args.device)
-
    texts = [c["text"].strip() for c in cues]
-    print(f"[synth] generating {len(texts)} cues in one batched call", flush=True)
+    print(f"[synth] [{args.storyboard}] persona: {instruct}", flush=True)
+    print(
+        f"[synth] [{args.storyboard}] sampling: temperature={temperature} top_p={top_p} seed={seed} language={language}",
+        flush=True,
+    )
+
+    # Two-stage generation:
+    #   1. VoiceDesign mints a single reference clip in the target persona
+    #      (or the user supplies one via --reference-audio).
+    #   2. Base + generate_voice_clone(x_vector_only_mode=True) conditions
+    #      every cue on the reference's speaker embedding.
+    # Without (2), batched generation drifts timbre across cues — a persona
+    # prompt anchors style but not identity, so each batch item picks its
+    # own voice. The reference WAV is cached so subsequent runs only load
+    # the clone model (saves ~20s + 3.4 GB of disk download).
+    ref_wav_path, ref_text = _resolve_reference(
+        args, audio_dir, instruct, language, seed, temperature, top_p
+    )
+
+    print(
+        f"[synth] cloning {len(texts)} cues from reference (x_vector_only) — one batched call",
+        flush=True,
+    )
    for i, t in enumerate(texts):
        print(f"[synth]   {i:2d}: {t}", flush=True)

-    # ONE batched call. generate_custom_voice handles text=List[str] natively
-    # and broadcasts the speaker/language across all items, so the entire
-    # narration is decoded in one model pass — same RNG state, same batch,
-    # consistent voice from cue to cue.
-    wavs, sr = model.generate_custom_voice(
+    clone_model = load_model(args.clone_model, args.device)
+    seed_everything(seed)
+    wavs, sr = clone_model.generate_voice_clone(
        text=texts,
-        language=args.language,
-        speaker=args.speaker,
+        language=language,
+        ref_audio=str(ref_wav_path),
+        ref_text=ref_text,
+        x_vector_only_mode=True,
+        non_streaming_mode=True,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
    )
    if len(wavs) != len(texts):
        print(
@ -171,7 +343,7 @@ def main() -> int:
        if hasattr(audio, "cpu"):
            audio = audio.cpu().float().numpy()
        wav_name = f"cue_{cue['cueIndex']:03d}.wav"
-        wav_path = args.out_dir / wav_name
+        wav_path = audio_dir / wav_name
        sf.write(str(wav_path), audio, sr)
        duration_ms = int(round(len(audio) * 1000 / sr))
        items.append(
@ -190,15 +362,21 @@ def main() -> int:
        )

    out_index = {
-        "speaker": args.speaker,
-        "language": args.language,
-        "model": args.model,
+        "storyboard": args.storyboard,
+        "instruct": instruct,
+        "language": language,
+        "designModel": args.design_model,
+        "cloneModel": args.clone_model,
+        "referenceText": ref_text,
+        "seed": seed,
+        "temperature": temperature,
+        "topP": top_p,
        "items": items,
    }
-    (args.out_dir / "index.json").write_text(json.dumps(out_index, indent=2))
+    (audio_dir / "index.json").write_text(json.dumps(out_index, indent=2))
    total_ms = sum(it["gapBeforeMs"] + it["durationMs"] for it in items)
    print(
-        f"[synth] {len(items)} cues, {total_ms}ms of audio (incl. gaps) -> {args.out_dir}",
+        f"[synth] [{args.storyboard}] {len(items)} cues, {total_ms}ms of audio (incl. gaps) -> {audio_dir}",
        flush=True,
    )
    return 0