alright

2026-05-26 19:45:13 +01:00 · 2026-05-26 19:45:13 +01:00 · 39ef5c6646
commit 39ef5c6646
parent c645b0f1d4
79 changed files with 5660 additions and 2199 deletions
--- a/video/package.json
+++ b/video/package.json
@ -10,6 +10,7 @@
    "setup-auth": "tsc && node dist/auth.js",
    "record": "tsc && node dist/record.js",
    "verify-output": "tsc && node dist/verify.js",
+    "review": "./review.sh",
    "render": "./render.sh"
  },
  "dependencies": {
--- a/video/render.sh
+++ b/video/render.sh
@ -14,9 +14,11 @@
 #                     bootstrap step; you supply real account credentials.
 #
 # Usage:
-#   ./render.sh                       # local stack
-#   ./render.sh --prod                # prod (requires LOGIN_EMAIL/LOGIN_PASSWORD)
+#   ./render.sh                       # local stack, English homepage landscape + portrait
+#   ./render.sh --prod                # prod, English homepage landscape + portrait
 #   ./render.sh --target prod         # same as --prod
+#   VIDEO_STORYBOARD_SET=ads ./render.sh --prod   # render social ads instead
+#   VIDEO_STORYBOARD_SET=demo ./render.sh --prod  # render every homepage locale
 #   ./render.sh --fresh-auth          # force re-auth even if cache is fresh
 #   ./render.sh --resume              # preserve completed recordings and continue
 #   ./render.sh --no-encode           # stop at WebM, skip MP4 encode
@ -312,6 +314,20 @@ if [ "$DO_AUDIO" = "1" ]; then
  say "Synchronising tts/ Python deps"
  uv sync --project tts ${uv_sync_extras[@]+"${uv_sync_extras[@]}"} || fail "uv sync failed in video/tts"

+  # Pick a TTS device unless the caller pinned one. Keep digits only from the
+  # query: nvidia-smi can print "[N/A]", which would make the -ge test error out.
+  if [ -z "${TTS_DEVICE:-}" ]; then
+    export TTS_DEVICE="cpu"
+    if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L >/dev/null 2>&1; then
+      gpu_free_mb="$(nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -cd '0-9')"
+      if [ "${gpu_free_mb:-0}" -ge 8000 ]; then
+        export TTS_DEVICE="cuda:0"
+      else
+        say "GPU has ${gpu_free_mb:-0}MiB free; using CPU for TTS to avoid CUDA OOM"
+      fi
+    fi
+  fi
+
  # Voice consistency: every ad in this set declares the same AD_VOICE
  # (instruct/seed/temperature/topP/referenceText). Even with seed-locked
  # VoiceDesign, independent invocations across processes can produce
@ -325,10 +341,17 @@ if [ "$DO_AUDIO" = "1" ]; then
  # which it always does, because every ad shares AD_VOICE.
  shared_ref_wav=""
  shared_ref_meta=""
+  shared_audio_dir=""
  for sb in "${STORYBOARDS[@]}"; do
-    if [ -n "$shared_ref_wav" ] && [ -f "$shared_ref_wav" ] && [ -f "$shared_ref_meta" ]; then
+    if [ -n "$shared_audio_dir" ] && [ -d "$shared_audio_dir" ]; then
      mkdir -p "output/$sb/audio"
-      cp -f "$shared_ref_wav"  "output/$sb/audio/_reference.wav"
+      for cached_audio_file in "$shared_audio_dir"/*.wav "$shared_audio_dir"/*.json; do
+        [ -f "$cached_audio_file" ] || continue
+        cp -f "$cached_audio_file" "output/$sb/audio/$(basename "$cached_audio_file")"
+      done
+    elif [ -n "$shared_ref_wav" ] && [ -f "$shared_ref_wav" ] && [ -f "$shared_ref_meta" ]; then
+      mkdir -p "output/$sb/audio"
+      cp -f "$shared_ref_wav" "output/$sb/audio/_reference.wav"
      cp -f "$shared_ref_meta" "output/$sb/audio/_reference.meta.json"
    fi
    say "Synthesising narration for [$sb]"
@ -342,6 +365,9 @@ if [ "$DO_AUDIO" = "1" ]; then
      shared_ref_meta="output/$sb/audio/_reference.meta.json"
      say "Locked voice reference to $shared_ref_wav — reusing for the rest of the set"
    fi
+    if [ -z "$shared_audio_dir" ] && [ -s "output/$sb/audio/index.json" ]; then
+      shared_audio_dir="output/$sb/audio"
+    fi
  done
 fi

--- a/video/review.sh
+++ b/video/review.sh
@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+#
+# Extract visual and audio snippets from rendered homepage videos.
+#
+# Usage:
+#   ./review.sh                         # recording + recording-mobile
+#   ./review.sh recording ad-01-foo      # explicit storyboard slugs
+#
+# Outputs land under output/review/current by default. Override REVIEW_DIR
+# if you want to keep multiple passes side by side.
+
+set -euo pipefail
+
+cd "$(dirname "$0")"
+
+# Exported so the embedded node helper writes its timing TSVs into the same
+# directory as the ffmpeg outputs instead of relying on a duplicated default.
+export REVIEW_DIR="${REVIEW_DIR:-output/review/current}"
+mkdir -p "$REVIEW_DIR"
+
+if [ "$#" -gt 0 ]; then
+  STORYBOARDS=("$@")
+else
+  STORYBOARDS=(recording recording-mobile)
+fi
+
+# Pass 1: per-video overview (stream info, contact sheet, poster frame, intro audio).
+for sb in "${STORYBOARDS[@]}"; do
+  src="output/$sb/recording.mp4"
+  if [ ! -s "$src" ]; then
+    echo "[review] missing rendered video: $src" >&2
+    exit 1
+  fi
+
+  width="$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=noprint_wrappers=1:nokey=1 "$src")"
+  height="$(ffprobe -v error -select_streams v:0 -show_entries stream=height -of default=noprint_wrappers=1:nokey=1 "$src")"
+  # ffprobe can succeed yet print nothing (e.g. no video stream); fail with a
+  # clear message instead of tripping the integer comparison below.
+  case "$width:$height" in
+    *[!0-9:]* | :* | *:)
+      echo "[review] could not read video dimensions: $src" >&2
+      exit 1
+      ;;
+  esac
+
+  # Portrait renders get a narrower tile and an earlier poster frame.
+  scale=360
+  poster_t=16
+  if [ "$height" -gt "$width" ]; then
+    scale=240
+    poster_t=12
+  fi
+
+  ffprobe -v error \
+    -select_streams v:0 \
+    -show_entries stream=codec_name,width,height,avg_frame_rate \
+    -show_entries format=duration,size \
+    -of default=noprint_wrappers=1 \
+    "$src" > "$REVIEW_DIR/$sb-ffprobe.txt"
+
+  # Contact sheet: one frame every 4 s, tiled 5x3.
+  ffmpeg -nostdin -y -loglevel warning -i "$src" \
+    -vf "fps=1/4,scale=${scale}:-1:flags=lanczos,tile=5x3:padding=12:margin=12:color=white" \
+    -frames:v 1 -update 1 -q:v 2 "$REVIEW_DIR/$sb-contact.jpg"
+
+  ffmpeg -nostdin -y -loglevel warning -i "$src" -ss "$poster_t" \
+    -frames:v 1 -update 1 -q:v 2 "$REVIEW_DIR/$sb-postercheck-t${poster_t}.jpg"
+
+  ffmpeg -nostdin -y -loglevel warning -i "$src" -t 12 \
+    -vn -ac 1 -ar 24000 "$REVIEW_DIR/$sb-audio-first12.wav"
+done
+
+# Pass 2: per-cue snippets. The node helper writes <storyboard>-timing.tsv into
+# REVIEW_DIR and prints one tab-separated row per cue: storyboard, cue index,
+# clip start (s), clip duration (s), cue midpoint (s).
+#
+# The rows go through a temp file rather than a process substitution because
+# the exit status of `< <(node ...)` is ignored: a helper crash (for example a
+# missing narration.json) would otherwise leave the loop with no rows and the
+# script would still report success.
+cue_rows="$(mktemp "${TMPDIR:-/tmp}/review-cues.XXXXXX")"
+trap 'rm -f -- "$cue_rows"' EXIT
+
+node - "${STORYBOARDS[@]}" > "$cue_rows" <<'NODE'
+const fs = require('fs');
+const storyboards = process.argv.slice(2);
+const review = process.env.REVIEW_DIR || 'output/review/current';
+
+for (const sb of storyboards) {
+  const narration = JSON.parse(fs.readFileSync(`output/${sb}/narration.json`, 'utf8'));
+  const audioPath = `output/${sb}/audio/index.json`;
+  const audio = fs.existsSync(audioPath)
+    ? JSON.parse(fs.readFileSync(audioPath, 'utf8'))
+    : { items: [] };
+  const byCue = new Map((audio.items || []).map((item) => [Number(item.cueIndex), item]));
+  const rows = ['cueIndex\tstartS\tendS\tdurationS\tgapBeforeMs\twav\ttext'];
+
+  narration.cues.forEach((cue, i) => {
+    const item = byCue.get(i) || {};
+    const startMs = Number(cue.videoTimeMs);
+    const durationMs = Number(item.durationMs || cue.durationMs);
+    if (!Number.isFinite(startMs) || !Number.isFinite(durationMs)) {
+      throw new Error(`${sb}: cue ${i} has a non-numeric videoTimeMs or durationMs`);
+    }
+    const endMs = startMs + durationMs;
+
+    rows.push([
+      i,
+      (startMs / 1000).toFixed(3),
+      (endMs / 1000).toFixed(3),
+      (durationMs / 1000).toFixed(3),
+      item.gapBeforeMs ?? '',
+      item.wav ?? '',
+      cue.text,
+    ].join('\t'));
+
+    // Clip window pads the cue by 250 ms on each side (start clamped at 0).
+    console.log([
+      sb,
+      i,
+      (Math.max(0, startMs - 250) / 1000).toFixed(3),
+      ((durationMs + 500) / 1000).toFixed(3),
+      ((startMs + durationMs / 2) / 1000).toFixed(3),
+    ].join('\t'));
+  });
+
+  fs.writeFileSync(`${review}/${sb}-timing.tsv`, rows.join('\n') + '\n');
+}
+NODE
+
+while IFS=$'\t' read -r sb idx clip_start clip_dur midpoint; do
+  src="output/$sb/recording.mp4"
+  cue="$(printf '%02d' "$idx")"
+
+  ffmpeg -nostdin -y -loglevel warning -i "$src" -ss "$midpoint" \
+    -frames:v 1 -update 1 -q:v 2 "$REVIEW_DIR/$sb-cue-$cue-mid.jpg"
+
+  ffmpeg -nostdin -y -loglevel warning -ss "$clip_start" -i "$src" -t "$clip_dur" \
+    -c:v libx264 -pix_fmt yuv420p -crf 18 -preset veryfast \
+    -c:a aac -b:a 128k -movflags +faststart \
+    "$REVIEW_DIR/$sb-cue-$cue.mp4"
+
+  ffmpeg -nostdin -y -loglevel warning -ss "$clip_start" -i "$src" -t "$clip_dur" \
+    -vn -ac 1 -ar 24000 "$REVIEW_DIR/$sb-cue-$cue.wav"
+done < "$cue_rows"
+
+echo "[review] wrote snippets to $REVIEW_DIR"
--- a/video/src/dashboard.ts
+++ b/video/src/dashboard.ts
@ -46,6 +46,9 @@ export interface HexagonClickTarget {

 type ApiKind = 'hexagons' | 'postcodes' | 'selection-stats' | 'tracked-api';

+const SELECTION_PANE_SELECTOR =
+  '[data-tutorial="right-pane"], .fixed.inset-0.z-50:has(button[aria-label="Close drawer"])';
+
 const TRACKED_API_PATHS = new Set([
  '/api/ai-filters',
  '/api/export',
@ -89,7 +92,8 @@ export class DashboardRecorder {

  async waitForSelectionReady(afterSelectionVersion: number, timeoutMs = 12000): Promise<void> {
    await this.page
-      .locator('[data-tutorial="right-pane"]')
+      .locator(SELECTION_PANE_SELECTOR)
+      .first()
      .waitFor({ state: 'visible', timeout: timeoutMs });
    await this.waitForStable({ afterSelectionVersion, timeoutMs });
  }
--- a/video/src/dom.ts
+++ b/video/src/dom.ts
@ -116,6 +116,18 @@ export async function installCursor(page: Page): Promise<void> {
      body.__demo-aspect-horizontal #__demo-caption {
        bottom: 7%;
      }
+      body.__demo-aspect-horizontal #__demo-caption.placement-side {
+        left: auto;
+        right: 3.4%;
+        bottom: 10%;
+        transform: translate(28px, 0);
+        max-width: min(560px, 30vw);
+        padding: 18px 22px;
+        border-radius: 18px;
+        font-size: 26px;
+        line-height: 1.18;
+        text-align: left;
+      }
      /* Vertical default: upper-third. Kept compact so the map remains the
         primary visual in the social ad cuts. */
      body.__demo-aspect-vertical #__demo-caption {
@ -130,6 +142,9 @@ export async function installCursor(page: Page): Promise<void> {
        opacity: 1;
        transform: translate(-50%, 0);
      }
+      body.__demo-aspect-horizontal #__demo-caption.placement-side.visible {
+        transform: translate(0, 0);
+      }

      #__demo-outro {
        position: fixed; inset: 0;
@ -565,13 +580,19 @@ export async function setAspectClass(
  }, aspect);
 }

-export async function showCaption(page: Page, text: string): Promise<void> {
-  await page.evaluate((t) => {
+export async function showCaption(
+  page: Page,
+  text: string,
+  placement?: 'side'
+): Promise<void> {
+  await page.evaluate(({ t, placement }) => {
    const el = document.getElementById('__demo-caption');
    if (!el) return;
    el.textContent = t;
+    el.classList.remove('placement-side'); // drop any side placement left on the element by an earlier call
+    if (placement) el.classList.add(`placement-${placement}`); // opt this caption into the matching placement-* CSS variant
    el.classList.add('visible');
-  }, text);
+  }, { t: text, placement });
 }

 /**
--- a/video/src/motion.ts
+++ b/video/src/motion.ts
@ -74,11 +74,10 @@ export async function smoothMove(
 * "Fake" type: progressively set the textarea value, dispatching
 * React-compatible input events.
 *
- * Cadence is generated as a per-char weight ratio (so spaces and punctuation
- * read as natural pauses), then **rescaled** so that the sum of delays equals
- * `totalDurationMs` exactly. The runner depends on this: it budgets a
- * specific number of ms for the type step, and any divergence would cascade
- * into narration drift.
+ * Do not do one Playwright round-trip per character here. Long prompts can
+ * turn a 4s typing budget into 9s of wall-clock time on a busy recorder.
+ * Instead, animate through paced chunks. It still reads as typing on video,
+ * but the runner can keep narration and visuals aligned.
 */
 export async function fakeType(
  page: Page,
@ -86,17 +85,19 @@ export async function fakeType(
  text: string,
  totalDurationMs: number
 ): Promise<void> {
-  const steps = text.length;
-  if (steps === 0) {
+  if (text.length === 0) {
    if (totalDurationMs > 0) await sleep(totalDurationMs);
    return;
  }

-  const weights = computeTypingWeights(text);
-  const weightSum = weights.reduce((a, b) => a + b, 0);
-  const msPerWeight = totalDurationMs / weightSum;
+  const steps = Math.min(
+    text.length,
+    Math.max(1, Math.min(48, Math.round(totalDurationMs / 95)))
+  );
+  const startedAt = Date.now();

  for (let i = 1; i <= steps; i++) {
+    const charCount = Math.max(1, Math.round((i / steps) * text.length));
    await page.evaluate(
      ({ selector, value }) => {
        const ta = document.querySelector(selector) as HTMLTextAreaElement | null;
@ -110,27 +111,16 @@ export async function fakeType(
        setValue.call(ta, value);
        ta.dispatchEvent(new Event('input', { bubbles: true }));
      },
-      { selector, value: text.slice(0, i) }
+      { selector, value: text.slice(0, charCount) }
    );
    if (i < steps) {
-      const ms = Math.max(0, Math.round(weights[i - 1] * msPerWeight));
-      if (ms > 0) await sleep(ms);
+      const targetElapsed = (totalDurationMs * i) / steps;
+      const waitMs = startedAt + targetElapsed - Date.now();
+      if (waitMs > 0) await sleep(waitMs);
    }
  }
 }

-function computeTypingWeights(text: string): number[] {
-  const cadence = [0.82, 1.08, 0.94, 1.22, 0.88, 1.14, 0.98, 1.28];
-  return Array.from(text, (char, index) => {
-    let weight = cadence[index % cadence.length];
-    if (char === ' ') weight += 0.9;
-    if (/[,.!?;:]/.test(char)) weight += 1.8;
-    const next = text[index + 1];
-    if (next === ' ' && index % 4 === 0) weight += 0.55;
-    return weight;
-  });
-}
-
 /**
 * Drag the right-hand thumb of a Radix slider to a target track fraction.
 * Returns the final cursor position so callers can chain a smoothMove afterwards.
--- a/video/src/runner.ts
+++ b/video/src/runner.ts
@ -101,7 +101,7 @@ async function runCue(
    videoTimeMs: cursor.ms + leadInMs,
    durationMs: measuredAudioMs,
  });
-  await showCaption(ctx.page, cue.text);
+  await showCaption(ctx.page, cue.text, cue.captionPlacement);

  const during = cue.during ?? [];
  const declaredSum = during.reduce((s, a) => s + a.durationMs, 0);
@ -184,7 +184,36 @@ async function runActivity(ctx: ScriptCtx, step: Activity): Promise<void> {
      return;
    }
    case 'click': {
-      const to = await resolveTarget(ctx, step.target);
+      const selectionVersion = ctx.dashboard.getSelectionStatsVersion();
+      const candidates =
+        step.target.kind === 'hexagon' && step.waitForSelectionReady
+          ? await ctx.dashboard.visibleHexagonTargets(4)
+          : [await resolveTarget(ctx, step.target)];
+      let lastError: unknown = null;
+
+      for (let i = 0; i < candidates.length; i++) {
+        const to = candidates[i];
+        const moveMs = Math.max(120, Math.round(step.durationMs * 0.7));
+        await smoothMove(ctx.page, ctx.cursor, to, { durationMs: moveMs });
+        ctx.cursor = to;
+        await ctx.page.mouse.click(to.x, to.y);
+        if (!step.waitForSelectionReady) return;
+
+        try {
+          await ctx.dashboard.waitForSelectionReady(
+            selectionVersion,
+            Math.min(step.timeoutMs ?? 12000, i === candidates.length - 1 ? 12000 : 4000)
+          );
+          return;
+        } catch (err) {
+          lastError = err;
+        }
+      }
+      throw lastError ?? new Error('Click did not open the selection pane');
+    }
+    case 'clickIfVisible': {
+      const to = await tryResolveTarget(ctx, step.target, step.timeoutMs ?? 700);
+      if (!to) return;
      const moveMs = Math.max(120, Math.round(step.durationMs * 0.7));
      await smoothMove(ctx.page, ctx.cursor, to, { durationMs: moveMs });
      ctx.cursor = to;
@ -196,16 +225,109 @@ async function runActivity(ctx: ScriptCtx, step: Activity): Promise<void> {
      return;
    case 'mapZoom': {
      const point = await resolveTarget(ctx, step.target);
-      await ctx.page.mouse.move(point.x, point.y);
-      const perStepMs = Math.floor(step.durationMs / Math.max(1, step.steps));
+      const mapVersion = ctx.dashboard.getMapDataVersion();
      const delta = step.direction === 'out' ? -MAP_ZOOM_WHEEL_DELTA : MAP_ZOOM_WHEEL_DELTA;
-      for (let i = 0; i < step.steps; i++) {
-        await ctx.page.mouse.wheel(0, delta);
-        if (perStepMs > 0) await sleep(perStepMs);
+      const handled = await ctx.page.evaluate(
+        async ({ x, y, steps, durationMs, direction }) => {
+          const root = document.querySelector('.maplibregl-map') as HTMLElement | null;
+          const fiberKey = root
+            ? Object.getOwnPropertyNames(root).find((key) => key.startsWith('__reactFiber$'))
+            : undefined;
+          let fiber = fiberKey ? (root as unknown as Record<string, unknown>)[fiberKey] : null;
+          let mapRef: unknown = null;
+          while (fiber && typeof fiber === 'object') {
+            const maybeFiber = fiber as {
+              ref?: { current?: unknown };
+              return?: unknown;
+            };
+            const current = maybeFiber.ref?.current;
+            if (
+              current &&
+              typeof current === 'object' &&
+              typeof (current as { getMap?: unknown }).getMap === 'function'
+            ) {
+              mapRef = current;
+              break;
+            }
+            fiber = maybeFiber.return ?? null;
+          }
+
+          const map = (mapRef as { getMap?: () => unknown } | null)?.getMap?.();
+          if (!map || typeof map !== 'object') return false;
+          const mapApi = map as {
+            getCanvas: () => HTMLCanvasElement;
+            getZoom: () => number;
+            getMinZoom?: () => number;
+            getMaxZoom?: () => number;
+            unproject: (point: [number, number]) => unknown;
+            zoomTo: (
+              zoom: number,
+              options: { around?: unknown; duration?: number; essential?: boolean }
+            ) => void;
+          };
+          if (
+            typeof mapApi.getCanvas !== 'function' ||
+            typeof mapApi.getZoom !== 'function' ||
+            typeof mapApi.unproject !== 'function' ||
+            typeof mapApi.zoomTo !== 'function'
+          ) {
+            return false;
+          }
+
+          const rect = mapApi.getCanvas().getBoundingClientRect();
+          const around = mapApi.unproject([x - rect.left, y - rect.top]);
+          const sign = direction === 'out' ? -1 : 1;
+          const zoomDelta = Math.max(0.25, Math.min(5.2, steps * 0.28)) * sign;
+          const minZoom = mapApi.getMinZoom?.() ?? 0;
+          const maxZoom = mapApi.getMaxZoom?.() ?? 22;
+          const targetZoom = Math.max(minZoom, Math.min(maxZoom, mapApi.getZoom() + zoomDelta));
+          mapApi.zoomTo(targetZoom, { around, duration: durationMs, essential: true });
+          await new Promise((resolve) => window.setTimeout(resolve, durationMs));
+          return true;
+        },
+        {
+          x: point.x,
+          y: point.y,
+          steps: step.steps,
+          durationMs: step.durationMs,
+          direction: step.direction,
+        }
+      );
+
+      if (!handled) {
+        const perStepMs = Math.floor(step.durationMs / Math.max(1, step.steps));
+        await ctx.page.evaluate(
+          async ({ x, y, steps, durationMs, delta }) => {
+            const wait = (ms: number) =>
+              new Promise<void>((resolve) => window.setTimeout(resolve, ms));
+            const perStep = Math.floor(durationMs / Math.max(1, steps));
+            for (let i = 0; i < steps; i++) {
+              const target = document.elementFromPoint(x, y) ?? document.querySelector('canvas');
+              target?.dispatchEvent(
+                new WheelEvent('wheel', {
+                  bubbles: true,
+                  cancelable: true,
+                  clientX: x,
+                  clientY: y,
+                  deltaY: delta,
+                  deltaMode: WheelEvent.DOM_DELTA_PIXEL,
+                  view: window,
+                })
+              );
+              if (perStep > 0) await wait(perStep);
+            }
+          },
+          { x: point.x, y: point.y, steps: step.steps, durationMs: step.durationMs, delta }
+        );
+        if (perStepMs > 0) await sleep(0);
+      }
+      if (step.waitForMapSettled) {
+        await ctx.dashboard.waitForMapSettled(mapVersion, step.timeoutMs ?? 12000);
      }
      return;
    }
-    case 'dragSlider':
+    case 'dragSlider': {
+      const mapVersion = ctx.dashboard.getMapDataVersion();
      ctx.cursor = await smoothDragSliderThumb(
        ctx.page,
        step.thumbSelector,
@ -214,12 +336,21 @@ async function runActivity(ctx: ScriptCtx, step: Activity): Promise<void> {
        step.toFraction,
        step.durationMs
      );
+      if (step.waitForMapSettled) {
+        await ctx.dashboard.waitForMapSettled(mapVersion, step.timeoutMs ?? 12000);
+      }
      return;
-    case 'submitForm':
+    }
+    case 'submitForm': {
+      const mapVersion = ctx.dashboard.getMapDataVersion();
      await ctx.page.evaluate((selector) => {
        document.querySelector<HTMLFormElement>(selector)?.requestSubmit();
      }, step.formSelector);
+      if (step.waitForMapSettled) {
+        await ctx.dashboard.waitForMapSettled(mapVersion, step.timeoutMs ?? 12000);
+      }
      return;
+    }
    case 'showOutro':
      await showOutro(ctx.page, step.brand, step.tagline, step.url);
      return;
@ -269,6 +400,30 @@ async function resolveTarget(
  return { x: box.x + box.width / 2, y: box.y + box.height / 2 };
 }

+/**
+ * Non-throwing counterpart to resolveTarget: resolves to the target's point,
+ * or to null when no point can be determined.
+ */
+async function tryResolveTarget(
+  ctx: ScriptCtx,
+  target: Target,
+  timeoutMs: number
+): Promise<{ x: number; y: number } | null> {
+  if (target.kind !== 'element') {
+    // Nothing to wait for here; only swallow a failed resolution.
+    return resolveTarget(ctx, target).catch(() => null);
+  }
+
+  // Bounded wait for the first match to become visible, then aim at its centre.
+  const firstMatch = ctx.page.locator(target.selector).first();
+  const centreOfMatch = async (): Promise<{ x: number; y: number } | null> => {
+    await firstMatch.waitFor({ state: 'visible', timeout: timeoutMs });
+    const rect = await firstMatch.boundingBox({ timeout: timeoutMs });
+    return rect ? { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 } : null;
+  };
+  return centreOfMatch().catch(() => null);
+}
+
 /**
 * Load synth's measured cue durations. Falls back to a worst-case estimate
 * if the manifest is missing — that path is only used for ``--no-audio``
--- a/video/src/script.ts
+++ b/video/src/script.ts
@ -118,7 +118,15 @@ export type Activity =
  /** Slide the cursor from its current position to `target`. */
  | { kind: 'moveCursor'; target: Target; durationMs: number }
  /** Move + click + ripple. `durationMs` is the whole gesture, including settle. */
-  | { kind: 'click'; target: Target; durationMs: number }
+  | {
+      kind: 'click';
+      target: Target;
+      durationMs: number;
+      waitForSelectionReady?: boolean;
+      timeoutMs?: number;
+    }
+  /** Move + click when the target is visible; skip without failing otherwise. */
+  | { kind: 'clickIfVisible'; target: Target; durationMs: number; timeoutMs?: number }
  /** Type into a textarea/input over exactly `durationMs`. */
  | { kind: 'type'; selector: string; text: string; durationMs: number }
  /** Grow or shrink the visible cursor (CSS scale). */
@ -135,6 +143,8 @@ export type Activity =
      steps: number;
      durationMs: number;
      direction?: 'in' | 'out';
+      waitForMapSettled?: boolean;
+      timeoutMs?: number;
    }
  /** Drag the right thumb of a Radix slider to a fraction in [0,1]. */
  | {
@ -143,9 +153,17 @@ export type Activity =
      trackSelector: string;
      toFraction: number;
      durationMs: number;
+      waitForMapSettled?: boolean;
+      timeoutMs?: number;
    }
  /** Submit a form found by selector and wait `durationMs`. */
-  | { kind: 'submitForm'; formSelector: string; durationMs: number }
+  | {
+      kind: 'submitForm';
+      formSelector: string;
+      durationMs: number;
+      waitForMapSettled?: boolean;
+      timeoutMs?: number;
+    }
  /** Reveal the closing brand card. */
  | { kind: 'showOutro'; brand: string; tagline: string; url: string; durationMs: number }
  /** Reveal a full-screen ad-style overlay over the live map. */
@ -182,6 +200,8 @@ export type Activity =
 */
 export interface Cue {
  text: string;
+  /** Optional cue-specific caption layout for shots where the default lower-third hides the product. */
+  captionPlacement?: 'side';
  gapBeforeMs: number;
  during?: Activity[];
  tail?: Activity[];
--- a/video/src/storyboard.ts
+++ b/video/src/storyboard.ts
@ -40,7 +40,7 @@ type FormFactor = 'desktop' | 'mobile';
 // most prominent thing on screen (it sits at the top of the bottom
 // sheet which covers ~44% of the viewport), so we skip the wrapper zoom
 // entirely — see buildPre().
-const AI_ZOOM_SCALE_DESKTOP = 2.4;
+const AI_ZOOM_SCALE_DESKTOP = 2.05;

 const TT_CARD_SELECTOR = '[data-filter-name="tt_0"]';
 const TT_SLIDER_MAX = 120;
@ -54,14 +54,16 @@ const TT_DRAG_TO_MIN = 20;
 // sheet, not the map).
 const MAP_FOCUS_DESKTOP = vfrac(1140 / 1920, 605 / 1080);
 const MAP_FOCUS_MOBILE = vfrac(0.5, 0.3);
+const HOMEPAGE_RIGHT_PANE_SELECTOR =
+  '[data-tutorial="right-pane"], .fixed.inset-0.z-50:has(button[aria-label="Close drawer"])';

-// Mobile mapZoom intensity. 6 wheel-steps from the initial zoom (12)
-// lands around zoom 14.5 — postcode polygons clearly visible, individual
-// streets named, hex aggregation broken open. The previous 18-step
-// drill ended past zoom 20 (street-level vector tiles only), so the
-// click landed on featureless terrain.
-const MOBILE_MAP_ZOOM_STEPS = 6;
-const MOBILE_MAP_ZOOM_MS = 1400;
+// Mobile mapZoom intensity. Keep mobile below the old 18-step drill that
+// overshot into featureless street-level tiles, but make the homepage pass
+// visibly break from city blobs into postcode/street scale.
+const MOBILE_MAP_ZOOM_STEPS = 9;
+const MOBILE_MAP_ZOOM_MS = 2200;
+const DESKTOP_MAP_ZOOM_STEPS = 18;
+const DESKTOP_MAP_ZOOM_MS = 4300;

 type RecordingLocale = 'en' | 'de' | 'zh' | 'hi';

@ -74,6 +76,7 @@ interface RecordingLocalization {
  promptText: string;
  travelTimeLabel: string;
  exportButtonTitle: string;
+  colourMapTitle: string;
  brand: {
    name: string;
    tagline: string;
@ -84,6 +87,8 @@ interface RecordingLocalization {
    prompt: string;
    dashboard: string;
    filters: string;
+    zoom: string;
+    open: string;
    details: string;
    shortlist: string;
  };
@ -105,22 +110,29 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
      'strong Manchester accent.',
    voiceReferenceText:
      "Welcome to the demonstration. This is the narrator voice you'll hear throughout the video.",
-    promptText: 'Flat under £300k, 35 min to Manchester, good schools, low crime, quiet streets',
+    promptText:
+      'First home under £315k, 35 min to Manchester, good schools, check crime, road noise, tree cover, fast broadband',
    travelTimeLabel: 'Manchester city centre',
    exportButtonTitle: 'Export to Excel',
+    colourMapTitle: 'Colour map',
    brand: {
      name: 'Perfect Postcode',
-      tagline: 'Know where to look before listings take over.',
+      tagline: 'Find the area before the house.',
      url: BRAND_URL,
    },
    cues: {
-      describe: "Don't pick a home by scrolling listings.",
+      describe: 'A Manchester first-time buyer wants to stop wasting Saturdays on the wrong streets.',
      prompt:
-        'Describe what you want. Budget, commute, schools, whatever matters.',
-      dashboard: 'The map lights up with every postcode in England that fits.',
-      filters: 'Move one slider. The map answers instantly.',
-      details: 'Open any postcode. Sold prices. Schools. Crime. Noise. All on one screen.',
-      shortlist: 'Take your shortlist to the listings. Now you know where to search.',
+        'They type the whole brief: under £315k, thirty-five minutes to town, good schools, low crime, quieter roads, trees, and fast broadband.',
+      dashboard:
+        'The map keeps only the postcodes that match. The rest of the country drops away.',
+      filters:
+        'Now tweak it: cut the commute to twenty minutes and colour the map by travel time.',
+      zoom: 'Zoom in until the blobs become streets, parks, and postcode blocks.',
+      open: 'Open one block that still passes the filters.',
+      details:
+        'On the right, you can see why it passed: journey time, listing links, Street View, sold prices, schools, crime, the noise number, and the tree score.',
+      shortlist: 'Export those postcodes and only search there.',
    },
  },
  de: {
@ -131,26 +143,29 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
      'Calm and cheerful German male narrator with clear standard German pronunciation ' +
      'and a friendly, practical delivery.',
    voiceReferenceText:
-      'Willkommen zur Demonstration. Diese Sprecherstimme hören Sie im gesamten Video.',
+      'Willkommen zur Demonstration. Diese Sprecherstimme hörst du im gesamten Video.',
    promptText:
      'Wohnung unter £300k, 35 Min. nach Manchester, gute Schulen, niedrige Kriminalität, ruhige Straßen',
    travelTimeLabel: 'Stadtzentrum Manchester',
    exportButtonTitle: 'Als Excel exportieren',
+    colourMapTitle: 'Karte einfärben',
    brand: {
      name: 'Perfect Postcode',
-      tagline: 'Wissen, wo Sie suchen sollten, bevor Inserate Ihre Suche bestimmen.',
+      tagline: 'Wissen, wo du suchen solltest, bevor Inserate deine Suche bestimmen.',
      url: BRAND_URL,
    },
    cues: {
-      describe: 'Wählen Sie kein Zuhause durch endloses Scrollen.',
+      describe: 'Wähle kein Zuhause durch endloses Scrollen.',
      prompt:
-        'Beschreiben Sie, was Ihnen wichtig ist. Budget, Pendelzeit, Schulen, alles.',
+        'Beschreibe, was dir wichtig ist. Budget, Pendelzeit, Schulen, alles.',
      dashboard: 'Die Karte zeigt jede passende Postleitzahl in ganz England.',
      filters: 'Ein Regler bewegt sich. Die Karte antwortet sofort.',
+      zoom: 'Jetzt von der Stadtansicht bis zu echten Straßen zoomen.',
+      open: 'Öffne einen Treffer und sieh, warum er übrig bleibt.',
      details:
-        'Öffnen Sie eine Postleitzahl. Preise. Schulen. Kriminalität. Lärm. Alles auf einer Karte.',
+        'Öffne eine Postleitzahl. Preise. Schulen. Kriminalität. Lärm. Alles auf einer Karte.',
      shortlist:
-        'Mit dieser Auswahl zu den Inseraten. Sie wissen jetzt, wo Sie suchen sollen.',
+        'Mit dieser Auswahl zu den Inseraten. Du weißt jetzt, wo du suchen sollst.',
    },
  },
  zh: {
@ -164,6 +179,7 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
    promptText: '30万英镑以内的公寓，35分钟到曼彻斯特，学校好，犯罪率低，街道安静',
    travelTimeLabel: '曼彻斯特市中心',
    exportButtonTitle: '导出为 Excel',
+    colourMapTitle: '为地图着色',
    brand: {
      name: 'Perfect Postcode',
      tagline: '先知道该看哪里，再让房源牵着你走。',
@ -174,6 +190,8 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
      prompt: '用日常话告诉地图你想要的家。预算、通勤、学校，什么都行。',
      dashboard: '地图点亮每一个符合条件的英格兰邮编。',
      filters: '动一个滑块，地图立刻给答案。',
+      zoom: '现在从城市范围放大到真实街道。',
+      open: '打开一个匹配项，看看它为什么留下来。',
      details: '打开任意邮编。成交价、学校、犯罪率、噪音，一目了然。',
      shortlist: '带着这份清单去房源网站。现在你知道该在哪儿找了。',
    },
@ -190,6 +208,7 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
    promptText: 'Flat under £300k, 35 min to Manchester, good schools, low crime, quiet streets',
    travelTimeLabel: 'Manchester city centre',
    exportButtonTitle: 'Excel में निर्यात करें',
+    colourMapTitle: 'नक्शे को रंगें',
    brand: {
      name: 'Perfect Postcode',
      tagline: 'Know where to look before listings take over.',
@ -201,6 +220,8 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
        'Describe what you want. Budget, commute, schools, whatever matters.',
      dashboard: 'The map lights up with every postcode in England that fits.',
      filters: 'Move one slider. The map answers instantly.',
+      zoom: 'Now zoom in from the city pattern to actual streets.',
+      open: 'Open one match and see why it made the cut.',
      details: 'Open any postcode. Sold prices. Schools. Crime. Noise. All on one screen.',
      shortlist: 'Take your shortlist to the listings. Now you know where to search.',
    },
@ -211,25 +232,23 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
  const copy = RECORDING_LOCALIZATIONS[locale];
  const isMobile = formFactor === 'mobile';
  const mapFocus = isMobile ? MAP_FOCUS_MOBILE : MAP_FOCUS_DESKTOP;
-  const mapZoomSteps = isMobile ? MOBILE_MAP_ZOOM_STEPS : 18;
-  const mapZoomMs = isMobile ? MOBILE_MAP_ZOOM_MS : 1500;
-  // Click target stays at the mapZoom focus point. On mobile we kept the
-  // zoom shallow (6 wheel-steps → ~zoom 14.5) specifically so the centre
-  // of the visible map area lands on a real postcode polygon at that
-  // depth; using a vfrac target is deterministic and avoids needing a
-  // `[data-tutorial="map"]` anchor in the MobileMapPage DOM (it has
-  // none — that attribute lives only on DesktopMapPage).
-  const clickTarget = mapFocus;
+  const mapZoomSteps = isMobile ? MOBILE_MAP_ZOOM_STEPS : DESKTOP_MAP_ZOOM_STEPS;
+  const mapZoomMs = isMobile ? MOBILE_MAP_ZOOM_MS : DESKTOP_MAP_ZOOM_MS;
+  const colourTravelTime = el(`${TT_CARD_SELECTOR} button[title="${copy.colourMapTitle}"]`);
+  const postcodeDemoTarget = isMobile
+    ? vfrac(320 / 540, 255 / 960)
+    : vfrac(1087 / 1920, 520 / 1080);
+  const openPostcodeTarget = postcodeDemoTarget;
+  const zoomPostcodeTarget = postcodeDemoTarget;
+  const cursorParkTarget = isMobile ? vfrac(0.12, 0.61) : vfrac(0.12, 0.18);
+  const definingCharacteristicsSelector =
+    '[data-tutorial="right-pane"] button:has-text("Defining characteristics"), ' +
+    '.fixed.inset-0.z-50:has(button[aria-label="Close drawer"]) button:has-text("Defining characteristics")';

-  // Cue 5 (shortlist) on mobile: the Export button lives inside the
-  // hidden hamburger menu, not in the header — opening it cleanly would
-  // need a localised aria-label lookup. Instead we pull the map back
-  // out to the filtered overview so the cut ends on a satisfying wide
-  // shot of the matching postcodes rather than the post-click zoom.
  const shortlistActivities: Storyboard['cues'][number]['during'] =
    formFactor === 'desktop'
      ? [
-          { kind: 'zoomReset', durationMs: 900 },
+          { kind: 'zoomReset', durationMs: 800 },
          {
            kind: 'click',
            target: el(`button[title="${copy.exportButtonTitle}"]`),
@ -237,14 +256,19 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
          },
        ]
      : [
-          // Reverse the cue-4 zoom-in exactly so we land back on the
-          // initial filtered dashboard view (hexagons visible).
+          {
+            kind: 'click',
+            target: el('button[aria-label="Close drawer"]'),
+            durationMs: 650,
+          },
          {
            kind: 'mapZoom',
            target: mapFocus,
            steps: MOBILE_MAP_ZOOM_STEPS,
            durationMs: MOBILE_MAP_ZOOM_MS,
            direction: 'out',
+            waitForMapSettled: true,
+            timeoutMs: 12000,
          },
        ];

@ -252,7 +276,17 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
    {
      text: copy.cues.describe,
      gapBeforeMs: 0,
-      tail: [{ kind: 'wait', durationMs: 250 }],
+      during: isMobile
+        ? [{ kind: 'wait', durationMs: 700 }]
+        : [
+            {
+              kind: 'zoomTo',
+              target: el('[data-tutorial="ai-filters"]'),
+              scale: AI_ZOOM_SCALE_DESKTOP,
+              durationMs: 900,
+            },
+          ],
+      tail: [{ kind: 'wait', durationMs: 150 }],
    },
    {
      text: copy.cues.prompt,
@ -262,17 +296,25 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
          kind: 'type',
          selector: '[data-tutorial="ai-filters"] textarea',
          text: copy.promptText,
-          durationMs: 3000,
+          durationMs: 4300,
        },
-        { kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1200 },
      ],
-      tail: [{ kind: 'wait', durationMs: 500 }],
+      tail: [{ kind: 'wait', durationMs: 120 }],
    },
    {
      text: copy.cues.dashboard,
      gapBeforeMs: 300,
-      during: [{ kind: 'zoomReset', durationMs: 1400 }],
-      tail: [{ kind: 'wait', durationMs: 500 }],
+      during: [
+        {
+          kind: 'submitForm',
+          formSelector: '[data-tutorial="ai-filters"] form',
+          durationMs: 2200,
+          waitForMapSettled: true,
+          timeoutMs: 15000,
+        },
+        { kind: 'zoomReset', durationMs: 900 },
+      ],
+      tail: [{ kind: 'wait', durationMs: 300 }],
    },

    {
@ -284,45 +326,106 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
          thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`,
          trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`,
          toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX,
-          durationMs: 1000,
+          durationMs: 1800,
        },
+        { kind: 'click', target: colourTravelTime, durationMs: 750 },
      ],
-      tail: [{ kind: 'wait', durationMs: 400 }],
+      tail: [{ kind: 'wait', durationMs: 350 }],
    },

    {
-      text: copy.cues.details,
+      text: copy.cues.zoom,
      gapBeforeMs: 500,
      during: [
        { kind: 'cursorScale', scale: 1.4, durationMs: 200 },
        {
          kind: 'mapZoom',
-          target: mapFocus,
+          target: zoomPostcodeTarget,
          steps: mapZoomSteps,
          durationMs: mapZoomMs,
        },
      ],
      tail: [
-        // Wait for the post-zoom /api/postcodes response and a redraw
-        // before the click — otherwise the click can fire on a stale
-        // frame and miss the polygon.
-        { kind: 'wait', durationMs: 500 },
+        { kind: 'moveCursor', target: cursorParkTarget, durationMs: 250 },
+        { kind: 'wait', durationMs: 120 },
+      ],
+    },
+
+    {
+      text: copy.cues.open,
+      gapBeforeMs: 200,
+      during: [
        {
          kind: 'click',
-          target: clickTarget,
-          durationMs: 700,
+          target: openPostcodeTarget,
+          durationMs: 1200,
+          waitForSelectionReady: true,
+          timeoutMs: 6000,
        },
-        { kind: 'cursorScale', scale: 1, durationMs: 280 },
-        // Linger so the climax cue lands on the right-pane reveal.
-        { kind: 'wait', durationMs: 1500 },
+        { kind: 'cursorScale', scale: 1, durationMs: 250 },
      ],
+      tail: [{ kind: 'wait', durationMs: 300 }],
+    },
+
+    {
+      text: copy.cues.details,
+      captionPlacement: isMobile ? undefined : 'side',
+      gapBeforeMs: 250,
+      during: isMobile
+        ? [
+            {
+              kind: 'scrollPane',
+              selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
+              top: 430,
+              durationMs: 900,
+            },
+            {
+              kind: 'clickIfVisible',
+              target: el(definingCharacteristicsSelector),
+              durationMs: 650,
+              timeoutMs: 700,
+            },
+            {
+              kind: 'scrollPane',
+              selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
+              top: 700,
+              durationMs: 850,
+            },
+          ]
+        : [
+            {
+              kind: 'zoomTo',
+              target: el(HOMEPAGE_RIGHT_PANE_SELECTOR),
+              scale: 1.35,
+              durationMs: 950,
+            },
+            {
+              kind: 'scrollPane',
+              selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
+              top: 360,
+              durationMs: 850,
+            },
+            {
+              kind: 'clickIfVisible',
+              target: el(definingCharacteristicsSelector),
+              durationMs: 650,
+              timeoutMs: 700,
+            },
+            {
+              kind: 'scrollPane',
+              selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
+              top: 920,
+              durationMs: 850,
+            },
+          ],
+      tail: [{ kind: 'wait', durationMs: 700 }],
    },

    {
      text: copy.cues.shortlist,
      gapBeforeMs: 500,
      during: shortlistActivities,
-      tail: [{ kind: 'wait', durationMs: 800 }],
+      tail: [{ kind: 'wait', durationMs: 650 }],
    },

    {
@ -344,26 +447,14 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard

 function buildPre(formFactor: FormFactor): Storyboard['pre'] {
+  // Pre-roll activities run before the first cue: clear the vignette, then
+  // pause briefly. The desktop zoom into the AI filter card no longer lives
+  // here; it is performed by the first cue's `during` activities instead.
+  // NOTE(review): both branches below now return the same two activities
+  // (clearVignette, then a 120 ms wait), so the mobile/desktop split can be
+  // collapsed into a single return if no form-factor difference is planned.
  if (formFactor === 'mobile') {
-    // Mobile skips the wrapper-zoom into the AI card. On a 540-wide
-    // viewport the bottom sheet already occupies ~44% of the screen
-    // and the AI card sits at the top of it — leaning further in would
-    // overflow the card width and crop the placeholder. We just clear
-    // the vignette and let the typing draw the eye.
    return [
      { kind: 'clearVignette', durationMs: 0 },
-      { kind: 'wait', durationMs: 400 },
+      { kind: 'wait', durationMs: 120 },
    ];
  }
  return [
    { kind: 'clearVignette', durationMs: 0 },
-    { kind: 'wait', durationMs: 200 },
-    {
-      kind: 'zoomTo',
-      target: el('[data-tutorial="ai-filters"]'),
-      scale: AI_ZOOM_SCALE_DESKTOP,
-      durationMs: 1300,
-    },
-    { kind: 'wait', durationMs: 140 },
+    { kind: 'wait', durationMs: 120 },
  ];
 }

@ -452,10 +543,13 @@ function createRecordingStoryboard(
      // Filters returned by the AI stub. Keys MUST match real feature names
      // from /api/features (verified against the running server's schema).
      stubbedFilters: {
-        'Property type': ['Flats/Maisonettes'],
-        'Estimated current price': [0, 300000],
-        'Serious crime per 1k residents (avg/yr)': [0, 55],
-        'Outstanding primary schools within 2km': [1, 10],
+        'Property type': ['Flats/Maisonettes', 'Semi-Detached'],
+        'Estimated current price': [0, 315000],
+        'Serious crime per 1k residents (avg/yr)': [0, 70],
+        'Good+ primary schools within 2km': [1, 10],
+        'Noise (dB)': [50, 70],
+        'Street tree density percentile': [25, 100],
+        'Max available download speed (Mbps)': ['100', '300', '1000'],
      },
      // Travel-time filters returned by the AI stub. Slug matches the real
      // /api/travel-destinations?mode=transit response.
@ -481,6 +575,10 @@ function createRecordingStoryboard(
 const RECORDING_LOCALES: readonly RecordingLocale[] = ['en', 'de', 'zh', 'hi'];
 const RECORDING_FORM_FACTORS: readonly FormFactor[] = ['desktop', 'mobile'];

+// English-only homepage cut: one storyboard per entry in RECORDING_FORM_FACTORS.
+const ENGLISH_HOMEPAGE_STORYBOARDS: Storyboard[] = RECORDING_FORM_FACTORS.map(
+  (factor) => createRecordingStoryboard('en', factor)
+);
+
 const DEMO_STORYBOARDS: Storyboard[] = RECORDING_LOCALES.flatMap((locale) =>
  RECORDING_FORM_FACTORS.map((formFactor) => createRecordingStoryboard(locale, formFactor))
 );
@ -1271,14 +1369,21 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [

 const AD_STORYBOARDS = AD_CONFIGS.map(createDemoAdStoryboard);

-const STORYBOARD_SET = process.env.VIDEO_STORYBOARD_SET ?? 'ads';
+const STORYBOARD_SET = process.env.VIDEO_STORYBOARD_SET ?? 'homepage-en';

-export const storyboards: Storyboard[] =
-  STORYBOARD_SET === 'demo'
-    ? DEMO_STORYBOARDS
-    : STORYBOARD_SET === 'all'
-      ? [...AD_STORYBOARDS, ...DEMO_STORYBOARDS]
-      : AD_STORYBOARDS;
+/**
+ * Storyboards selected for this run by the VIDEO_STORYBOARD_SET environment
+ * variable (read into STORYBOARD_SET):
+ *   - 'homepage-en' (default when unset): English homepage, every form factor
+ *   - 'demo': homepage storyboards for every recording locale
+ *   - 'all': social ads plus every homepage locale
+ *   - 'ads': social ads only
+ * Any other value keeps the historical fallback to the ads set, but now says
+ * so on stderr so a typo cannot silently render the wrong videos.
+ */
+export const storyboards: Storyboard[] = (() => {
+  switch (STORYBOARD_SET) {
+    case 'homepage-en':
+      return ENGLISH_HOMEPAGE_STORYBOARDS;
+    case 'demo':
+      return DEMO_STORYBOARDS;
+    case 'all':
+      return [...AD_STORYBOARDS, ...DEMO_STORYBOARDS];
+    case 'ads':
+      return AD_STORYBOARDS;
+    default:
+      // Unknown set names (including the empty string, which `??` does not
+      // replace with the default) still resolve to the ads set for backward
+      // compatibility; the warning makes the fallback visible.
+      console.warn(
+        `[storyboards] unknown VIDEO_STORYBOARD_SET=${JSON.stringify(STORYBOARD_SET)}; ` +
+          "expected 'homepage-en', 'demo', 'all' or 'ads' - falling back to 'ads'"
+      );
+      return AD_STORYBOARDS;
+  }
+})();

 export function getStoryboard(name: string): Storyboard {
  const sb = storyboards.find((s) => s.name === name);
--- a/video/tts/mux.py
+++ b/video/tts/mux.py
@ -169,7 +169,6 @@ def main() -> int:
        "aac",
        "-b:a",
        "192k",
-        "-shortest",
        "-movflags",
        "+faststart",
        str(out_path),
--- a/video/tts/synth.py
+++ b/video/tts/synth.py
@ -168,6 +168,61 @@ def cached_index_matches(
    return True


+def load_reusable_items(
+    index_path: Path,
+    cues: list[dict],
+    instruct: str,
+    language: str,
+    reference_text: str,
+    design_model: str,
+    clone_model: str,
+    reference_audio: str,
+    seed: int,
+    temperature: float,
+    top_p: float,
+) -> dict[int, dict]:
+    """Return cue-indexed cached items that match the current synth settings.
+
+    Unlike ``cached_index_matches`` this accepts a partial index, so a long
+    CPU synthesis run can be resumed cue-by-cue after an interruption.
+
+    The index is best-effort input: a missing, unreadable, truncated or
+    structurally unexpected ``index.json`` yields ``{}`` (nothing to reuse)
+    instead of raising, and individual malformed items are skipped.
+
+    Args:
+        index_path: Path of the ``index.json`` written by a previous run; wav
+            names inside it are resolved relative to its parent directory.
+        cues: Current cues; each must carry ``cueIndex`` and ``text`` and may
+            carry ``gapBeforeMs`` (treated as 0 when absent).
+        instruct, language, reference_text, design_model, clone_model,
+        reference_audio, seed, temperature, top_p: Current synth settings.
+            Every one must equal the value stored in the index, otherwise
+            nothing is reused.
+
+    Returns:
+        Mapping of cue index to the cached item dict, restricted to items
+        whose wav file still exists, whose text (ignoring surrounding
+        whitespace) and ``gapBeforeMs`` match the current cue, and which
+        carry an integer-convertible ``durationMs`` (callers read it).
+    """
+    if not index_path.exists():
+        return {}
+    try:
+        cached = json.loads(index_path.read_text())
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    if not isinstance(cached, dict):
+        # Valid JSON, but not the object layout this script writes.
+        return {}
+
+    if cached.get("instruct") != instruct or cached.get("language") != language:
+        return {}
+    if cached.get("referenceText") != reference_text:
+        return {}
+    if cached.get("designModel") != design_model or cached.get("cloneModel") != clone_model:
+        return {}
+    if cached.get("referenceAudio", "") != reference_audio:
+        return {}
+    try:
+        same_sampling = (
+            int(cached.get("seed", -1)) == seed
+            and float(cached.get("temperature", -1)) == temperature
+            and float(cached.get("topP", -1)) == top_p
+        )
+    except (TypeError, ValueError):
+        # null or non-numeric sampling fields: treat as a settings mismatch.
+        return {}
+    if not same_sampling:
+        return {}
+
+    cached_items = cached.get("items", [])
+    if not isinstance(cached_items, list):
+        return {}
+
+    cue_by_index = {int(c["cueIndex"]): c for c in cues}
+    reusable: dict[int, dict] = {}
+    for item in cached_items:
+        if not isinstance(item, dict):
+            continue
+        try:
+            cue_index = int(item.get("cueIndex", -1))
+            cached_gap_ms = int(item.get("gapBeforeMs", -1))
+            # Callers format and sum durationMs, so an item without a usable
+            # value must be re-synthesised rather than reused.
+            int(item["durationMs"])
+        except (KeyError, TypeError, ValueError):
+            continue
+        cue = cue_by_index.get(cue_index)
+        wav = item.get("wav")
+        if cue is None or not isinstance(wav, str) or not wav:
+            continue
+        if not (index_path.parent / wav).exists():
+            continue
+        if cue["text"].strip() != str(item.get("text", "")).strip():
+            continue
+        if int(cue.get("gapBeforeMs", 0)) != cached_gap_ms:
+            continue
+        reusable[cue_index] = item
+    return reusable
+
+
 def seed_everything(seed: int) -> None:
    random.seed(seed)
    np.random.seed(seed)
@ -333,34 +388,74 @@ def main() -> int:
    )

    print(
-        f"[synth] cloning {len(texts)} cues from reference (x_vector_only) — one batched call",
+        f"[synth] cloning {len(texts)} cues from reference (x_vector_only)",
        flush=True,
    )
    for i, t in enumerate(texts):
        print(f"[synth]   {i:2d}: {t}", flush=True)

    clone_model = load_model(args.clone_model, args.device)
-    seed_everything(seed)
-    wavs, sr = clone_model.generate_voice_clone(
-        text=texts,
-        language=language,
-        ref_audio=str(ref_wav_path),
-        ref_text=ref_text,
-        x_vector_only_mode=True,
-        non_streaming_mode=True,
-        do_sample=True,
-        temperature=temperature,
-        top_p=top_p,
+    out_index_base = {
+        "storyboard": args.storyboard,
+        "instruct": instruct,
+        "language": language,
+        "designModel": args.design_model,
+        "cloneModel": args.clone_model,
+        "referenceAudio": reference_audio_cache_key,
+        "referenceText": ref_text,
+        "seed": seed,
+        "temperature": temperature,
+        "topP": top_p,
+    }
+    index_path = audio_dir / "index.json"
+    reusable = load_reusable_items(
+        index_path,
+        cues,
+        instruct,
+        language,
+        reference_text,
+        args.design_model,
+        args.clone_model,
+        reference_audio_cache_key,
+        seed,
+        temperature,
+        top_p,
    )
-    if len(wavs) != len(texts):
-        print(
-            f"[synth] model returned {len(wavs)} wavs for {len(texts)} cues",
-            file=sys.stderr,
-        )
-        return 1
+
+    def write_index(items: list[dict]) -> None:
+        index_path.write_text(json.dumps({**out_index_base, "items": items}, indent=2))

    items = []
-    for cue, audio in zip(cues, wavs):
+    for cue_index, cue in enumerate(cues):
+        cached_item = reusable.get(int(cue["cueIndex"]))
+        if cached_item:
+            items.append(cached_item)
+            write_index(items)
+            print(
+                f"[synth] reusing {cached_item['wav']}  {int(cached_item['durationMs']):>5d}ms  «{cue['text']}»",
+                flush=True,
+            )
+            continue
+
+        seed_everything(seed + cue_index)
+        wavs, sr = clone_model.generate_voice_clone(
+            text=[texts[cue_index]],
+            language=language,
+            ref_audio=str(ref_wav_path),
+            ref_text=ref_text,
+            x_vector_only_mode=True,
+            non_streaming_mode=True,
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+        )
+        if len(wavs) != 1:
+            print(
+                f"[synth] model returned {len(wavs)} wavs for cue {cue_index}",
+                file=sys.stderr,
+            )
+            return 1
+        audio = wavs[0]
        if hasattr(audio, "cpu"):
            audio = audio.cpu().float().numpy()
        wav_name = f"cue_{cue['cueIndex']:03d}.wav"
@ -377,25 +472,13 @@ def main() -> int:
                "durationMs": duration_ms,
            }
        )
+        write_index(items)
        print(
            f"[synth] wrote {wav_name}  {duration_ms:>5d}ms  «{cue['text']}»",
            flush=True,
        )

-    out_index = {
-        "storyboard": args.storyboard,
-        "instruct": instruct,
-        "language": language,
-        "designModel": args.design_model,
-        "cloneModel": args.clone_model,
-        "referenceAudio": reference_audio_cache_key,
-        "referenceText": ref_text,
-        "seed": seed,
-        "temperature": temperature,
-        "topP": top_p,
-        "items": items,
-    }
-    (audio_dir / "index.json").write_text(json.dumps(out_index, indent=2))
+    write_index(items)
    total_ms = sum(it["gapBeforeMs"] + it["durationMs"] for it in items)
    print(
        f"[synth] [{args.storyboard}] {len(items)} cues, {total_ms}ms of audio (incl. gaps) -> {audio_dir}",