More FE changes

2026-05-09 09:43:41 +01:00 · 2026-05-09 09:43:41 +01:00 · a48eb945e0
commit a48eb945e0
parent f114ada255
48 changed files with 4127 additions and 1751 deletions
--- a/video/render.sh
+++ b/video/render.sh
@ -10,15 +10,17 @@
 #   ./render.sh                # full pipeline (uses cached auth.json if fresh)
 #   ./render.sh --fresh-auth   # force re-auth even if auth.json exists
 #   ./render.sh --no-encode    # stop at WebM, skip MP4 encode
+#   ./render.sh --no-audio     # skip Qwen3-TTS narration; publish silent MP4
 #   FORCE_AUTH=1 ./render.sh   # same as --fresh-auth
 #   APP_URL=http://localhost:3001 ./render.sh   # override frontend URL
+#   TTS_SPEAKER=aiden ./render.sh               # override CustomVoice speaker

 set -euo pipefail

 # -- config (override via env) -------------------------------------------------
-APP_URL="${APP_URL:-http://host.docker.internal:3001}"
-PB_URL="${PB_URL:-http://host.docker.internal:8090}"
-API_URL="${API_URL:-http://host.docker.internal:8001}"
+export APP_URL="${APP_URL:-http://host.docker.internal:3001}"
+export PB_URL="${PB_URL:-http://host.docker.internal:8090}"
+export API_URL="${API_URL:-http://host.docker.internal:8001}"
 PB_ADMIN_EMAIL="${PB_ADMIN_EMAIL:-admin@propertymap.local}"
 PB_ADMIN_PASSWORD="${PB_ADMIN_PASSWORD:-propertymap-dev-2024}"
 PB_EMAIL="${PB_EMAIL:-demo-video@local.test}"
@ -34,14 +36,28 @@ PUBLISH_DIR="${PUBLISH_DIR:-../frontend/public/video}"
 # caption visible.
 POSTER_TIME_S="${POSTER_TIME_S:-16}"

+# Recorder/encoder knobs read by src/config.ts. config.ts treats these as
+# required, so they live here (the only entry point) rather than as defaults
+# scattered across TS modules. Override per-run via env.
+export ASPECT="${ASPECT:-16x9}"
+export CAPTURE_SCALE="${CAPTURE_SCALE:-1}"
+export WEBM_BITRATE="${WEBM_BITRATE:-$(awk -v s="$CAPTURE_SCALE" 'BEGIN{print (s+0>1)?"18M":"8M"}')}"
+export PROMPT_TEXT="${PROMPT_TEXT:-Flats or terraces <£450k, 35 min to Manchester, low crime}"
+export AI_ZOOM_SCALE="${AI_ZOOM_SCALE:-2.4}"
+export MAX_DURATION_S="${MAX_DURATION_S:-45}"
+export MIN_DURATION_S="${MIN_DURATION_S:-10}"
+export OUTPUT_FPS="${OUTPUT_FPS:-50}"
+
 FRESH_AUTH="${FORCE_AUTH:-0}"
 DO_ENCODE=1
+DO_AUDIO=1
 for arg in "$@"; do
  case "$arg" in
    --fresh-auth) FRESH_AUTH=1 ;;
    --no-encode) DO_ENCODE=0 ;;
+    --no-audio) DO_AUDIO=0 ;;
    -h|--help)
-      sed -n '3,18p' "$0"
+      sed -n '3,16p' "$0"  # usage header ends at line 16; 17+ is blank/set -euo pipefail/config banner
      exit 0 ;;
    *) echo "Unknown arg: $arg" >&2; exit 2 ;;
  esac
@ -124,12 +140,36 @@ else
  say "Reusing existing auth.json"
 fi

-# -- record -------------------------------------------------------------------
-say "Recording"
+# -- preflight + synth (Qwen3-TTS) -------------------------------------------
+# Synth runs BEFORE recording: one batched generate_custom_voice call across
+# all cues so the voice stays consistent. The recorder reads
+# output/audio/index.json for measured per-cue durations and sizes each
+# cue's wall-clock to fit; --no-audio skips synth and the recorder falls
+# back to a worst-case estimate.
 mkdir -p output
 # Wipe last run's leaking artifacts so the rename step picks up *this* run.
 rm -f output/recording.webm output/recording.mp4 output/page@*.webm output/page@*.webm.untrimmed
+rm -f output/narration-script.json output/narration.json
+# output/audio/ is preserved; tts/synth.py decides whether the cached WAVs
+# still match the script and skips generation when they do.

+say "Preflight: emitting narration script"
+node dist/preflight.js
+
+if [ "$DO_AUDIO" = "1" ]; then
+  if ! command -v uv >/dev/null 2>&1; then
+    fail "uv not on PATH (required for Qwen3-TTS synth). Install uv or rerun with --no-audio."
+  fi
+  say "Synthesising narration with Qwen3-TTS (speaker=${TTS_SPEAKER:-ryan}) — one batched call"
+  uv sync --project tts || fail "uv sync failed in video/tts"
+  uv run --project tts python tts/synth.py || fail "tts/synth.py failed"
+  if [ ! -s output/audio/index.json ]; then
+    fail "synth did not produce output/audio/index.json"
+  fi
+fi
+
+# -- record -------------------------------------------------------------------
+say "Recording"
 APP_URL="$APP_URL" node dist/record.js

 if [ ! -s output/recording.webm ]; then
@ -163,6 +203,20 @@ if [ "$DO_ENCODE" = "1" ]; then
  node dist/verify.js output/recording.mp4 output/poster.jpg
 fi

+# -- mux narration ------------------------------------------------------------
+# Synth already produced per-cue WAVs (in output/audio/); the recorder logged
+# each cue's videoTime against the trimmed timeline. Drop the WAVs onto the
+# mp4 with one ffmpeg adelay+amix and replace the silent recording in place.
+if [ "$DO_ENCODE" = "1" ] && [ "$DO_AUDIO" = "1" ]; then
+  if [ ! -s output/narration.json ]; then
+    fail "narration.json missing — recorder did not log cues"
+  fi
+  say "Muxing narration into output/recording.mp4"
+  uv run --project tts python tts/mux.py --replace \
+    || fail "tts/mux.py failed"
+  node dist/verify.js output/recording.mp4
+fi
+
 # -- publish to homepage ------------------------------------------------------
 # Only publish when we did the encode (otherwise we'd be copying a stale
 # mp4 next to a fresh webm). --no-encode skips this whole block.