LGTM

2026-05-11 21:38:26 +01:00 · 2026-05-11 21:38:26 +01:00 · f2a2651b8a
commit f2a2651b8a
parent 9248e26af2
95 changed files with 3993 additions and 1471 deletions
--- a/video/tts/mux.py
+++ b/video/tts/mux.py
@@ -1,19 +1,19 @@
-"""Mux per-cue WAVs into recording.mp4 at their narration offsets.
+"""Mux per-cue WAVs into one storyboard's recording.mp4 at narration offsets.

-Reads two manifests:
+Reads two manifests inside ``output/<storyboard>/``:

-* ``output/audio/index.json`` (synth output) — per-cue WAV filename + measured
+* ``audio/index.json`` (synth output) — per-cue WAV filename + measured
  duration. Generated BEFORE recording in one batched Qwen3-TTS call.
-* ``output/narration.json`` (recorder output) — per-cue ``videoTimeMs`` against
+* ``narration.json`` (recorder output) — per-cue ``videoTimeMs`` against
  the trimmed video. Generated DURING recording.

 Joins them by ``cueIndex`` (index in the cue list, 1:1 between manifests),
 runs ffmpeg with one ``adelay`` per cue plus a single ``amix``, copies the
-video stream, and writes ``output/recording.narrated.mp4``.
+video stream, and writes ``output/<storyboard>/recording.narrated.mp4``.

 Run from the ``video/`` directory after recording:

-    uv run --project tts python tts/mux.py
+    uv run --project tts python tts/mux.py --storyboard recording
 """

 from __future__ import annotations
@@ -28,23 +28,21 @@ from pathlib import Path

 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--audio-dir", type=Path, default=Path("output/audio"))
    parser.add_argument(
-        "--narration",
-        type=Path,
-        default=Path("output/narration.json"),
-        help="Per-cue videoTimeMs manifest written by the recorder.",
+        "--storyboard",
+        required=True,
+        help="Storyboard slug (matches Storyboard.name in src/storyboard.ts).",
    )
-    parser.add_argument("--video", type=Path, default=Path("output/recording.mp4"))
    parser.add_argument(
-        "--out",
+        "--output-dir",
        type=Path,
-        default=Path("output/recording.narrated.mp4"),
+        default=Path("output"),
+        help="Root output directory; per-storyboard files live in <root>/<storyboard>/.",
    )
    parser.add_argument(
        "--replace",
        action="store_true",
-        help="After muxing, atomically replace --video with --out.",
+        help="After muxing, atomically replace the storyboard's recording.mp4.",
    )
    return parser.parse_args()

@@ -56,7 +54,13 @@ def main() -> int:
        print("[mux] ffmpeg not on PATH", file=sys.stderr)
        return 1

-    audio_index_path = args.audio_dir / "index.json"
+    storyboard_dir = args.output_dir / args.storyboard
+    audio_dir = storyboard_dir / "audio"
+    narration_path = storyboard_dir / "narration.json"
+    video_path = storyboard_dir / "recording.mp4"
+    out_path = storyboard_dir / "recording.narrated.mp4"
+
+    audio_index_path = audio_dir / "index.json"
    if not audio_index_path.exists():
        print(
            f"[mux] {audio_index_path} not found; run tts/synth.py first",
@@ -64,25 +68,25 @@ def main() -> int:
        )
        return 1

-    if not args.narration.exists():
+    if not narration_path.exists():
        print(
-            f"[mux] {args.narration} not found; the recorder must run before mux",
+            f"[mux] {narration_path} not found; the recorder must run before mux",
            file=sys.stderr,
        )
        return 1

-    if not args.video.exists():
-        print(f"[mux] video not found: {args.video}", file=sys.stderr)
+    if not video_path.exists():
+        print(f"[mux] video not found: {video_path}", file=sys.stderr)
        return 1

    audio_index = json.loads(audio_index_path.read_text())
    audio_items = [it for it in audio_index.get("items", []) if it.get("wav")]
    if not audio_items:
        print("[mux] synth produced no cues; copying video unchanged", file=sys.stderr)
-        shutil.copyfile(args.video, args.out)
+        shutil.copyfile(video_path, out_path)
        return 0

-    narration = json.loads(args.narration.read_text())
+    narration = json.loads(narration_path.read_text())
    nar_cues = list(narration.get("cues", []))
    if len(nar_cues) != len(audio_items):
        print(
@@ -130,9 +134,9 @@ def main() -> int:
            + "\n  - ".join(overlaps)
        )

-    cmd: list[str] = ["ffmpeg", "-y", "-loglevel", "warning", "-i", str(args.video)]
+    cmd: list[str] = ["ffmpeg", "-y", "-loglevel", "warning", "-i", str(video_path)]
    for it in items:
-        cmd += ["-i", str(args.audio_dir / it["wav"])]
+        cmd += ["-i", str(audio_dir / it["wav"])]

    filter_parts: list[str] = []
    mix_inputs: list[str] = []
@@ -168,18 +172,21 @@ def main() -> int:
        "-shortest",
        "-movflags",
        "+faststart",
-        str(args.out),
+        str(out_path),
    ]

-    print(f"[mux] muxing {len(items)} narration cues into {args.out}", flush=True)
+    print(
+        f"[mux] [{args.storyboard}] muxing {len(items)} narration cues into {out_path}",
+        flush=True,
+    )
    result = subprocess.run(cmd)
    if result.returncode != 0:
        print(f"[mux] ffmpeg exited {result.returncode}", file=sys.stderr)
        return result.returncode

    if args.replace:
-        args.out.replace(args.video)
-        print(f"[mux] replaced {args.video} with narrated copy", flush=True)
+        out_path.replace(video_path)
+        print(f"[mux] replaced {video_path} with narrated copy", flush=True)

    return 0