This commit is contained in:
Andras Schmelczer 2026-05-11 21:38:26 +01:00
parent 9248e26af2
commit f2a2651b8a
95 changed files with 3993 additions and 1471 deletions

View file

@ -1,19 +1,19 @@
"""Mux per-cue WAVs into recording.mp4 at their narration offsets.
"""Mux per-cue WAVs into one storyboard's recording.mp4 at narration offsets.
Reads two manifests:
Reads two manifests inside ``output/<storyboard>/``:
* ``output/audio/index.json`` (synth output) — per-cue WAV filename + measured
* ``audio/index.json`` (synth output) — per-cue WAV filename + measured
duration. Generated BEFORE recording in one batched Qwen3-TTS call.
* ``output/narration.json`` (recorder output) — per-cue ``videoTimeMs`` against
* ``narration.json`` (recorder output) — per-cue ``videoTimeMs`` against
the trimmed video. Generated DURING recording.
Joins them by ``cueIndex`` (index in the cue list, 1:1 between manifests),
runs ffmpeg with one ``adelay`` per cue plus a single ``amix``, copies the
video stream, and writes ``output/recording.narrated.mp4``.
video stream, and writes ``output/<storyboard>/recording.narrated.mp4``.
Run from the ``video/`` directory after recording:
uv run --project tts python tts/mux.py
uv run --project tts python tts/mux.py --storyboard recording
"""
from __future__ import annotations
@ -28,23 +28,21 @@ from pathlib import Path
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--audio-dir", type=Path, default=Path("output/audio"))
parser.add_argument(
"--narration",
type=Path,
default=Path("output/narration.json"),
help="Per-cue videoTimeMs manifest written by the recorder.",
"--storyboard",
required=True,
help="Storyboard slug (matches Storyboard.name in src/storyboard.ts).",
)
parser.add_argument("--video", type=Path, default=Path("output/recording.mp4"))
parser.add_argument(
"--out",
"--output-dir",
type=Path,
default=Path("output/recording.narrated.mp4"),
default=Path("output"),
help="Root output directory; per-storyboard files live in <root>/<storyboard>/.",
)
parser.add_argument(
"--replace",
action="store_true",
help="After muxing, atomically replace --video with --out.",
help="After muxing, atomically replace the storyboard's recording.mp4.",
)
return parser.parse_args()
@ -56,7 +54,13 @@ def main() -> int:
print("[mux] ffmpeg not on PATH", file=sys.stderr)
return 1
audio_index_path = args.audio_dir / "index.json"
storyboard_dir = args.output_dir / args.storyboard
audio_dir = storyboard_dir / "audio"
narration_path = storyboard_dir / "narration.json"
video_path = storyboard_dir / "recording.mp4"
out_path = storyboard_dir / "recording.narrated.mp4"
audio_index_path = audio_dir / "index.json"
if not audio_index_path.exists():
print(
f"[mux] {audio_index_path} not found; run tts/synth.py first",
@ -64,25 +68,25 @@ def main() -> int:
)
return 1
if not args.narration.exists():
if not narration_path.exists():
print(
f"[mux] {args.narration} not found; the recorder must run before mux",
f"[mux] {narration_path} not found; the recorder must run before mux",
file=sys.stderr,
)
return 1
if not args.video.exists():
print(f"[mux] video not found: {args.video}", file=sys.stderr)
if not video_path.exists():
print(f"[mux] video not found: {video_path}", file=sys.stderr)
return 1
audio_index = json.loads(audio_index_path.read_text())
audio_items = [it for it in audio_index.get("items", []) if it.get("wav")]
if not audio_items:
print("[mux] synth produced no cues; copying video unchanged", file=sys.stderr)
shutil.copyfile(args.video, args.out)
shutil.copyfile(video_path, out_path)
return 0
narration = json.loads(args.narration.read_text())
narration = json.loads(narration_path.read_text())
nar_cues = list(narration.get("cues", []))
if len(nar_cues) != len(audio_items):
print(
@ -130,9 +134,9 @@ def main() -> int:
+ "\n - ".join(overlaps)
)
cmd: list[str] = ["ffmpeg", "-y", "-loglevel", "warning", "-i", str(args.video)]
cmd: list[str] = ["ffmpeg", "-y", "-loglevel", "warning", "-i", str(video_path)]
for it in items:
cmd += ["-i", str(args.audio_dir / it["wav"])]
cmd += ["-i", str(audio_dir / it["wav"])]
filter_parts: list[str] = []
mix_inputs: list[str] = []
@ -168,18 +172,21 @@ def main() -> int:
"-shortest",
"-movflags",
"+faststart",
str(args.out),
str(out_path),
]
print(f"[mux] muxing {len(items)} narration cues into {args.out}", flush=True)
print(
f"[mux] [{args.storyboard}] muxing {len(items)} narration cues into {out_path}",
flush=True,
)
result = subprocess.run(cmd)
if result.returncode != 0:
print(f"[mux] ffmpeg exited {result.returncode}", file=sys.stderr)
return result.returncode
if args.replace:
args.out.replace(args.video)
print(f"[mux] replaced {args.video} with narrated copy", flush=True)
out_path.replace(video_path)
print(f"[mux] replaced {video_path} with narrated copy", flush=True)
return 0