perfect-postcode/video/src/preflight.ts
2026-05-12 22:00:56 +01:00

85 lines
3.1 KiB
TypeScript

import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { OUTPUT_DIR } from './config.js';
import type { Storyboard } from './script.js';
import { storyboards } from './storyboard.js';
/**
* Emit per-storyboard narration scripts for the synth step.
*
* Synth (tts/synth.py) runs BEFORE recording, so it needs the full ordered
* narration list — text + per-cue gaps + voice config — without depending
* on Playwright, the dashboard, or auth. Walk each storyboard's cues, write
* a flat manifest under `output/<name>/narration-script.json`, then write
* an index manifest at `output/storyboards.json` so render.sh knows which
* storyboard slugs to loop over.
*
* The cue index in each manifest is the source of truth: the runner later
* matches storyboard cues to measured durations by index.
*/
/**
 * Sanitise narration text before it is handed to the TTS synth step.
 *
 * Em/en-dashes and ellipses make Qwen3-TTS produce dramatic pauses, sighs,
 * or audible breaths. The captions still render the original (unicode-rich)
 * text from the storyboard; only the synth input is sanitised.
 *
 * @param text - Raw cue text as written in the storyboard.
 * @returns The text with dashes turned into commas, every ellipsis (the `…`
 *   character, three or more dots, or any mix of the two) reduced to a single
 *   full stop, whitespace runs collapsed to one space, and both ends trimmed.
 */
function normalizeForTts(text: string): string {
  return (
    text
      // A dash that opens the cue has nothing to join, so drop it instead of
      // producing text that starts with ", ".
      .replace(/^(?:\s*[—–])+\s*/, '')
      .replace(/\s*[—–]\s*/g, ', ')
      // Expand "…" to three dots first so mixed runs such as "…." or "……"
      // collapse into one full stop together with typed-out ellipses.
      .replace(/…/g, '...')
      .replace(/\.{3,}/g, '.')
      .replace(/\s{2,}/g, ' ')
      .trim()
  );
}
/**
 * Write the narration manifest for a single storyboard.
 *
 * Creates `<OUTPUT_DIR>/<storyboard name>/` when it does not exist yet and
 * writes `narration-script.json` into it. The manifest holds the storyboard
 * name, the voice settings, and one item per cue (index, TTS-normalised text,
 * gap before the cue).
 *
 * @param storyboard - Storyboard whose cues and voice config are exported.
 * @returns Path of the manifest file that was written.
 */
function emitScript(storyboard: Storyboard): string {
  const targetDir = join(OUTPUT_DIR, storyboard.name);
  if (!existsSync(targetDir)) {
    mkdirSync(targetDir, { recursive: true });
  }

  // One entry per cue, in storyboard order; cueIndex is the cue's position.
  const items = storyboard.cues.map((cue, cueIndex) => {
    const text = normalizeForTts(cue.text);
    return { cueIndex, text, gapBeforeMs: cue.gapBeforeMs };
  });

  // The voice block is consumed by tts/synth.py — see _resolve_reference and
  // the cache check there for which fields invalidate cached audio.
  const source = storyboard.voice;
  const voice = {
    instruct: source.instruct,
    language: source.language,
    referenceText: source.referenceText,
    // Sampling settings fall back to fixed defaults when the storyboard
    // leaves them unset.
    temperature: source.temperature ?? 0.6,
    topP: source.topP ?? 0.9,
    seed: source.seed ?? 42,
  };

  const manifest = { storyboard: storyboard.name, voice, items };
  const serialized = JSON.stringify(manifest, null, 2);

  const path = join(targetDir, 'narration-script.json');
  writeFileSync(path, serialized);
  console.log(`[preflight] [${storyboard.name}] wrote ${items.length} cues → ${path}`);
  return path;
}
/**
 * Script entry point.
 *
 * Ensures OUTPUT_DIR exists, writes the narration manifest for every
 * storyboard, then writes `storyboards.json` into OUTPUT_DIR as an index of
 * all storyboards.
 */
function main(): void {
  if (!existsSync(OUTPUT_DIR)) {
    mkdirSync(OUTPUT_DIR, { recursive: true });
  }

  for (const storyboard of storyboards) {
    emitScript(storyboard);
  }

  // Index for shell loops — each entry has every field render.sh needs to
  // address per-storyboard outputs without re-parsing the TS source.
  const entries = storyboards.map((board) => {
    const video = board.video;
    return {
      name: board.name,
      // An explicit storyboard locale wins; otherwise use the app language.
      locale: board.locale ?? board.content.appLanguage,
      aspect: video.aspect,
      outputFps: video.outputFps,
      minDurationS: video.minDurationS,
      maxDurationS: video.maxDurationS,
      posterTimeS: video.posterTimeS,
    };
  });

  const indexPath = join(OUTPUT_DIR, 'storyboards.json');
  const serialized = JSON.stringify({ storyboards: entries }, null, 2);
  writeFileSync(indexPath, serialized);
  console.log(`[preflight] wrote storyboard index → ${indexPath}`);
}
// Executed as a script: run the preflight as soon as the module is loaded.
main();