// perfect-postcode/video/src/preflight.ts

import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { OUTPUT_DIR } from './config.js';
import type { Storyboard } from './script.js';
import { storyboards } from './storyboard.js';

/**
 * Emit per-storyboard narration scripts for the synth step.
 *
 * Synth (tts/synth.py) runs BEFORE recording, so it needs the full ordered
 * narration list — text + per-cue gaps + voice config — without depending
 * on Playwright, the dashboard, or auth. Walk each storyboard's cues, write
 * a flat manifest under `output/<name>/narration-script.json`, then write
 * an index manifest at `output/storyboards.json` so render.sh knows which
 * storyboard slugs to loop over.
 *
 * The cue index in each manifest is the source of truth: the runner later
 * matches storyboard cues to measured durations by index.
 */
// Em/en-dashes and ellipses make Qwen3-TTS produce dramatic pauses, sighs,
// or audible breaths — the captions still render the original (unicode-rich)
// text from the storyboard; only the synth input is sanitised.
//
// Rules, applied in order:
//   1. Dashes (and whitespace) at the very start or end of the text are
//      dropped, so an edge dash never turns into a dangling comma.
//   2. Any run of em/en-dashes inside the text, together with the whitespace
//      around and between them, becomes a single ", " (so "——" or "— —"
//      yields one comma, not ", , ").
//   3. Ellipsis characters and runs of three or more dots become a single
//      "."; consecutive or mixed forms ("……", "….") also collapse to one dot
//      instead of leaving a ".." residue.
//   4. Whitespace runs are collapsed to one space and the result is trimmed.
function normalizeForTts(text: string): string {
  return (
    text
      // Strip edge dashes first: a leading/trailing dash has nothing to join.
      .replace(/^[\s—–]+|[\s—–]+$/g, '')
      // Treat a whole run of dashes as one separator.
      .replace(/(?:\s*[—–])+\s*/g, ', ')
      // Expand "…" to "..." so the dot-run collapse below handles every
      // ellipsis spelling, including repeated or mixed ones, in one pass.
      .replace(/…/g, '...')
      .replace(/\.{3,}/g, '.')
      .replace(/\s{2,}/g, ' ')
      .trim()
  );
}

/**
 * Write the narration manifest for a single storyboard.
 *
 * Creates `<OUTPUT_DIR>/<storyboard.name>/` when it does not exist yet, then
 * writes `narration-script.json` there. The manifest holds the storyboard
 * name, the voice settings and one item per cue carrying its index, its
 * TTS-normalised text and its `gapBeforeMs`.
 *
 * @param storyboard - Storyboard whose cues and voice config are exported.
 * @returns The path of the manifest file that was written.
 */
function emitScript(storyboard: Storyboard): string {
  const outDir = join(OUTPUT_DIR, storyboard.name);
  if (!existsSync(outDir)) {
    mkdirSync(outDir, { recursive: true });
  }

  // One entry per cue, in storyboard order; cueIndex is the position in
  // `storyboard.cues`.
  const items = storyboard.cues.map((cue, cueIndex) => {
    const text = normalizeForTts(cue.text);
    return { cueIndex, text, gapBeforeMs: cue.gapBeforeMs };
  });

  // The voice block is consumed by tts/synth.py — see _resolve_reference and
  // the cache check there for which fields invalidate cached audio.
  const { voice } = storyboard;
  const voiceBlock = {
    instruct: voice.instruct,
    language: voice.language,
    referenceText: voice.referenceText,
    // Sampling parameters fall back to fixed defaults when the storyboard
    // leaves them unset.
    temperature: voice.temperature ?? 0.6,
    topP: voice.topP ?? 0.9,
    seed: voice.seed ?? 42,
  };

  const manifestPath = join(outDir, 'narration-script.json');
  const payload = JSON.stringify(
    { storyboard: storyboard.name, voice: voiceBlock, items },
    null,
    2,
  );
  writeFileSync(manifestPath, payload);
  console.log(
    `[preflight] [${storyboard.name}] wrote ${items.length} cues → ${manifestPath}`,
  );
  return manifestPath;
}

/**
 * Preflight entry point.
 *
 * Ensures the output directory exists, emits one narration manifest per
 * storyboard, then writes `storyboards.json` — an index listing each
 * storyboard's name, locale and video settings.
 */
function main(): void {
  if (!existsSync(OUTPUT_DIR)) {
    mkdirSync(OUTPUT_DIR, { recursive: true });
  }

  for (const storyboard of storyboards) {
    emitScript(storyboard);
  }

  // Index for shell loops — each entry has every field render.sh needs to
  // address per-storyboard outputs without re-parsing the TS source.
  const entries = storyboards.map((storyboard) => {
    const { video } = storyboard;
    return {
      name: storyboard.name,
      // An explicit locale wins; otherwise fall back to the app language.
      locale: storyboard.locale ?? storyboard.content.appLanguage,
      aspect: video.aspect,
      outputFps: video.outputFps,
      minDurationS: video.minDurationS,
      maxDurationS: video.maxDurationS,
      posterTimeS: video.posterTimeS,
    };
  });

  const indexPath = join(OUTPUT_DIR, 'storyboards.json');
  const serialized = JSON.stringify({ storyboards: entries }, null, 2);
  writeFileSync(indexPath, serialized);
  console.log(`[preflight] wrote storyboard index → ${indexPath}`);
}

// Run the preflight as soon as this module is loaded.
main();