This commit is contained in:
Andras Schmelczer 2026-05-26 19:45:13 +01:00
parent c645b0f1d4
commit 39ef5c6646
79 changed files with 5660 additions and 2199 deletions

View file

@ -10,6 +10,7 @@
"setup-auth": "tsc && node dist/auth.js",
"record": "tsc && node dist/record.js",
"verify-output": "tsc && node dist/verify.js",
"review": "./review.sh",
"render": "./render.sh"
},
"dependencies": {

View file

@ -14,9 +14,11 @@
# bootstrap step; you supply real account credentials.
#
# Usage:
# ./render.sh # local stack
# ./render.sh --prod # prod (requires LOGIN_EMAIL/LOGIN_PASSWORD)
# ./render.sh # local stack, English homepage landscape + portrait
# ./render.sh --prod # prod, English homepage landscape + portrait
# ./render.sh --target prod # same as --prod
# VIDEO_STORYBOARD_SET=ads ./render.sh --prod # render social ads instead
# VIDEO_STORYBOARD_SET=demo ./render.sh --prod # render every homepage locale
# ./render.sh --fresh-auth # force re-auth even if cache is fresh
# ./render.sh --resume # preserve completed recordings and continue
# ./render.sh --no-encode # stop at WebM, skip MP4 encode
@ -312,6 +314,20 @@ if [ "$DO_AUDIO" = "1" ]; then
say "Synchronising tts/ Python deps"
uv sync --project tts ${uv_sync_extras[@]+"${uv_sync_extras[@]}"} || fail "uv sync failed in video/tts"
if [ -z "${TTS_DEVICE:-}" ]; then
if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L >/dev/null 2>&1; then
gpu_free_mb="$(nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d ' ')"
if [ "${gpu_free_mb:-0}" -ge 8000 ]; then
export TTS_DEVICE="cuda:0"
else
export TTS_DEVICE="cpu"
say "GPU has ${gpu_free_mb:-0}MiB free; using CPU for TTS to avoid CUDA OOM"
fi
else
export TTS_DEVICE="cpu"
fi
fi
# Voice consistency: every ad in this set declares the same AD_VOICE
# (instruct/seed/temperature/topP/referenceText). Even with seed-locked
# VoiceDesign, independent invocations across processes can produce
@ -325,10 +341,17 @@ if [ "$DO_AUDIO" = "1" ]; then
# which it always does, because every ad shares AD_VOICE.
shared_ref_wav=""
shared_ref_meta=""
shared_audio_dir=""
for sb in "${STORYBOARDS[@]}"; do
if [ -n "$shared_ref_wav" ] && [ -f "$shared_ref_wav" ] && [ -f "$shared_ref_meta" ]; then
if [ -n "$shared_audio_dir" ] && [ -d "$shared_audio_dir" ]; then
mkdir -p "output/$sb/audio"
cp -f "$shared_ref_wav" "output/$sb/audio/_reference.wav"
for cached_audio_file in "$shared_audio_dir"/*.wav "$shared_audio_dir"/*.json; do
[ -f "$cached_audio_file" ] || continue
cp -f "$cached_audio_file" "output/$sb/audio/$(basename "$cached_audio_file")"
done
elif [ -n "$shared_ref_wav" ] && [ -f "$shared_ref_wav" ] && [ -f "$shared_ref_meta" ]; then
mkdir -p "output/$sb/audio"
cp -f "$shared_ref_wav" "output/$sb/audio/_reference.wav"
cp -f "$shared_ref_meta" "output/$sb/audio/_reference.meta.json"
fi
say "Synthesising narration for [$sb]"
@ -342,6 +365,9 @@ if [ "$DO_AUDIO" = "1" ]; then
shared_ref_meta="output/$sb/audio/_reference.meta.json"
say "Locked voice reference to $shared_ref_wav — reusing for the rest of the set"
fi
if [ -z "$shared_audio_dir" ] && [ -s "output/$sb/audio/index.json" ]; then
shared_audio_dir="output/$sb/audio"
fi
done
fi

117
video/review.sh Executable file
View file

@ -0,0 +1,117 @@
#!/usr/bin/env bash
#
# Extract visual and audio snippets from rendered homepage videos.
#
# Usage:
#   ./review.sh                        # recording + recording-mobile
#   ./review.sh recording ad-01-foo    # explicit storyboard slugs
#
# Outputs land under output/review/current by default. Override REVIEW_DIR
# if you want to keep multiple passes side by side.
#
# For every storyboard <sb> the script reads output/<sb>/recording.mp4 and
# output/<sb>/narration.json (plus output/<sb>/audio/index.json when present)
# and writes into $REVIEW_DIR:
#   <sb>-ffprobe.txt              stream/format summary
#   <sb>-contact.jpg              5x3 contact sheet, one frame every 4 s
#   <sb>-postercheck-t<N>.jpg     single frame at the poster timestamp
#   <sb>-audio-first12.wav        first 12 s of audio, mono 24 kHz
#   <sb>-timing.tsv               per-cue timing table
#   <sb>-cue-NN-mid.jpg/.mp4/.wav per-cue midpoint frame, clip and audio
set -euo pipefail
cd "$(dirname "$0")"

REVIEW_DIR="${REVIEW_DIR:-output/review/current}"
# Exported so the embedded Node script (which reads process.env.REVIEW_DIR)
# always writes its timing tables next to the ffmpeg outputs.
export REVIEW_DIR
mkdir -p "$REVIEW_DIR"

if [ "$#" -gt 0 ]; then
  STORYBOARDS=("$@")
else
  STORYBOARDS=(recording recording-mobile)
fi

# Pass 1: whole-video artefacts.
for sb in "${STORYBOARDS[@]}"; do
  src="output/$sb/recording.mp4"
  if [ ! -s "$src" ]; then
    echo "[review] missing rendered video: $src" >&2
    exit 1
  fi

  width="$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=noprint_wrappers=1:nokey=1 "$src")"
  height="$(ffprobe -v error -select_streams v:0 -show_entries stream=height -of default=noprint_wrappers=1:nokey=1 "$src")"

  # Landscape defaults; portrait videos get narrower tiles and an earlier
  # poster timestamp.
  scale=360
  poster_t=16
  if [ "$height" -gt "$width" ]; then
    scale=240
    poster_t=12
  fi

  ffprobe -v error \
    -select_streams v:0 \
    -show_entries stream=codec_name,width,height,avg_frame_rate \
    -show_entries format=duration,size \
    -of default=noprint_wrappers=1 \
    "$src" > "$REVIEW_DIR/$sb-ffprobe.txt"

  ffmpeg -nostdin -y -loglevel warning -i "$src" \
    -vf "fps=1/4,scale=${scale}:-1:flags=lanczos,tile=5x3:padding=12:margin=12:color=white" \
    -frames:v 1 -update 1 -q:v 2 "$REVIEW_DIR/$sb-contact.jpg"

  ffmpeg -nostdin -y -loglevel warning -i "$src" -ss "$poster_t" \
    -frames:v 1 -update 1 -q:v 2 "$REVIEW_DIR/$sb-postercheck-t${poster_t}.jpg"

  ffmpeg -nostdin -y -loglevel warning -i "$src" -t 12 \
    -vn -ac 1 -ar 24000 "$REVIEW_DIR/$sb-audio-first12.wav"
done

# Pass 2: build the per-cue plan with Node.
#
# The plan is written to a temp file by a regular command rather than read
# from a process substitution: the exit status of `<(node ...)` is invisible
# to `set -e`, so a missing or malformed narration.json would otherwise yield
# an empty plan and a misleading success message.
cue_plan="$(mktemp)"
trap 'rm -f "$cue_plan"' EXIT

node - "${STORYBOARDS[@]}" > "$cue_plan" <<'NODE'
const fs = require('fs');
const storyboards = process.argv.slice(2);
const review = process.env.REVIEW_DIR || 'output/review/current';
for (const sb of storyboards) {
  const narration = JSON.parse(fs.readFileSync(`output/${sb}/narration.json`, 'utf8'));
  const audioPath = `output/${sb}/audio/index.json`;
  const audio = fs.existsSync(audioPath)
    ? JSON.parse(fs.readFileSync(audioPath, 'utf8'))
    : { items: [] };
  const byCue = new Map((audio.items || []).map((item) => [Number(item.cueIndex), item]));
  const rows = ['cueIndex\tstartS\tendS\tdurationS\tgapBeforeMs\twav\ttext'];
  narration.cues.forEach((cue, i) => {
    const item = byCue.get(i) || {};
    const startMs = Number(cue.videoTimeMs);
    // Prefer the duration from the audio manifest; fall back to the cue's own.
    const durationMs = Number(item.durationMs || cue.durationMs);
    const endMs = startMs + durationMs;
    rows.push([
      i,
      (startMs / 1000).toFixed(3),
      (endMs / 1000).toFixed(3),
      (durationMs / 1000).toFixed(3),
      item.gapBeforeMs ?? '',
      item.wav ?? '',
      cue.text,
    ].join('\t'));
    // stdout row consumed by the shell loop:
    //   storyboard, cue index, clip start (250 ms before the cue, clamped
    //   at 0), clip duration (cue + 500 ms), cue midpoint. All in seconds.
    console.log([
      sb,
      i,
      (Math.max(0, startMs - 250) / 1000).toFixed(3),
      ((durationMs + 500) / 1000).toFixed(3),
      ((startMs + durationMs / 2) / 1000).toFixed(3),
    ].join('\t'));
  });
  fs.writeFileSync(`${review}/${sb}-timing.tsv`, rows.join('\n') + '\n');
}
NODE

# Pass 3: per-cue artefacts. ffmpeg runs with -nostdin so it cannot swallow
# the plan rows this loop is reading from stdin.
while IFS=$'\t' read -r sb idx clip_start clip_dur midpoint; do
  src="output/$sb/recording.mp4"
  cue="$(printf '%02d' "$idx")"

  ffmpeg -nostdin -y -loglevel warning -i "$src" -ss "$midpoint" \
    -frames:v 1 -update 1 -q:v 2 "$REVIEW_DIR/$sb-cue-$cue-mid.jpg"

  ffmpeg -nostdin -y -loglevel warning -ss "$clip_start" -i "$src" -t "$clip_dur" \
    -c:v libx264 -pix_fmt yuv420p -crf 18 -preset veryfast \
    -c:a aac -b:a 128k -movflags +faststart \
    "$REVIEW_DIR/$sb-cue-$cue.mp4"

  ffmpeg -nostdin -y -loglevel warning -ss "$clip_start" -i "$src" -t "$clip_dur" \
    -vn -ac 1 -ar 24000 "$REVIEW_DIR/$sb-cue-$cue.wav"
done < "$cue_plan"

echo "[review] wrote snippets to $REVIEW_DIR"

View file

@ -46,6 +46,9 @@ export interface HexagonClickTarget {
type ApiKind = 'hexagons' | 'postcodes' | 'selection-stats' | 'tracked-api';
const SELECTION_PANE_SELECTOR =
'[data-tutorial="right-pane"], .fixed.inset-0.z-50:has(button[aria-label="Close drawer"])';
const TRACKED_API_PATHS = new Set([
'/api/ai-filters',
'/api/export',
@ -89,7 +92,8 @@ export class DashboardRecorder {
async waitForSelectionReady(afterSelectionVersion: number, timeoutMs = 12000): Promise<void> {
await this.page
.locator('[data-tutorial="right-pane"]')
.locator(SELECTION_PANE_SELECTOR)
.first()
.waitFor({ state: 'visible', timeout: timeoutMs });
await this.waitForStable({ afterSelectionVersion, timeoutMs });
}

View file

@ -116,6 +116,18 @@ export async function installCursor(page: Page): Promise<void> {
body.__demo-aspect-horizontal #__demo-caption {
bottom: 7%;
}
body.__demo-aspect-horizontal #__demo-caption.placement-side {
left: auto;
right: 3.4%;
bottom: 10%;
transform: translate(28px, 0);
max-width: min(560px, 30vw);
padding: 18px 22px;
border-radius: 18px;
font-size: 26px;
line-height: 1.18;
text-align: left;
}
/* Vertical default: upper-third. Kept compact so the map remains the
primary visual in the social ad cuts. */
body.__demo-aspect-vertical #__demo-caption {
@ -130,6 +142,9 @@ export async function installCursor(page: Page): Promise<void> {
opacity: 1;
transform: translate(-50%, 0);
}
body.__demo-aspect-horizontal #__demo-caption.placement-side.visible {
transform: translate(0, 0);
}
#__demo-outro {
position: fixed; inset: 0;
@ -565,13 +580,19 @@ export async function setAspectClass(
}, aspect);
}
export async function showCaption(page: Page, text: string): Promise<void> {
await page.evaluate((t) => {
export async function showCaption(
page: Page,
text: string,
placement?: 'side'
): Promise<void> {
await page.evaluate(({ t, placement }) => {
const el = document.getElementById('__demo-caption');
if (!el) return;
el.textContent = t;
el.classList.remove('placement-side');
if (placement) el.classList.add(`placement-${placement}`);
el.classList.add('visible');
}, text);
}, { t: text, placement });
}
/**

View file

@ -74,11 +74,10 @@ export async function smoothMove(
* "Fake" type: progressively set the textarea value, dispatching
* React-compatible input events.
*
* Cadence is generated as a per-char weight ratio (so spaces and punctuation
* read as natural pauses), then **rescaled** so that the sum of delays equals
* `totalDurationMs` exactly. The runner depends on this: it budgets a
* specific number of ms for the type step, and any divergence would cascade
* into narration drift.
* Do not do one Playwright round-trip per character here. Long prompts can
* turn a 4s typing budget into 9s of wall-clock time on a busy recorder.
* Instead, animate through paced chunks. It still reads as typing on video,
* but the runner can keep narration and visuals aligned.
*/
export async function fakeType(
page: Page,
@ -86,17 +85,19 @@ export async function fakeType(
text: string,
totalDurationMs: number
): Promise<void> {
const steps = text.length;
if (steps === 0) {
if (text.length === 0) {
if (totalDurationMs > 0) await sleep(totalDurationMs);
return;
}
const weights = computeTypingWeights(text);
const weightSum = weights.reduce((a, b) => a + b, 0);
const msPerWeight = totalDurationMs / weightSum;
const steps = Math.min(
text.length,
Math.max(1, Math.min(48, Math.round(totalDurationMs / 95)))
);
const startedAt = Date.now();
for (let i = 1; i <= steps; i++) {
const charCount = Math.max(1, Math.round((i / steps) * text.length));
await page.evaluate(
({ selector, value }) => {
const ta = document.querySelector(selector) as HTMLTextAreaElement | null;
@ -110,27 +111,16 @@ export async function fakeType(
setValue.call(ta, value);
ta.dispatchEvent(new Event('input', { bubbles: true }));
},
{ selector, value: text.slice(0, i) }
{ selector, value: text.slice(0, charCount) }
);
if (i < steps) {
const ms = Math.max(0, Math.round(weights[i - 1] * msPerWeight));
if (ms > 0) await sleep(ms);
const targetElapsed = (totalDurationMs * i) / steps;
const waitMs = startedAt + targetElapsed - Date.now();
if (waitMs > 0) await sleep(waitMs);
}
}
}
function computeTypingWeights(text: string): number[] {
const cadence = [0.82, 1.08, 0.94, 1.22, 0.88, 1.14, 0.98, 1.28];
return Array.from(text, (char, index) => {
let weight = cadence[index % cadence.length];
if (char === ' ') weight += 0.9;
if (/[,.!?;:]/.test(char)) weight += 1.8;
const next = text[index + 1];
if (next === ' ' && index % 4 === 0) weight += 0.55;
return weight;
});
}
/**
* Drag the right-hand thumb of a Radix slider to a target track fraction.
* Returns the final cursor position so callers can chain a smoothMove afterwards.

View file

@ -101,7 +101,7 @@ async function runCue(
videoTimeMs: cursor.ms + leadInMs,
durationMs: measuredAudioMs,
});
await showCaption(ctx.page, cue.text);
await showCaption(ctx.page, cue.text, cue.captionPlacement);
const during = cue.during ?? [];
const declaredSum = during.reduce((s, a) => s + a.durationMs, 0);
@ -184,7 +184,36 @@ async function runActivity(ctx: ScriptCtx, step: Activity): Promise<void> {
return;
}
case 'click': {
const to = await resolveTarget(ctx, step.target);
const selectionVersion = ctx.dashboard.getSelectionStatsVersion();
const candidates =
step.target.kind === 'hexagon' && step.waitForSelectionReady
? await ctx.dashboard.visibleHexagonTargets(4)
: [await resolveTarget(ctx, step.target)];
let lastError: unknown = null;
for (let i = 0; i < candidates.length; i++) {
const to = candidates[i];
const moveMs = Math.max(120, Math.round(step.durationMs * 0.7));
await smoothMove(ctx.page, ctx.cursor, to, { durationMs: moveMs });
ctx.cursor = to;
await ctx.page.mouse.click(to.x, to.y);
if (!step.waitForSelectionReady) return;
try {
await ctx.dashboard.waitForSelectionReady(
selectionVersion,
Math.min(step.timeoutMs ?? 12000, i === candidates.length - 1 ? 12000 : 4000)
);
return;
} catch (err) {
lastError = err;
}
}
throw lastError ?? new Error('Click did not open the selection pane');
}
case 'clickIfVisible': {
const to = await tryResolveTarget(ctx, step.target, step.timeoutMs ?? 700);
if (!to) return;
const moveMs = Math.max(120, Math.round(step.durationMs * 0.7));
await smoothMove(ctx.page, ctx.cursor, to, { durationMs: moveMs });
ctx.cursor = to;
@ -196,16 +225,109 @@ async function runActivity(ctx: ScriptCtx, step: Activity): Promise<void> {
return;
case 'mapZoom': {
const point = await resolveTarget(ctx, step.target);
await ctx.page.mouse.move(point.x, point.y);
const perStepMs = Math.floor(step.durationMs / Math.max(1, step.steps));
const mapVersion = ctx.dashboard.getMapDataVersion();
const delta = step.direction === 'out' ? -MAP_ZOOM_WHEEL_DELTA : MAP_ZOOM_WHEEL_DELTA;
for (let i = 0; i < step.steps; i++) {
await ctx.page.mouse.wheel(0, delta);
if (perStepMs > 0) await sleep(perStepMs);
const handled = await ctx.page.evaluate(
async ({ x, y, steps, durationMs, direction }) => {
const root = document.querySelector('.maplibregl-map') as HTMLElement | null;
const fiberKey = root
? Object.getOwnPropertyNames(root).find((key) => key.startsWith('__reactFiber$'))
: undefined;
let fiber = fiberKey ? (root as unknown as Record<string, unknown>)[fiberKey] : null;
let mapRef: unknown = null;
while (fiber && typeof fiber === 'object') {
const maybeFiber = fiber as {
ref?: { current?: unknown };
return?: unknown;
};
const current = maybeFiber.ref?.current;
if (
current &&
typeof current === 'object' &&
typeof (current as { getMap?: unknown }).getMap === 'function'
) {
mapRef = current;
break;
}
fiber = maybeFiber.return ?? null;
}
const map = (mapRef as { getMap?: () => unknown } | null)?.getMap?.();
if (!map || typeof map !== 'object') return false;
const mapApi = map as {
getCanvas: () => HTMLCanvasElement;
getZoom: () => number;
getMinZoom?: () => number;
getMaxZoom?: () => number;
unproject: (point: [number, number]) => unknown;
zoomTo: (
zoom: number,
options: { around?: unknown; duration?: number; essential?: boolean }
) => void;
};
if (
typeof mapApi.getCanvas !== 'function' ||
typeof mapApi.getZoom !== 'function' ||
typeof mapApi.unproject !== 'function' ||
typeof mapApi.zoomTo !== 'function'
) {
return false;
}
const rect = mapApi.getCanvas().getBoundingClientRect();
const around = mapApi.unproject([x - rect.left, y - rect.top]);
const sign = direction === 'out' ? -1 : 1;
const zoomDelta = Math.max(0.25, Math.min(5.2, steps * 0.28)) * sign;
const minZoom = mapApi.getMinZoom?.() ?? 0;
const maxZoom = mapApi.getMaxZoom?.() ?? 22;
const targetZoom = Math.max(minZoom, Math.min(maxZoom, mapApi.getZoom() + zoomDelta));
mapApi.zoomTo(targetZoom, { around, duration: durationMs, essential: true });
await new Promise((resolve) => window.setTimeout(resolve, durationMs));
return true;
},
{
x: point.x,
y: point.y,
steps: step.steps,
durationMs: step.durationMs,
direction: step.direction,
}
);
if (!handled) {
const perStepMs = Math.floor(step.durationMs / Math.max(1, step.steps));
await ctx.page.evaluate(
async ({ x, y, steps, durationMs, delta }) => {
const wait = (ms: number) =>
new Promise<void>((resolve) => window.setTimeout(resolve, ms));
const perStep = Math.floor(durationMs / Math.max(1, steps));
for (let i = 0; i < steps; i++) {
const target = document.elementFromPoint(x, y) ?? document.querySelector('canvas');
target?.dispatchEvent(
new WheelEvent('wheel', {
bubbles: true,
cancelable: true,
clientX: x,
clientY: y,
deltaY: delta,
deltaMode: WheelEvent.DOM_DELTA_PIXEL,
view: window,
})
);
if (perStep > 0) await wait(perStep);
}
},
{ x: point.x, y: point.y, steps: step.steps, durationMs: step.durationMs, delta }
);
if (perStepMs > 0) await sleep(0);
}
if (step.waitForMapSettled) {
await ctx.dashboard.waitForMapSettled(mapVersion, step.timeoutMs ?? 12000);
}
return;
}
case 'dragSlider':
case 'dragSlider': {
const mapVersion = ctx.dashboard.getMapDataVersion();
ctx.cursor = await smoothDragSliderThumb(
ctx.page,
step.thumbSelector,
@ -214,12 +336,21 @@ async function runActivity(ctx: ScriptCtx, step: Activity): Promise<void> {
step.toFraction,
step.durationMs
);
if (step.waitForMapSettled) {
await ctx.dashboard.waitForMapSettled(mapVersion, step.timeoutMs ?? 12000);
}
return;
case 'submitForm':
}
case 'submitForm': {
const mapVersion = ctx.dashboard.getMapDataVersion();
await ctx.page.evaluate((selector) => {
document.querySelector<HTMLFormElement>(selector)?.requestSubmit();
}, step.formSelector);
if (step.waitForMapSettled) {
await ctx.dashboard.waitForMapSettled(mapVersion, step.timeoutMs ?? 12000);
}
return;
}
case 'showOutro':
await showOutro(ctx.page, step.brand, step.tagline, step.url);
return;
@ -269,6 +400,30 @@ async function resolveTarget(
return { x: box.x + box.width / 2, y: box.y + box.height / 2 };
}
/**
 * Non-throwing counterpart to `resolveTarget`.
 *
 * For `element` targets, waits up to `timeoutMs` for the first match of the
 * selector to become visible and returns the centre of its bounding box.
 * Every other target kind is delegated to `resolveTarget`. Any failure
 * (timeout, missing box, resolver error) yields `null` instead of an
 * exception, so callers can skip the step.
 */
async function tryResolveTarget(
  ctx: ScriptCtx,
  target: Target,
  timeoutMs: number
): Promise<{ x: number; y: number } | null> {
  if (target.kind === 'element') {
    const candidate = ctx.page.locator(target.selector).first();
    try {
      await candidate.waitFor({ state: 'visible', timeout: timeoutMs });
      const rect = await candidate.boundingBox({ timeout: timeoutMs });
      return rect ? { x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 } : null;
    } catch {
      return null;
    }
  }

  try {
    return await resolveTarget(ctx, target);
  } catch {
    return null;
  }
}
/**
* Load synth's measured cue durations. Falls back to a worst-case estimate
* if the manifest is missing; that path is only used for ``--no-audio``

View file

@ -118,7 +118,15 @@ export type Activity =
/** Slide the cursor from its current position to `target`. */
| { kind: 'moveCursor'; target: Target; durationMs: number }
/** Move + click + ripple. `durationMs` is the whole gesture, including settle. */
| { kind: 'click'; target: Target; durationMs: number }
| {
kind: 'click';
target: Target;
durationMs: number;
waitForSelectionReady?: boolean;
timeoutMs?: number;
}
/** Move + click when the target is visible; skip without failing otherwise. */
| { kind: 'clickIfVisible'; target: Target; durationMs: number; timeoutMs?: number }
/** Type into a textarea/input over exactly `durationMs`. */
| { kind: 'type'; selector: string; text: string; durationMs: number }
/** Grow or shrink the visible cursor (CSS scale). */
@ -135,6 +143,8 @@ export type Activity =
steps: number;
durationMs: number;
direction?: 'in' | 'out';
waitForMapSettled?: boolean;
timeoutMs?: number;
}
/** Drag the right thumb of a Radix slider to a fraction in [0,1]. */
| {
@ -143,9 +153,17 @@ export type Activity =
trackSelector: string;
toFraction: number;
durationMs: number;
waitForMapSettled?: boolean;
timeoutMs?: number;
}
/** Submit a form found by selector and wait `durationMs`. */
| { kind: 'submitForm'; formSelector: string; durationMs: number }
| {
kind: 'submitForm';
formSelector: string;
durationMs: number;
waitForMapSettled?: boolean;
timeoutMs?: number;
}
/** Reveal the closing brand card. */
| { kind: 'showOutro'; brand: string; tagline: string; url: string; durationMs: number }
/** Reveal a full-screen ad-style overlay over the live map. */
@ -182,6 +200,8 @@ export type Activity =
*/
export interface Cue {
text: string;
/** Optional cue-specific caption layout for shots where the default lower-third hides the product. */
captionPlacement?: 'side';
gapBeforeMs: number;
during?: Activity[];
tail?: Activity[];

View file

@ -40,7 +40,7 @@ type FormFactor = 'desktop' | 'mobile';
// most prominent thing on screen (it sits at the top of the bottom
// sheet which covers ~44% of the viewport), so we skip the wrapper zoom
// entirely — see buildPre().
const AI_ZOOM_SCALE_DESKTOP = 2.4;
const AI_ZOOM_SCALE_DESKTOP = 2.05;
const TT_CARD_SELECTOR = '[data-filter-name="tt_0"]';
const TT_SLIDER_MAX = 120;
@ -54,14 +54,16 @@ const TT_DRAG_TO_MIN = 20;
// sheet, not the map).
const MAP_FOCUS_DESKTOP = vfrac(1140 / 1920, 605 / 1080);
const MAP_FOCUS_MOBILE = vfrac(0.5, 0.3);
const HOMEPAGE_RIGHT_PANE_SELECTOR =
'[data-tutorial="right-pane"], .fixed.inset-0.z-50:has(button[aria-label="Close drawer"])';
// Mobile mapZoom intensity. 6 wheel-steps from the initial zoom (12)
// lands around zoom 14.5 — postcode polygons clearly visible, individual
// streets named, hex aggregation broken open. The previous 18-step
// drill ended past zoom 20 (street-level vector tiles only), so the
// click landed on featureless terrain.
const MOBILE_MAP_ZOOM_STEPS = 6;
const MOBILE_MAP_ZOOM_MS = 1400;
// Mobile mapZoom intensity. Keep mobile below the old 18-step drill that
// overshot into featureless street-level tiles, but make the homepage pass
// visibly break from city blobs into postcode/street scale.
const MOBILE_MAP_ZOOM_STEPS = 9;
const MOBILE_MAP_ZOOM_MS = 2200;
const DESKTOP_MAP_ZOOM_STEPS = 18;
const DESKTOP_MAP_ZOOM_MS = 4300;
type RecordingLocale = 'en' | 'de' | 'zh' | 'hi';
@ -74,6 +76,7 @@ interface RecordingLocalization {
promptText: string;
travelTimeLabel: string;
exportButtonTitle: string;
colourMapTitle: string;
brand: {
name: string;
tagline: string;
@ -84,6 +87,8 @@ interface RecordingLocalization {
prompt: string;
dashboard: string;
filters: string;
zoom: string;
open: string;
details: string;
shortlist: string;
};
@ -105,22 +110,29 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
'strong Manchester accent.',
voiceReferenceText:
"Welcome to the demonstration. This is the narrator voice you'll hear throughout the video.",
promptText: 'Flat under £300k, 35 min to Manchester, good schools, low crime, quiet streets',
promptText:
'First home under £315k, 35 min to Manchester, good schools, check crime, road noise, tree cover, fast broadband',
travelTimeLabel: 'Manchester city centre',
exportButtonTitle: 'Export to Excel',
colourMapTitle: 'Colour map',
brand: {
name: 'Perfect Postcode',
tagline: 'Know where to look before listings take over.',
tagline: 'Find the area before the house.',
url: BRAND_URL,
},
cues: {
describe: "Don't pick a home by scrolling listings.",
describe: 'A Manchester first-time buyer wants to stop wasting Saturdays on the wrong streets.',
prompt:
'Describe what you want. Budget, commute, schools, whatever matters.',
dashboard: 'The map lights up with every postcode in England that fits.',
filters: 'Move one slider. The map answers instantly.',
details: 'Open any postcode. Sold prices. Schools. Crime. Noise. All on one screen.',
shortlist: 'Take your shortlist to the listings. Now you know where to search.',
'They type the whole brief: under £315k, thirty-five minutes to town, good schools, low crime, quieter roads, trees, and fast broadband.',
dashboard:
'The map keeps only the postcodes that match. The rest of the country drops away.',
filters:
'Now tweak it: cut the commute to twenty minutes and colour the map by travel time.',
zoom: 'Zoom in until the blobs become streets, parks, and postcode blocks.',
open: 'Open one block that still passes the filters.',
details:
'On the right, you can see why it passed: journey time, listing links, Street View, sold prices, schools, crime, the noise number, and the tree score.',
shortlist: 'Export those postcodes and only search there.',
},
},
de: {
@ -131,26 +143,29 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
'Calm and cheerful German male narrator with clear standard German pronunciation ' +
'and a friendly, practical delivery.',
voiceReferenceText:
'Willkommen zur Demonstration. Diese Sprecherstimme hören Sie im gesamten Video.',
'Willkommen zur Demonstration. Diese Sprecherstimme hörst du im gesamten Video.',
promptText:
'Wohnung unter £300k, 35 Min. nach Manchester, gute Schulen, niedrige Kriminalität, ruhige Straßen',
travelTimeLabel: 'Stadtzentrum Manchester',
exportButtonTitle: 'Als Excel exportieren',
colourMapTitle: 'Karte einfärben',
brand: {
name: 'Perfect Postcode',
tagline: 'Wissen, wo Sie suchen sollten, bevor Inserate Ihre Suche bestimmen.',
tagline: 'Wissen, wo du suchen solltest, bevor Inserate deine Suche bestimmen.',
url: BRAND_URL,
},
cues: {
describe: 'Wählen Sie kein Zuhause durch endloses Scrollen.',
describe: 'Wähle kein Zuhause durch endloses Scrollen.',
prompt:
'Beschreiben Sie, was Ihnen wichtig ist. Budget, Pendelzeit, Schulen, alles.',
'Beschreibe, was dir wichtig ist. Budget, Pendelzeit, Schulen, alles.',
dashboard: 'Die Karte zeigt jede passende Postleitzahl in ganz England.',
filters: 'Ein Regler bewegt sich. Die Karte antwortet sofort.',
zoom: 'Jetzt von der Stadtansicht bis zu echten Straßen zoomen.',
open: 'Öffne einen Treffer und sieh, warum er übrig bleibt.',
details:
'Öffnen Sie eine Postleitzahl. Preise. Schulen. Kriminalität. Lärm. Alles auf einer Karte.',
'Öffne eine Postleitzahl. Preise. Schulen. Kriminalität. Lärm. Alles auf einer Karte.',
shortlist:
'Mit dieser Auswahl zu den Inseraten. Sie wissen jetzt, wo Sie suchen sollen.',
'Mit dieser Auswahl zu den Inseraten. Du weißt jetzt, wo du suchen sollst.',
},
},
zh: {
@ -164,6 +179,7 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
promptText: '30万英镑以内的公寓35分钟到曼彻斯特学校好犯罪率低街道安静',
travelTimeLabel: '曼彻斯特市中心',
exportButtonTitle: '导出为 Excel',
colourMapTitle: '为地图着色',
brand: {
name: 'Perfect Postcode',
tagline: '先知道该看哪里,再让房源牵着你走。',
@ -174,6 +190,8 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
prompt: '用日常话告诉地图你想要的家。预算、通勤、学校,什么都行。',
dashboard: '地图点亮每一个符合条件的英格兰邮编。',
filters: '动一个滑块,地图立刻给答案。',
zoom: '现在从城市范围放大到真实街道。',
open: '打开一个匹配项,看看它为什么留下来。',
details: '打开任意邮编。成交价、学校、犯罪率、噪音,一目了然。',
shortlist: '带着这份清单去房源网站。现在你知道该在哪儿找了。',
},
@ -190,6 +208,7 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
promptText: 'Flat under £300k, 35 min to Manchester, good schools, low crime, quiet streets',
travelTimeLabel: 'Manchester city centre',
exportButtonTitle: 'Excel में निर्यात करें',
colourMapTitle: 'नक्शे को रंगें',
brand: {
name: 'Perfect Postcode',
tagline: 'Know where to look before listings take over.',
@ -201,6 +220,8 @@ const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> =
'Describe what you want. Budget, commute, schools, whatever matters.',
dashboard: 'The map lights up with every postcode in England that fits.',
filters: 'Move one slider. The map answers instantly.',
zoom: 'Now zoom in from the city pattern to actual streets.',
open: 'Open one match and see why it made the cut.',
details: 'Open any postcode. Sold prices. Schools. Crime. Noise. All on one screen.',
shortlist: 'Take your shortlist to the listings. Now you know where to search.',
},
@ -211,25 +232,23 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
const copy = RECORDING_LOCALIZATIONS[locale];
const isMobile = formFactor === 'mobile';
const mapFocus = isMobile ? MAP_FOCUS_MOBILE : MAP_FOCUS_DESKTOP;
const mapZoomSteps = isMobile ? MOBILE_MAP_ZOOM_STEPS : 18;
const mapZoomMs = isMobile ? MOBILE_MAP_ZOOM_MS : 1500;
// Click target stays at the mapZoom focus point. On mobile we kept the
// zoom shallow (6 wheel-steps → ~zoom 14.5) specifically so the centre
// of the visible map area lands on a real postcode polygon at that
// depth; using a vfrac target is deterministic and avoids needing a
// `[data-tutorial="map"]` anchor in the MobileMapPage DOM (it has
// none — that attribute lives only on DesktopMapPage).
const clickTarget = mapFocus;
const mapZoomSteps = isMobile ? MOBILE_MAP_ZOOM_STEPS : DESKTOP_MAP_ZOOM_STEPS;
const mapZoomMs = isMobile ? MOBILE_MAP_ZOOM_MS : DESKTOP_MAP_ZOOM_MS;
const colourTravelTime = el(`${TT_CARD_SELECTOR} button[title="${copy.colourMapTitle}"]`);
const postcodeDemoTarget = isMobile
? vfrac(320 / 540, 255 / 960)
: vfrac(1087 / 1920, 520 / 1080);
const openPostcodeTarget = postcodeDemoTarget;
const zoomPostcodeTarget = postcodeDemoTarget;
const cursorParkTarget = isMobile ? vfrac(0.12, 0.61) : vfrac(0.12, 0.18);
const definingCharacteristicsSelector =
'[data-tutorial="right-pane"] button:has-text("Defining characteristics"), ' +
'.fixed.inset-0.z-50:has(button[aria-label="Close drawer"]) button:has-text("Defining characteristics")';
// Cue 5 (shortlist) on mobile: the Export button lives inside the
// hidden hamburger menu, not in the header — opening it cleanly would
// need a localised aria-label lookup. Instead we pull the map back
// out to the filtered overview so the cut ends on a satisfying wide
// shot of the matching postcodes rather than the post-click zoom.
const shortlistActivities: Storyboard['cues'][number]['during'] =
formFactor === 'desktop'
? [
{ kind: 'zoomReset', durationMs: 900 },
{ kind: 'zoomReset', durationMs: 800 },
{
kind: 'click',
target: el(`button[title="${copy.exportButtonTitle}"]`),
@ -237,14 +256,19 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
},
]
: [
// Reverse the cue-4 zoom-in exactly so we land back on the
// initial filtered dashboard view (hexagons visible).
{
kind: 'click',
target: el('button[aria-label="Close drawer"]'),
durationMs: 650,
},
{
kind: 'mapZoom',
target: mapFocus,
steps: MOBILE_MAP_ZOOM_STEPS,
durationMs: MOBILE_MAP_ZOOM_MS,
direction: 'out',
waitForMapSettled: true,
timeoutMs: 12000,
},
];
@ -252,7 +276,17 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
{
text: copy.cues.describe,
gapBeforeMs: 0,
tail: [{ kind: 'wait', durationMs: 250 }],
during: isMobile
? [{ kind: 'wait', durationMs: 700 }]
: [
{
kind: 'zoomTo',
target: el('[data-tutorial="ai-filters"]'),
scale: AI_ZOOM_SCALE_DESKTOP,
durationMs: 900,
},
],
tail: [{ kind: 'wait', durationMs: 150 }],
},
{
text: copy.cues.prompt,
@ -262,17 +296,25 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
kind: 'type',
selector: '[data-tutorial="ai-filters"] textarea',
text: copy.promptText,
durationMs: 3000,
durationMs: 4300,
},
{ kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1200 },
],
tail: [{ kind: 'wait', durationMs: 500 }],
tail: [{ kind: 'wait', durationMs: 120 }],
},
{
text: copy.cues.dashboard,
gapBeforeMs: 300,
during: [{ kind: 'zoomReset', durationMs: 1400 }],
tail: [{ kind: 'wait', durationMs: 500 }],
during: [
{
kind: 'submitForm',
formSelector: '[data-tutorial="ai-filters"] form',
durationMs: 2200,
waitForMapSettled: true,
timeoutMs: 15000,
},
{ kind: 'zoomReset', durationMs: 900 },
],
tail: [{ kind: 'wait', durationMs: 300 }],
},
{
@ -284,45 +326,106 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`,
trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`,
toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX,
durationMs: 1000,
durationMs: 1800,
},
{ kind: 'click', target: colourTravelTime, durationMs: 750 },
],
tail: [{ kind: 'wait', durationMs: 400 }],
tail: [{ kind: 'wait', durationMs: 350 }],
},
{
text: copy.cues.details,
text: copy.cues.zoom,
gapBeforeMs: 500,
during: [
{ kind: 'cursorScale', scale: 1.4, durationMs: 200 },
{
kind: 'mapZoom',
target: mapFocus,
target: zoomPostcodeTarget,
steps: mapZoomSteps,
durationMs: mapZoomMs,
},
],
tail: [
// Wait for the post-zoom /api/postcodes response and a redraw
// before the click — otherwise the click can fire on a stale
// frame and miss the polygon.
{ kind: 'wait', durationMs: 500 },
{ kind: 'moveCursor', target: cursorParkTarget, durationMs: 250 },
{ kind: 'wait', durationMs: 120 },
],
},
{
text: copy.cues.open,
gapBeforeMs: 200,
during: [
{
kind: 'click',
target: clickTarget,
durationMs: 700,
target: openPostcodeTarget,
durationMs: 1200,
waitForSelectionReady: true,
timeoutMs: 6000,
},
{ kind: 'cursorScale', scale: 1, durationMs: 280 },
// Linger so the climax cue lands on the right-pane reveal.
{ kind: 'wait', durationMs: 1500 },
{ kind: 'cursorScale', scale: 1, durationMs: 250 },
],
tail: [{ kind: 'wait', durationMs: 300 }],
},
{
text: copy.cues.details,
captionPlacement: isMobile ? undefined : 'side',
gapBeforeMs: 250,
during: isMobile
? [
{
kind: 'scrollPane',
selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
top: 430,
durationMs: 900,
},
{
kind: 'clickIfVisible',
target: el(definingCharacteristicsSelector),
durationMs: 650,
timeoutMs: 700,
},
{
kind: 'scrollPane',
selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
top: 700,
durationMs: 850,
},
]
: [
{
kind: 'zoomTo',
target: el(HOMEPAGE_RIGHT_PANE_SELECTOR),
scale: 1.35,
durationMs: 950,
},
{
kind: 'scrollPane',
selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
top: 360,
durationMs: 850,
},
{
kind: 'clickIfVisible',
target: el(definingCharacteristicsSelector),
durationMs: 650,
timeoutMs: 700,
},
{
kind: 'scrollPane',
selector: HOMEPAGE_RIGHT_PANE_SELECTOR,
top: 920,
durationMs: 850,
},
],
tail: [{ kind: 'wait', durationMs: 700 }],
},
{
text: copy.cues.shortlist,
gapBeforeMs: 500,
during: shortlistActivities,
tail: [{ kind: 'wait', durationMs: 800 }],
tail: [{ kind: 'wait', durationMs: 650 }],
},
{
@ -344,26 +447,14 @@ function createCues(locale: RecordingLocale, formFactor: FormFactor): Storyboard
function buildPre(formFactor: FormFactor): Storyboard['pre'] {
if (formFactor === 'mobile') {
// Mobile skips the wrapper-zoom into the AI card. On a 540-wide
// viewport the bottom sheet already occupies ~44% of the screen
// and the AI card sits at the top of it — leaning further in would
// overflow the card width and crop the placeholder. We just clear
// the vignette and let the typing draw the eye.
return [
{ kind: 'clearVignette', durationMs: 0 },
{ kind: 'wait', durationMs: 400 },
{ kind: 'wait', durationMs: 120 },
];
}
return [
{ kind: 'clearVignette', durationMs: 0 },
{ kind: 'wait', durationMs: 200 },
{
kind: 'zoomTo',
target: el('[data-tutorial="ai-filters"]'),
scale: AI_ZOOM_SCALE_DESKTOP,
durationMs: 1300,
},
{ kind: 'wait', durationMs: 140 },
{ kind: 'wait', durationMs: 120 },
];
}
@ -452,10 +543,13 @@ function createRecordingStoryboard(
// Filters returned by the AI stub. Keys MUST match real feature names
// from /api/features (verified against the running server's schema).
stubbedFilters: {
'Property type': ['Flats/Maisonettes'],
'Estimated current price': [0, 300000],
'Serious crime per 1k residents (avg/yr)': [0, 55],
'Outstanding primary schools within 2km': [1, 10],
'Property type': ['Flats/Maisonettes', 'Semi-Detached'],
'Estimated current price': [0, 315000],
'Serious crime per 1k residents (avg/yr)': [0, 70],
'Good+ primary schools within 2km': [1, 10],
'Noise (dB)': [50, 70],
'Street tree density percentile': [25, 100],
'Max available download speed (Mbps)': ['100', '300', '1000'],
},
// Travel-time filters returned by the AI stub. Slug matches the real
// /api/travel-destinations?mode=transit response.
@ -481,6 +575,10 @@ function createRecordingStoryboard(
const RECORDING_LOCALES: readonly RecordingLocale[] = ['en', 'de', 'zh', 'hi'];
const RECORDING_FORM_FACTORS: readonly FormFactor[] = ['desktop', 'mobile'];
const ENGLISH_HOMEPAGE_STORYBOARDS: Storyboard[] = RECORDING_FORM_FACTORS.map((formFactor) =>
createRecordingStoryboard('en', formFactor)
);
const DEMO_STORYBOARDS: Storyboard[] = RECORDING_LOCALES.flatMap((locale) =>
RECORDING_FORM_FACTORS.map((formFactor) => createRecordingStoryboard(locale, formFactor))
);
@ -1271,14 +1369,21 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
const AD_STORYBOARDS = AD_CONFIGS.map(createDemoAdStoryboard);
const STORYBOARD_SET = process.env.VIDEO_STORYBOARD_SET ?? 'ads';
const STORYBOARD_SET = process.env.VIDEO_STORYBOARD_SET ?? 'homepage-en';
export const storyboards: Storyboard[] =
STORYBOARD_SET === 'demo'
? DEMO_STORYBOARDS
: STORYBOARD_SET === 'all'
? [...AD_STORYBOARDS, ...DEMO_STORYBOARDS]
: AD_STORYBOARDS;
export const storyboards: Storyboard[] = (() => {
switch (STORYBOARD_SET) {
case 'homepage-en':
return ENGLISH_HOMEPAGE_STORYBOARDS;
case 'demo':
return DEMO_STORYBOARDS;
case 'all':
return [...AD_STORYBOARDS, ...DEMO_STORYBOARDS];
case 'ads':
default:
return AD_STORYBOARDS;
}
})();
export function getStoryboard(name: string): Storyboard {
const sb = storyboards.find((s) => s.name === name);

View file

@ -169,7 +169,6 @@ def main() -> int:
"aac",
"-b:a",
"192k",
"-shortest",
"-movflags",
"+faststart",
str(out_path),

View file

@ -168,6 +168,61 @@ def cached_index_matches(
return True
def _coerce_number(value, kind):
    """Return ``kind(value)``, or ``None`` when *value* cannot be converted."""
    try:
        return kind(value)
    except (TypeError, ValueError, OverflowError):
        return None


def load_reusable_items(
    index_path: Path,
    cues: list[dict],
    instruct: str,
    language: str,
    reference_text: str,
    design_model: str,
    clone_model: str,
    reference_audio: str,
    seed: int,
    temperature: float,
    top_p: float,
) -> dict[int, dict]:
    """Return cue-indexed cached items that match the current synth settings.

    Unlike ``cached_index_matches`` this accepts a partial index, so a long
    CPU synthesis run can be resumed cue-by-cue after an interruption.

    The index is treated purely as a cache: a missing, truncated, or
    malformed file (or a malformed entry inside it) is a cache miss, never an
    error, so the caller simply re-synthesises the affected cues.

    Args:
        index_path: Path of the ``index.json`` written by a previous run. Wav
            names stored in the index are resolved relative to its parent.
        cues: Current cues; each needs ``cueIndex`` and ``text`` and may carry
            ``gapBeforeMs`` (defaults to 0).
        instruct, language, reference_text, design_model, clone_model,
        reference_audio, seed, temperature, top_p: Current synthesis settings.
            If any of them differs from what the index recorded, nothing is
            reusable.

    Returns:
        Mapping of cue index to the cached item dict, containing only items
        whose text and gap still match the cue, whose wav file exists, and
        which carry a numeric ``durationMs``.
    """
    # EAFP: read directly instead of exists()+read so a file vanishing between
    # the two calls is still a plain cache miss. JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses.
    try:
        cached = json.loads(index_path.read_text(encoding="utf-8"))
    except (FileNotFoundError, ValueError):
        return {}
    if not isinstance(cached, dict):
        return {}

    exact_settings = {
        "instruct": instruct,
        "language": language,
        "referenceText": reference_text,
        "designModel": design_model,
        "cloneModel": clone_model,
    }
    if any(cached.get(key) != value for key, value in exact_settings.items()):
        return {}
    # Indexes written before referenceAudio existed count as "no reference".
    if cached.get("referenceAudio", "") != reference_audio:
        return {}
    # A value that cannot be coerced yields None, which never equals the
    # current setting, so a corrupt field invalidates the cache.
    if _coerce_number(cached.get("seed", -1), int) != seed:
        return {}
    if _coerce_number(cached.get("temperature", -1), float) != temperature:
        return {}
    if _coerce_number(cached.get("topP", -1), float) != top_p:
        return {}

    cached_items = cached.get("items", [])
    if not isinstance(cached_items, list):
        return {}

    cue_by_index = {int(c["cueIndex"]): c for c in cues}
    reusable: dict[int, dict] = {}
    for item in cached_items:
        if not isinstance(item, dict):
            continue
        cue_index = _coerce_number(item.get("cueIndex", -1), int)
        cue = cue_by_index.get(cue_index)
        if cue is None:
            continue
        wav = item.get("wav")
        if not isinstance(wav, str) or not wav:
            continue
        if not (index_path.parent / wav).is_file():
            continue
        if cue["text"].strip() != str(item.get("text", "")).strip():
            continue
        cue_gap = _coerce_number(cue.get("gapBeforeMs", 0), int)
        if cue_gap is None or cue_gap != _coerce_number(item.get("gapBeforeMs", -1), int):
            continue
        # The caller formats and sums durationMs of reused items, so an entry
        # without a usable duration must be re-synthesised rather than reused.
        if _coerce_number(item.get("durationMs"), int) is None:
            continue
        reusable[cue_index] = item
    return reusable
def seed_everything(seed: int) -> None:
random.seed(seed)
np.random.seed(seed)
@ -333,34 +388,74 @@ def main() -> int:
)
print(
f"[synth] cloning {len(texts)} cues from reference (x_vector_only) — one batched call",
f"[synth] cloning {len(texts)} cues from reference (x_vector_only)",
flush=True,
)
for i, t in enumerate(texts):
print(f"[synth] {i:2d}: {t}", flush=True)
clone_model = load_model(args.clone_model, args.device)
seed_everything(seed)
wavs, sr = clone_model.generate_voice_clone(
text=texts,
language=language,
ref_audio=str(ref_wav_path),
ref_text=ref_text,
x_vector_only_mode=True,
non_streaming_mode=True,
do_sample=True,
temperature=temperature,
top_p=top_p,
out_index_base = {
"storyboard": args.storyboard,
"instruct": instruct,
"language": language,
"designModel": args.design_model,
"cloneModel": args.clone_model,
"referenceAudio": reference_audio_cache_key,
"referenceText": ref_text,
"seed": seed,
"temperature": temperature,
"topP": top_p,
}
index_path = audio_dir / "index.json"
reusable = load_reusable_items(
index_path,
cues,
instruct,
language,
reference_text,
args.design_model,
args.clone_model,
reference_audio_cache_key,
seed,
temperature,
top_p,
)
if len(wavs) != len(texts):
print(
f"[synth] model returned {len(wavs)} wavs for {len(texts)} cues",
file=sys.stderr,
)
return 1
def write_index(items: list[dict]) -> None:
index_path.write_text(json.dumps({**out_index_base, "items": items}, indent=2))
items = []
for cue, audio in zip(cues, wavs):
for cue_index, cue in enumerate(cues):
cached_item = reusable.get(int(cue["cueIndex"]))
if cached_item:
items.append(cached_item)
write_index(items)
print(
f"[synth] reusing {cached_item['wav']} {int(cached_item['durationMs']):>5d}ms «{cue['text']}»",
flush=True,
)
continue
seed_everything(seed + cue_index)
wavs, sr = clone_model.generate_voice_clone(
text=[texts[cue_index]],
language=language,
ref_audio=str(ref_wav_path),
ref_text=ref_text,
x_vector_only_mode=True,
non_streaming_mode=True,
do_sample=True,
temperature=temperature,
top_p=top_p,
)
if len(wavs) != 1:
print(
f"[synth] model returned {len(wavs)} wavs for cue {cue_index}",
file=sys.stderr,
)
return 1
audio = wavs[0]
if hasattr(audio, "cpu"):
audio = audio.cpu().float().numpy()
wav_name = f"cue_{cue['cueIndex']:03d}.wav"
@ -377,25 +472,13 @@ def main() -> int:
"durationMs": duration_ms,
}
)
write_index(items)
print(
f"[synth] wrote {wav_name} {duration_ms:>5d}ms «{cue['text']}»",
flush=True,
)
out_index = {
"storyboard": args.storyboard,
"instruct": instruct,
"language": language,
"designModel": args.design_model,
"cloneModel": args.clone_model,
"referenceAudio": reference_audio_cache_key,
"referenceText": ref_text,
"seed": seed,
"temperature": temperature,
"topP": top_p,
"items": items,
}
(audio_dir / "index.json").write_text(json.dumps(out_index, indent=2))
write_index(items)
total_ms = sum(it["gapBeforeMs"] + it["durationMs"] for it in items)
print(
f"[synth] [{args.storyboard}] {len(items)} cues, {total_ms}ms of audio (incl. gaps) -> {audio_dir}",