lgtm
This commit is contained in:
parent
8708bf000d
commit
11711c57e6
38 changed files with 5361 additions and 265 deletions
|
|
@ -2,10 +2,10 @@
|
|||
#
|
||||
# End-to-end re-render of the dashboard demo videos.
|
||||
#
|
||||
# All per-storyboard knobs (aspect, fps, bitrate, prompt text, voice persona,
|
||||
# poster timestamp, brand strings…) live on the Storyboard objects in
|
||||
# src/storyboard.ts. To add a vertical cut or change the voice, edit that
|
||||
# file — this script only handles target/auth/transport concerns.
|
||||
# All per-storyboard knobs (aspect, fps, bitrate, prompt text, localized
|
||||
# narration, voice persona, poster timestamp, brand strings…) live in
|
||||
# src/storyboard.ts. A single visual storyboard can expand into multiple
|
||||
# language variants there; this script renders every emitted slug.
|
||||
#
|
||||
# Two targets:
|
||||
# local (default) — assumes the docker-compose stack on host.docker.internal,
|
||||
|
|
|
|||
|
|
@ -50,6 +50,9 @@ export async function launchRecordingBrowser(
|
|||
deviceScaleFactor: storyboard.video.captureScale,
|
||||
recordVideo: { dir: opts.recordDir, size: viewport },
|
||||
});
|
||||
await context.addInitScript((appLanguage) => {
|
||||
if (appLanguage) localStorage.setItem('language', appLanguage);
|
||||
}, storyboard.content.appLanguage ?? 'en');
|
||||
await suppressDevServerNoise(context);
|
||||
return { browser, context };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -307,12 +307,19 @@ export async function showOutro(
|
|||
document.getElementById('__demo-caption')?.classList.remove('visible');
|
||||
const el = document.createElement('div');
|
||||
el.id = '__demo-outro';
|
||||
el.innerHTML = `
|
||||
<div id="__demo-outro-card">
|
||||
<div id="__demo-outro-brand">${brand}</div>
|
||||
<div id="__demo-outro-tagline">${tagline}</div>
|
||||
<div id="__demo-outro-url">${url}</div>
|
||||
</div>`;
|
||||
const card = document.createElement('div');
|
||||
card.id = '__demo-outro-card';
|
||||
const brandEl = document.createElement('div');
|
||||
brandEl.id = '__demo-outro-brand';
|
||||
brandEl.textContent = brand;
|
||||
const taglineEl = document.createElement('div');
|
||||
taglineEl.id = '__demo-outro-tagline';
|
||||
taglineEl.textContent = tagline;
|
||||
const urlEl = document.createElement('div');
|
||||
urlEl.id = '__demo-outro-url';
|
||||
urlEl.textContent = url;
|
||||
card.append(brandEl, taglineEl, urlEl);
|
||||
el.appendChild(card);
|
||||
document.body.appendChild(el);
|
||||
requestAnimationFrame(() => {
|
||||
requestAnimationFrame(() => el.classList.add('visible'));
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ function emitScript(storyboard: Storyboard): string {
|
|||
voice: {
|
||||
instruct: storyboard.voice.instruct,
|
||||
language: storyboard.voice.language,
|
||||
referenceText: storyboard.voice.referenceText,
|
||||
temperature: storyboard.voice.temperature ?? 0.6,
|
||||
topP: storyboard.voice.topP ?? 0.9,
|
||||
seed: storyboard.voice.seed ?? 42,
|
||||
|
|
@ -68,6 +69,7 @@ function main(): void {
|
|||
const index = {
|
||||
storyboards: storyboards.map((sb) => ({
|
||||
name: sb.name,
|
||||
locale: sb.locale ?? sb.content.appLanguage,
|
||||
aspect: sb.video.aspect,
|
||||
outputFps: sb.video.outputFps,
|
||||
minDurationS: sb.video.minDurationS,
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ export interface RunnerResult {
|
|||
const MAP_ZOOM_WHEEL_DELTA = -120;
|
||||
const FALLBACK_MS_PER_WORD = 750;
|
||||
const FALLBACK_TAIL_BUFFER_MS = 800;
|
||||
/** Number of Han characters that count as one "word" when estimating a fallback narration duration. */
const CJK_CHARS_PER_FALLBACK_WORD = 2;
|
||||
|
||||
interface SynthCue {
|
||||
cueIndex: number;
|
||||
|
|
@ -266,10 +267,15 @@ function loadSynthIndex(storyboard: Storyboard): SynthCue[] {
|
|||
return storyboard.cues.map((cue, cueIndex) => ({
|
||||
cueIndex,
|
||||
text: cue.text,
|
||||
durationMs:
|
||||
cue.text.split(/\s+/).filter(Boolean).length * FALLBACK_MS_PER_WORD +
|
||||
FALLBACK_TAIL_BUFFER_MS,
|
||||
durationMs: estimateFallbackDurationMs(cue.text),
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Estimates a fallback narration duration for a cue from its text alone.
 *
 * Whitespace-delimited tokens count as one word each. Han characters are not
 * separated by whitespace, so every CJK_CHARS_PER_FALLBACK_WORD of them count
 * as one word instead. For mixed-script text (for example a Latin brand name
 * followed by a Chinese tagline) the two contributions are added together;
 * taking only the larger one would under-estimate the spoken length.
 *
 * @param text Narration text of a single cue.
 * @returns Estimated duration in milliseconds; at least one word plus the tail buffer.
 */
function estimateFallbackDurationMs(text: string): number {
  const wordCount = text.split(/\s+/).filter(Boolean).length;
  const cjkCount = text.match(/\p{Script=Han}/gu)?.length ?? 0;

  // Words that remain once Han characters are taken out. Tokens made only of
  // punctuation (e.g. a trailing "。") are not spoken and are not counted.
  const nonCjkWordCount =
    cjkCount === 0
      ? wordCount
      : text
          .replace(/\p{Script=Han}/gu, ' ')
          .split(/\s+/)
          .filter((token) => /[\p{L}\p{N}]/u.test(token)).length;

  const cjkUnits = Math.ceil(cjkCount / CJK_CHARS_PER_FALLBACK_WORD);

  // Keep the plain whitespace word count as a floor so text without Han
  // characters yields exactly the same estimate as before.
  const units = Math.max(wordCount, nonCjkWordCount + cjkUnits, 1);
  return units * FALLBACK_MS_PER_WORD + FALLBACK_TAIL_BUFFER_MS;
}
|
||||
|
||||
export type { Page };
|
||||
|
|
|
|||
|
|
@ -124,6 +124,8 @@ export interface VoiceConfig {
|
|||
instruct: string;
|
||||
/** Qwen3-TTS language string, e.g. "English". */
|
||||
language: string;
|
||||
/** Reference utterance used when minting a generated voice for this language. */
|
||||
referenceText?: string;
|
||||
/** Sampling temperature (default 0.6). */
|
||||
temperature?: number;
|
||||
/** Top-p nucleus sampling (default 0.9). */
|
||||
|
|
@ -147,6 +149,8 @@ export interface BrandConfig {
|
|||
export interface ContentConfig {
|
||||
/** Prompt text typed into the AI box during the cold open. */
|
||||
promptText: string;
|
||||
/** Frontend i18n language code to set before loading the dashboard. */
|
||||
appLanguage?: string;
|
||||
/** Cold-open zoom multiplier on the AI card. */
|
||||
aiZoomScale: number;
|
||||
initialMapView: { lat: number; lon: number; zoom: number };
|
||||
|
|
@ -177,6 +181,8 @@ export interface TravelTimeFilter {
|
|||
*/
|
||||
export interface Storyboard {
|
||||
name: string;
|
||||
/** Optional language/variant code, used for manifests and logging. */
|
||||
locale?: string;
|
||||
video: VideoConfig;
|
||||
voice: VoiceConfig;
|
||||
content: ContentConfig;
|
||||
|
|
|
|||
|
|
@ -6,8 +6,9 @@ import { el, type Storyboard } from './script.js';
|
|||
* Each entry is a fully self-contained Storyboard: video knobs (aspect,
|
||||
* bitrate, fps), voice persona (Qwen3-TTS instruct + language + sampling),
|
||||
* stubbed AI response, brand strings, AND the cue list. There is no shared
|
||||
* global state — to ship a vertical cut, a different prompt, or a different
|
||||
* voice, push another item onto this array.
|
||||
* global state. The exported array can contain generated variants, so a
|
||||
* shared visual storyboard can render once per language without repeating
|
||||
* its activity sequence.
|
||||
*
|
||||
* `name` doubles as the on-disk slug. The pipeline writes per-storyboard
|
||||
* artefacts to `output/<name>/` and publishes `<name>.mp4` / `<name>.jpg`
|
||||
|
|
@ -23,15 +24,6 @@ import { el, type Storyboard } from './script.js';
|
|||
* before the next cue's gap).
|
||||
*/
|
||||
|
||||
const PROMPT_TEXT = 'Flats <£300k, 35 min to commute Manchester close to an outstanding school in a quite low crime area';
|
||||
|
||||
const BRAND = {
|
||||
name: 'Perfect Postcode',
|
||||
tagline: 'Your best chance to find your next perfect home.',
|
||||
url: 'https://perfect-postcode.co.uk',
|
||||
};
|
||||
|
||||
|
||||
const AI_ZOOM_SCALE = 2.4;
|
||||
|
||||
const TT_CARD_SELECTOR = '[data-filter-name="tt_0"]';
|
||||
|
|
@ -39,103 +31,238 @@ const TT_SLIDER_MAX = 120;
|
|||
const TT_DRAG_FROM_MIN = 35;
|
||||
const TT_DRAG_TO_MIN = 20;
|
||||
|
||||
const BRITISH_MALE_NARRATOR =
|
||||
'Calm and cheerful young British male narrator from the North of England with a ' +
|
||||
'strong Manchester accent.';
|
||||
/** Locale codes that key RECORDING_LOCALIZATIONS; one recording storyboard is built per code. */
type RecordingLocale = 'en' | 'de' | 'zh' | 'hi';
|
||||
|
||||
const DEFAULT_CUES: Storyboard['cues'] = [
|
||||
{
|
||||
text: 'Start by describing the type of place you\'re looking for',
|
||||
gapBeforeMs: 0,
|
||||
tail: [
|
||||
{
|
||||
kind: 'type',
|
||||
selector: '[data-tutorial="ai-filters"] textarea',
|
||||
text: PROMPT_TEXT,
|
||||
durationMs: 3000,
|
||||
},
|
||||
{ kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1700 },
|
||||
],
|
||||
},
|
||||
{
|
||||
text: 'The dashboard will show you the likeliest places that will meet your expectations',
|
||||
gapBeforeMs: 400,
|
||||
during: [{ kind: 'zoomReset', durationMs: 1400 }],
|
||||
tail: [{ kind: 'wait', durationMs: 500 }],
|
||||
},
|
||||
/** Brand strings shown on the outro card and stored on the storyboard content. */
interface RecordingBrandCopy {
  name: string;
  tagline: string;
  url: string;
}

/** Narration text for the five narrated steps of the recording, in playback order. */
interface RecordingCueCopy {
  describe: string;
  dashboard: string;
  filters: string;
  details: string;
  shortlist: string;
}

/** Everything that differs between language variants of the recording storyboard. */
interface RecordingLocalization {
  /** Storyboard name for this variant (e.g. 'recording', 'recording-de'). */
  name: string;
  /** Frontend language code, passed through as `content.appLanguage`. */
  appLanguage: string;
  /** TTS language string, passed through as `voice.language`. */
  ttsLanguage: string;
  /** Voice persona description, passed through as `voice.instruct`. */
  voiceInstruct: string;
  /** Reference utterance, passed through as `voice.referenceText`. */
  voiceReferenceText: string;
  /** Prompt typed into the AI filter textarea during the first cue. */
  promptText: string;
  /** Label of the stubbed travel-time filter. */
  travelTimeLabel: string;
  /** `title` attribute used to locate the export button in the localized UI. */
  exportButtonTitle: string;
  brand: RecordingBrandCopy;
  cues: RecordingCueCopy;
}
|
||||
|
||||
{
|
||||
text: `Adjust the filters to narrow down to the best candidates`,
|
||||
gapBeforeMs: 500,
|
||||
during: [
|
||||
{
|
||||
kind: 'dragSlider',
|
||||
thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`,
|
||||
trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`,
|
||||
toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX,
|
||||
durationMs: 1000,
|
||||
},
|
||||
],
|
||||
tail: [{ kind: 'wait', durationMs: 400 }],
|
||||
},
|
||||
/** Site URL shared by the `brand.url` of every recording localization. */
const BRAND_URL = 'https://perfect-postcode.co.uk';
|
||||
|
||||
{
|
||||
text: 'And now it\'s time to dig into the details. Looks good to me!',
|
||||
gapBeforeMs: 500,
|
||||
during: [
|
||||
{ kind: 'cursorScale', scale: 1.4, durationMs: 200 },
|
||||
{
|
||||
kind: 'mapZoom',
|
||||
target: { kind: 'point', x: 1140, y: 605 },
|
||||
steps: 18,
|
||||
durationMs: 1500,
|
||||
},
|
||||
],
|
||||
tail: [
|
||||
// Wait for the post-zoom /api/postcodes response and a redraw
|
||||
// before the click — otherwise the click can fire on a stale
|
||||
// frame and miss the polygon.
|
||||
{ kind: 'wait', durationMs: 500 },
|
||||
{
|
||||
kind: 'click',
|
||||
target: { kind: 'point', x: 1140, y: 605 },
|
||||
durationMs: 700,
|
||||
},
|
||||
{ kind: 'cursorScale', scale: 1, durationMs: 280 },
|
||||
// Linger so the climax cue lands on the right-pane reveal.
|
||||
{ kind: 'wait', durationMs: 1500 },
|
||||
],
|
||||
/**
 * English copy for the default `recording` storyboard. The Hindi-UI variant
 * narrates in English too, so it is derived from this object below.
 */
const ENGLISH_RECORDING_COPY: RecordingLocalization = {
  name: 'recording',
  appLanguage: 'en',
  ttsLanguage: 'English',
  voiceInstruct:
    'Calm and cheerful young British male narrator from the North of England with a ' +
    'strong Manchester accent.',
  voiceReferenceText:
    "Welcome to the demonstration. This is the narrator voice you'll hear throughout the video.",
  promptText:
    'Flats <£300k, 35 min to commute Manchester close to an outstanding school in a quite low crime area',
  travelTimeLabel: 'Manchester city centre',
  exportButtonTitle: 'Export to Excel',
  brand: {
    name: 'Perfect Postcode',
    tagline: 'Your best chance to find your next perfect home.',
    url: BRAND_URL,
  },
  cues: {
    describe: "Start by describing the type of place you're looking for",
    dashboard: 'The dashboard will show you the likeliest places that will meet your expectations',
    filters: 'Adjust the filters to narrow down to the best candidates',
    details: "And now it's time to dig into the details. Looks good to me!",
    shortlist:
      'Now you can take your shortlist and start looking for your next home in your perfect postcode.',
  },
};

/** Per-locale copy, voice persona and UI strings for each language variant of the recording. */
const RECORDING_LOCALIZATIONS: Record<RecordingLocale, RecordingLocalization> = {
  en: ENGLISH_RECORDING_COPY,

  de: {
    name: 'recording-de',
    appLanguage: 'de',
    ttsLanguage: 'German',
    voiceInstruct:
      'Calm and cheerful German male narrator with clear standard German pronunciation ' +
      'and a friendly, practical delivery.',
    voiceReferenceText:
      'Willkommen zur Demonstration. Diese Sprecherstimme hören Sie im gesamten Video.',
    promptText:
      'Wohnungen unter £300k, 35 Min. Pendelzeit nach Manchester, nahe einer herausragenden Schule in einer sehr kriminalitätsarmen Gegend',
    travelTimeLabel: 'Stadtzentrum Manchester',
    exportButtonTitle: 'Als Excel exportieren',
    brand: {
      name: 'Perfect Postcode',
      tagline: 'Ihre beste Chance, Ihr nächstes perfektes Zuhause zu finden.',
      url: BRAND_URL,
    },
    cues: {
      describe: 'Beschreiben Sie zuerst, wonach Sie suchen.',
      dashboard: 'Das Dashboard zeigt die Orte, die Ihre Erwartungen am ehesten erfüllen.',
      filters: 'Passen Sie die Filter an, um die besten Kandidaten einzugrenzen.',
      details: 'Jetzt geht es in die Details. Sieht gut aus!',
      shortlist:
        'Jetzt können Sie Ihre Auswahl nehmen und Ihr nächstes Zuhause in Ihrem perfekten Postcode suchen.',
    },
  },

  zh: {
    name: 'recording-zh',
    appLanguage: 'zh',
    ttsLanguage: 'Chinese',
    voiceInstruct:
      'Calm and cheerful Mandarin Chinese male narrator with clear standard Mandarin ' +
      'pronunciation and a friendly, practical delivery.',
    voiceReferenceText: '欢迎观看演示。整段视频都会使用这位旁白的声音。',
    promptText: '30万英镑以内的公寓,35分钟通勤到曼彻斯特,靠近优秀学校,犯罪率很低的区域',
    travelTimeLabel: '曼彻斯特市中心',
    exportButtonTitle: '导出为 Excel',
    brand: {
      name: 'Perfect Postcode',
      tagline: '帮你更有把握找到下一个理想家。',
      url: BRAND_URL,
    },
    cues: {
      describe: '先描述你想找什么样的地方',
      dashboard: '仪表板会显示最符合你期望的地点',
      filters: '调整筛选条件,缩小到最合适的候选区域',
      details: '现在深入查看细节。看起来不错!',
      shortlist: '现在你可以带着候选清单,开始寻找理想邮编里的下一个家。',
    },
  },

  // Hindi UI with English narration: only the slug, app language, voice
  // persona and the localized export button title differ from the English
  // copy. Nested objects are copied so the two entries never share references.
  hi: {
    ...ENGLISH_RECORDING_COPY,
    name: 'recording-hi',
    appLanguage: 'hi',
    voiceInstruct:
      'Calm and cheerful Indian male narrator speaking English with a strong Indian accent ' +
      'and a friendly, practical delivery.',
    exportButtonTitle: 'Excel में निर्यात करें',
    brand: { ...ENGLISH_RECORDING_COPY.brand },
    cues: { ...ENGLISH_RECORDING_COPY.cues },
  },
};
|
||||
|
||||
{
|
||||
text: 'Now you can take your shortlist and start looking for your next home in your perfect postcode.',
|
||||
gapBeforeMs: 500,
|
||||
during: [
|
||||
{ kind: 'zoomReset', durationMs: 900 },
|
||||
{
|
||||
kind: 'click',
|
||||
target: el('button[title="Export to Excel"]'),
|
||||
durationMs: 800,
|
||||
},
|
||||
],
|
||||
tail: [{ kind: 'wait', durationMs: 800 }],
|
||||
},
|
||||
/**
 * Builds the cue list of the recording storyboard for one locale.
 *
 * Narration text, the typed prompt, the export button title and the outro
 * brand strings are read from RECORDING_LOCALIZATIONS[locale]; selectors,
 * coordinates and timings are identical for every locale.
 *
 * @param locale Locale whose copy should be used.
 * @returns Six cues in playback order: describe, dashboard, filters, details, shortlist, outro.
 */
function createCues(locale: RecordingLocale): Storyboard['cues'] {
  type Cue = Storyboard['cues'][number];

  const copy = RECORDING_LOCALIZATIONS[locale];
  const narration = copy.cues;
  const outroBrand = copy.brand;

  // Screen point that is zoomed into and then clicked during the "details"
  // cue. A fresh object is produced per use so activities never share a target.
  const detailsPoint = () => ({ kind: 'point' as const, x: 1140, y: 605 });

  const describeCue: Cue = {
    text: narration.describe,
    gapBeforeMs: 0,
    tail: [
      {
        kind: 'type',
        selector: '[data-tutorial="ai-filters"] textarea',
        text: copy.promptText,
        durationMs: 3000,
      },
      { kind: 'submitForm', formSelector: '[data-tutorial="ai-filters"] form', durationMs: 1700 },
    ],
  };

  const dashboardCue: Cue = {
    text: narration.dashboard,
    gapBeforeMs: 400,
    during: [{ kind: 'zoomReset', durationMs: 1400 }],
    tail: [{ kind: 'wait', durationMs: 500 }],
  };

  const filtersCue: Cue = {
    text: narration.filters,
    gapBeforeMs: 500,
    during: [
      {
        kind: 'dragSlider',
        thumbSelector: `${TT_CARD_SELECTOR} [role="slider"] >> nth=1`,
        trackSelector: `${TT_CARD_SELECTOR} [data-orientation="horizontal"] >> nth=0`,
        toFraction: TT_DRAG_TO_MIN / TT_SLIDER_MAX,
        durationMs: 1000,
      },
    ],
    tail: [{ kind: 'wait', durationMs: 400 }],
  };

  const detailsCue: Cue = {
    text: narration.details,
    gapBeforeMs: 500,
    during: [
      { kind: 'cursorScale', scale: 1.4, durationMs: 200 },
      {
        kind: 'mapZoom',
        target: detailsPoint(),
        steps: 18,
        durationMs: 1500,
      },
    ],
    tail: [
      // Give the post-zoom /api/postcodes response and a redraw time to land
      // before clicking; otherwise the click can hit a stale frame and miss
      // the polygon.
      { kind: 'wait', durationMs: 500 },
      {
        kind: 'click',
        target: detailsPoint(),
        durationMs: 700,
      },
      { kind: 'cursorScale', scale: 1, durationMs: 280 },
      // Hold here so the climax cue lands on the right-pane reveal.
      { kind: 'wait', durationMs: 1500 },
    ],
  };

  const shortlistCue: Cue = {
    text: narration.shortlist,
    gapBeforeMs: 500,
    during: [
      { kind: 'zoomReset', durationMs: 900 },
      {
        kind: 'click',
        target: el(`button[title="${copy.exportButtonTitle}"]`),
        durationMs: 800,
      },
    ],
    tail: [{ kind: 'wait', durationMs: 800 }],
  };

  const outroCue: Cue = {
    text: `${outroBrand.name}. ${outroBrand.tagline}`,
    gapBeforeMs: 600,
    during: [
      {
        kind: 'showOutro',
        brand: outroBrand.name,
        tagline: outroBrand.tagline,
        url: outroBrand.url,
        durationMs: 0,
      },
    ],
    tail: [{ kind: 'wait', durationMs: 1500 }],
  };

  return [describeCue, dashboardCue, filtersCue, detailsCue, shortlistCue, outroCue];
}
|
||||
|
||||
const DEFAULT_PRE: Storyboard['pre'] = [
|
||||
{ kind: 'clearVignette', durationMs: 0 },
|
||||
|
|
@ -149,9 +276,12 @@ const DEFAULT_PRE: Storyboard['pre'] = [
|
|||
{ kind: 'wait', durationMs: 140 },
|
||||
];
|
||||
|
||||
export const storyboards: Storyboard[] = [
|
||||
{
|
||||
name: 'recording',
|
||||
function createRecordingStoryboard(locale: RecordingLocale): Storyboard {
|
||||
const copy = RECORDING_LOCALIZATIONS[locale];
|
||||
|
||||
return {
|
||||
name: copy.name,
|
||||
locale,
|
||||
video: {
|
||||
aspect: '16x9',
|
||||
captureScale: 1,
|
||||
|
|
@ -168,23 +298,25 @@ export const storyboards: Storyboard[] = [
|
|||
posterTimeS: 16,
|
||||
},
|
||||
voice: {
|
||||
instruct: BRITISH_MALE_NARRATOR,
|
||||
language: 'English',
|
||||
instruct: copy.voiceInstruct,
|
||||
language: copy.ttsLanguage,
|
||||
referenceText: copy.voiceReferenceText,
|
||||
temperature: 0.6,
|
||||
topP: 0.9,
|
||||
seed: 42,
|
||||
},
|
||||
content: {
|
||||
promptText: PROMPT_TEXT,
|
||||
promptText: copy.promptText,
|
||||
appLanguage: copy.appLanguage,
|
||||
aiZoomScale: AI_ZOOM_SCALE,
|
||||
initialMapView: { lat: 53.4795, lon: -2.2451, zoom: 11.5 },
|
||||
// Filters returned by the AI stub. Keys MUST match real feature names
|
||||
// from /api/features (verified against the running server's schema).
|
||||
stubbedFilters: {
|
||||
'Property type': ['Flats/Maisonettes', 'Terraced'],
|
||||
'Estimated current price': [175000, 450000],
|
||||
'Property type': ['Flats/Maisonettes'],
|
||||
'Estimated current price': [0, 300000],
|
||||
'Serious crime per 1k residents (avg/yr)': [0, 55],
|
||||
'Noise (dB)': [50, 68],
|
||||
'Outstanding primary schools within 2km': [1, 10],
|
||||
},
|
||||
// Travel-time filters returned by the AI stub. Slug matches the real
|
||||
// /api/travel-destinations?mode=transit response.
|
||||
|
|
@ -192,7 +324,7 @@ export const storyboards: Storyboard[] = [
|
|||
{
|
||||
mode: 'transit',
|
||||
slug: 'manchester',
|
||||
label: 'Manchester city centre',
|
||||
label: copy.travelTimeLabel,
|
||||
max: TT_DRAG_FROM_MIN,
|
||||
},
|
||||
],
|
||||
|
|
@ -200,12 +332,16 @@ export const storyboards: Storyboard[] = [
|
|||
travelTimeSliderMax: TT_SLIDER_MAX,
|
||||
travelTimeDragFromMin: TT_DRAG_FROM_MIN,
|
||||
travelTimeDragToMin: TT_DRAG_TO_MIN,
|
||||
brand: BRAND,
|
||||
brand: copy.brand,
|
||||
},
|
||||
pre: DEFAULT_PRE,
|
||||
cues: DEFAULT_CUES,
|
||||
},
|
||||
];
|
||||
cues: createCues(locale),
|
||||
};
|
||||
}
|
||||
|
||||
/** One recording storyboard per locale, in the order en, de, zh, hi. */
export const storyboards: Storyboard[] = Array.from(
  ['en', 'de', 'zh', 'hi'] as const,
  (code) => createRecordingStoryboard(code),
);
|
||||
|
||||
export function getStoryboard(name: string): Storyboard {
|
||||
const sb = storyboards.find((s) => s.name === name);
|
||||
|
|
|
|||
|
|
@ -116,6 +116,10 @@ def cached_index_matches(
|
|||
cues: list[dict],
|
||||
instruct: str,
|
||||
language: str,
|
||||
reference_text: str,
|
||||
design_model: str,
|
||||
clone_model: str,
|
||||
reference_audio: str,
|
||||
seed: int,
|
||||
temperature: float,
|
||||
top_p: float,
|
||||
|
|
@ -123,7 +127,8 @@ def cached_index_matches(
|
|||
"""Return True iff index_path's cue list lines up with `cues` 1:1.
|
||||
|
||||
Compared fields: ``cueIndex``, ``text``, ``gapBeforeMs`` plus the synth
|
||||
settings (``instruct``, ``language``, ``seed``, ``temperature``, ``top_p``).
|
||||
settings (``instruct``, ``language``, reference text, models, ``seed``,
|
||||
``temperature``, ``top_p``).
|
||||
All cue WAV files must also exist on disk. Mismatched length, reordered
|
||||
cues, or a missing WAV invalidate the cache.
|
||||
"""
|
||||
|
|
@ -135,6 +140,12 @@ def cached_index_matches(
|
|||
return False
|
||||
if cached.get("instruct") != instruct or cached.get("language") != language:
|
||||
return False
|
||||
if cached.get("referenceText") != reference_text:
|
||||
return False
|
||||
if cached.get("designModel") != design_model or cached.get("cloneModel") != clone_model:
|
||||
return False
|
||||
if cached.get("referenceAudio", "") != reference_audio:
|
||||
return False
|
||||
if int(cached.get("seed", -1)) != seed:
|
||||
return False
|
||||
if float(cached.get("temperature", -1)) != temperature:
|
||||
|
|
@ -170,6 +181,7 @@ def _resolve_reference(
|
|||
audio_dir: Path,
|
||||
instruct: str,
|
||||
language: str,
|
||||
reference_text: str,
|
||||
seed: int,
|
||||
temperature: float,
|
||||
top_p: float,
|
||||
|
|
@ -178,8 +190,8 @@ def _resolve_reference(
|
|||
|
||||
If --reference-audio is supplied, validate and use it directly. Otherwise
|
||||
mint one via VoiceDesign (cached on disk; cache invalidates when the
|
||||
persona/sampling/seed changes). The design model is unloaded before
|
||||
returning so the clone model can claim the GPU.
|
||||
persona/language/reference/sampling/seed changes). The design model is
|
||||
unloaded before returning so the clone model can claim the GPU.
|
||||
"""
|
||||
if args.reference_audio is not None:
|
||||
if not args.reference_audio.exists():
|
||||
|
|
@ -201,7 +213,7 @@ def _resolve_reference(
|
|||
"seed": seed,
|
||||
"temperature": temperature,
|
||||
"topP": top_p,
|
||||
"text": REFERENCE_TEXT,
|
||||
"text": reference_text,
|
||||
}
|
||||
if (
|
||||
ref_wav_path.exists()
|
||||
|
|
@ -209,16 +221,16 @@ def _resolve_reference(
|
|||
and _safe_load_json(ref_meta_path) == ref_meta
|
||||
):
|
||||
print(f"[synth] reusing cached voice reference {ref_wav_path.name}", flush=True)
|
||||
return ref_wav_path, REFERENCE_TEXT
|
||||
return ref_wav_path, reference_text
|
||||
|
||||
print(
|
||||
f"[synth] minting voice reference via VoiceDesign: «{REFERENCE_TEXT}»",
|
||||
f"[synth] minting voice reference via VoiceDesign: «{reference_text}»",
|
||||
flush=True,
|
||||
)
|
||||
design_model = load_model(args.design_model, args.device)
|
||||
seed_everything(seed)
|
||||
ref_wavs, ref_sr = design_model.generate_voice_design(
|
||||
text=[REFERENCE_TEXT],
|
||||
text=[reference_text],
|
||||
language=language,
|
||||
instruct=instruct,
|
||||
do_sample=True,
|
||||
|
|
@ -237,7 +249,7 @@ def _resolve_reference(
|
|||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
return ref_wav_path, REFERENCE_TEXT
|
||||
return ref_wav_path, reference_text
|
||||
|
||||
|
||||
def main() -> int:
|
||||
|
|
@ -266,21 +278,30 @@ def main() -> int:
|
|||
return 1
|
||||
instruct = voice["instruct"]
|
||||
language = voice["language"]
|
||||
reference_text = str(voice.get("referenceText") or REFERENCE_TEXT)
|
||||
temperature = float(voice.get("temperature", 0.6))
|
||||
top_p = float(voice.get("topP", 0.9))
|
||||
seed = int(voice.get("seed", 42))
|
||||
reference_audio_cache_key = (
|
||||
str(args.reference_audio.resolve()) if args.reference_audio is not None else ""
|
||||
)
|
||||
|
||||
audio_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Skip generation when the existing audio matches the script — same cue
|
||||
# texts and same gapBeforeMs values in the same order, AND same synth
|
||||
# settings (instruct/seed/temperature/top_p). Saves ~30s of GPU time when
|
||||
# iterating on activity timing without changing narration or persona.
|
||||
# settings (instruct/language/reference/model/seed/temperature/top_p).
|
||||
# Saves ~30s of GPU time when iterating on activity timing without
|
||||
# changing narration or persona.
|
||||
if cached_index_matches(
|
||||
audio_dir / "index.json",
|
||||
cues,
|
||||
instruct,
|
||||
language,
|
||||
reference_text,
|
||||
args.design_model,
|
||||
args.clone_model,
|
||||
reference_audio_cache_key,
|
||||
seed,
|
||||
temperature,
|
||||
top_p,
|
||||
|
|
@ -308,7 +329,7 @@ def main() -> int:
|
|||
# own voice. The reference WAV is cached so subsequent runs only load
|
||||
# the clone model (saves ~20s + 3.4 GB of disk download).
|
||||
ref_wav_path, ref_text = _resolve_reference(
|
||||
args, audio_dir, instruct, language, seed, temperature, top_p
|
||||
args, audio_dir, instruct, language, reference_text, seed, temperature, top_p
|
||||
)
|
||||
|
||||
print(
|
||||
|
|
@ -367,6 +388,7 @@ def main() -> int:
|
|||
"language": language,
|
||||
"designModel": args.design_model,
|
||||
"cloneModel": args.clone_model,
|
||||
"referenceAudio": reference_audio_cache_key,
|
||||
"referenceText": ref_text,
|
||||
"seed": seed,
|
||||
"temperature": temperature,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue