perfect-postcode/frontend/scripts/check-translations.mjs
2026-05-12 08:05:29 +01:00

460 lines
15 KiB
JavaScript

#!/usr/bin/env node
// Validates that every translation file under src/i18n is complete and consistent.
//
// Checks:
// 1. Locales declared in SUPPORTED_LANGUAGES (index.ts) match the files in locales/
// and the language records in descriptions.ts / details.ts.
// 2. Every leaf key in en.ts is present and non-empty in every other locale.
// 3. Every {{placeholder}} and HTML tag in an English string also appears
// in the translated string, as the same multiset (same tokens, same counts).
// 4. descriptions.ts and details.ts: the union of feature-name keys across
// languages is treated as canonical; every language must cover all of them.
// 5. The lazy locale loader map covers every non-English supported language.
// 6. Selected visible UI strings that previously slipped through are not
// hardcoded outside the i18n files.
//
// The script parses the TypeScript source with the compiler API and walks the
// AST — no runtime import, no transpilation, no temp files. Run it with:
// node frontend/scripts/check-translations.mjs
import { readFileSync, readdirSync } from 'fs';
import { dirname, join, relative } from 'path';
import { fileURLToPath } from 'url';
import ts from 'typescript';
// Directory containing this script, derived from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Source locations, all resolved relative to this script (../src/...).
const I18N_DIR = join(__dirname, '..', 'src', 'i18n');
const LOCALES_DIR = join(I18N_DIR, 'locales');
const SRC_DIR = join(__dirname, '..', 'src');
// Matches interpolation tokens such as {{count}} or {{ count }}.
const PLACEHOLDER_RE = /\{\{\s*[a-zA-Z_][\w]*\s*\}\}/g;
// Matches opening and closing HTML tags, including any attributes.
const HTML_TAG_RE = /<\/?[a-zA-Z][\w]*\b[^>]*>/g;
// Findings are accumulated here by the checks and printed by main().
const errors = [];
const warnings = [];
// Record an error (causes a non-zero exit in main()).
const fail = (msg) => errors.push(msg);
// Record a warning (printed by main(), does not affect the exit code).
const warn = (msg) => warnings.push(msg);
// Translation key paths whose value may legitimately equal the English text.
// isSuspiciousSameAsEnglish() never warns about these paths.
const SAME_AS_EN_PATH_ALLOWLIST = new Set([
'streetView.title',
'home.showcaseMinutes',
'home.showcaseStep2Sources',
'format.lessThanMin',
'format.moreThanMin',
'learnPage.attrOglLink',
'learnPage.attrOsmContrib',
'learnPage.attrOsmLicenseLink',
'home.showcaseTopThree',
'learnPage.dsTravelOrigin',
'learnPage.dsParkOrigin',
'learnPage.dsCrimeOrigin',
'learnPage.dsDemographicsOrigin',
'learnPage.dsElectionOrigin',
'learnPage.dsPoiOrigin',
'learnPage.dsEthnicityOrigin',
'learnPage.dsBroadbandOrigin',
]);
// Pattern-based companion to the path allowlist: any learnPage.ds<Name>Origin key.
// NOTE(review): the explicit learnPage.ds*Origin entries above also match this pattern.
const SAME_AS_EN_PATH_ALLOWLIST_RE = [/^learnPage\.ds[A-Za-z0-9]+Origin$/];
// English values (compared after trimming) that may appear unchanged in a
// translation without triggering the same-as-English warning.
const SAME_AS_EN_VALUE_ALLOWLIST = new Set([
'Perfect Postcode',
'Land Registry',
'HM Land Registry',
'ONS',
'OpenStreetMap',
'Ofsted',
'Rightmove',
'Zoopla',
'Google',
'Excel',
'UK',
'Reform UK',
'Labour',
'Conservative',
'Liberal Democrat',
]);
// Pairs of [hardcoded text, translation key that should be used instead].
// checkForbiddenVisibleStrings() reports an error whenever a string literal,
// template chunk or JSX text in the scanned sources contains the text
// (case-sensitive substring match).
const FORBIDDEN_VISIBLE_STRINGS = [
['without this filter', 'filters.filtersOut'],
['Connecting to server...', 'common.connectingToServer'],
['Property saved!', 'toasts.propertySaved'],
['View saved', 'toasts.viewSaved'],
["Don't show again", 'toasts.dontShowAgain'],
['Close pane', 'common.closePane'],
['Points of interest', 'poiPane.pointsOfInterest'],
['No data', 'common.noData'],
['All low', 'common.allLow'],
['School type', 'filters.schoolType'],
['School rating', 'filters.schoolRating'],
['School distance', 'filters.schoolDistance'],
['Crime type', 'filters.crimeType'],
['POI type', 'filters.poiType'],
['Matching homes', 'home.showcaseMatchingHomesLabel'],
['Journey routes', 'home.showcaseJourneyRoutes'],
['...and lots more', 'home.showcaseLotsMore'],
['Send the shortlist', 'home.showcaseSendShortlist'],
['Download .xlsx', 'home.showcaseDownloadXlsx'],
['Product demo', 'home.productDemoLabel'],
['Play product demo', 'home.playProductDemo'],
['Scroll to product demo', 'home.scrollToProductDemo'],
];
// Reads `path` as UTF-8 and parses it into a TypeScript SourceFile
// (latest script target, parent pointers enabled).
function parseFile(path) {
  return ts.createSourceFile(
    path,
    readFileSync(path, 'utf8'),
    ts.ScriptTarget.Latest,
    /* setParentNodes */ true
  );
}
// Converts a TS literal expression node into a plain JS value.
//   - string / no-substitution template literal -> its text
//   - array literal  -> array of converted elements
//   - object literal -> object built from its property assignments whose
//     name is an identifier or string-like literal (other members are skipped)
// `as`, `satisfies` and parenthesised wrappers are looked through.
// Any other node yields undefined — callers must check.
function literalToJs(node) {
  if (!node) return undefined;
  if (ts.isStringLiteral(node) || ts.isNoSubstitutionTemplateLiteral(node)) return node.text;

  const wrapsExpression =
    ts.isAsExpression(node) ||
    ts.isParenthesizedExpression(node) ||
    ts.isSatisfiesExpression?.(node);
  if (wrapsExpression) return literalToJs(node.expression);

  if (ts.isArrayLiteralExpression(node)) {
    const items = [];
    for (const element of node.elements) items.push(literalToJs(element));
    return items;
  }

  if (!ts.isObjectLiteralExpression(node)) return undefined;

  const result = {};
  for (const member of node.properties) {
    if (!ts.isPropertyAssignment(member)) continue;
    const nameNode = member.name;
    const hasStaticName =
      ts.isIdentifier(nameNode) ||
      ts.isStringLiteral(nameNode) ||
      ts.isNoSubstitutionTemplateLiteral(nameNode);
    if (!hasStaticName) continue;
    result[nameNode.text] = literalToJs(member.initializer);
  }
  return result;
}
// Walks the whole AST of `sourceFile` and returns the initializer node of the
// variable declared as `name` inside a variable statement. When several
// declarations match, the last one visited wins; returns undefined when none
// matches (or when the last match has no initializer).
function findVarInitializer(sourceFile, name) {
  let found;
  // Block-bodied on purpose: the callback must return undefined, because
  // ts.forEachChild stops iterating as soon as a callback returns a truthy value.
  const walk = (node) => {
    if (ts.isVariableStatement(node)) {
      node.declarationList.declarations
        .filter((decl) => ts.isIdentifier(decl.name) && decl.name.text === name)
        .forEach((decl) => {
          found = decl.initializer;
        });
    }
    ts.forEachChild(node, walk);
  };
  walk(sourceFile);
  return found;
}
// Returns the text of a property-name node when it is an identifier, a string
// literal or a no-substitution template literal; undefined for any other kind.
function propertyNameText(name) {
  const hasText =
    ts.isIdentifier(name) ||
    ts.isStringLiteral(name) ||
    ts.isNoSubstitutionTemplateLiteral(name);
  return hasText ? name.text : undefined;
}
// Returns the property-assignment key names of an object literal node, in
// source order, looking through `as`, `satisfies` and parenthesised wrappers.
// Keys that propertyNameText() cannot resolve (or that are empty) are left
// out. Returns undefined when the node is missing or not an object literal.
function objectLiteralKeys(node) {
  let current = node;
  while (
    current &&
    (ts.isAsExpression(current) ||
      ts.isParenthesizedExpression(current) ||
      ts.isSatisfiesExpression?.(current))
  ) {
    current = current.expression;
  }
  if (!current || !ts.isObjectLiteralExpression(current)) return undefined;

  return current.properties
    .filter((member) => ts.isPropertyAssignment(member))
    .map((member) => propertyNameText(member.name))
    .filter(Boolean);
}
// Reads the SUPPORTED_LANGUAGES array literal from index.ts and returns the
// `code` of every entry, in declaration order.
//
// Throws an Error when the variable cannot be found, is not an array literal,
// or when an entry has no non-empty string-literal `code` — otherwise an
// `undefined` code would flow into later checks and surface as confusing
// "undefined.ts" messages.
function readSupportedLanguages() {
  const sf = parseFile(join(I18N_DIR, 'index.ts'));
  const init = findVarInitializer(sf, 'SUPPORTED_LANGUAGES');
  if (!init) throw new Error('Could not find SUPPORTED_LANGUAGES in index.ts');
  const arr = literalToJs(init);
  if (!Array.isArray(arr)) throw new Error('SUPPORTED_LANGUAGES is not an array literal');
  return arr.map((entry, index) => {
    // literalToJs yields undefined for anything that is not a plain literal,
    // so both the entry and its `code` have to be validated here.
    const code = entry?.code;
    if (typeof code !== 'string' || code === '') {
      throw new Error(`SUPPORTED_LANGUAGES[${index}] has no string literal "code" property`);
    }
    return code;
  });
}
// Returns the keys of the `localeLoaders` object literal declared in index.ts.
// Throws when the variable is missing or is not an object literal.
function readLocaleLoaderCodes() {
  const indexPath = join(I18N_DIR, 'index.ts');
  const loadersNode = findVarInitializer(parseFile(indexPath), 'localeLoaders');
  if (!loadersNode) throw new Error('Could not find localeLoaders in index.ts');

  const codes = objectLiteralKeys(loadersNode);
  if (codes) return codes;
  throw new Error('localeLoaders is not an object literal');
}
// Loads locales/<code>.ts and returns the value of the variable named after
// the locale code, converted to a plain JS object. Throws when that variable
// is absent or its initializer does not convert to an object.
function readLocale(code) {
  const localePath = join(LOCALES_DIR, `${code}.ts`);
  const initializer = findVarInitializer(parseFile(localePath), code);
  if (!initializer) throw new Error(`Could not find const ${code} in ${localePath}`);

  const messages = literalToJs(initializer);
  const isObject = Boolean(messages) && typeof messages === 'object';
  if (!isObject) throw new Error(`${code}.ts: not an object literal`);
  return messages;
}
// Loads `file` from the i18n directory and returns the variable `varName`
// converted to a plain JS object. Throws when the variable is absent or its
// initializer does not convert to an object.
function readNamedRecord(file, varName) {
  const initializer = findVarInitializer(parseFile(join(I18N_DIR, file)), varName);
  if (!initializer) throw new Error(`Could not find ${varName} in ${file}`);

  const record = literalToJs(initializer);
  const isObject = Boolean(record) && typeof record === 'object';
  if (!isObject) throw new Error(`${file}: ${varName} is not an object`);
  return record;
}
// Flattens a nested object into a Map of dotted key path -> leaf value.
// Plain objects are descended into; everything else (strings, arrays, null,
// undefined, ...) is stored as a leaf. Entries keep their declaration order.
// `prefix` and `out` carry state through the recursion; `out` is returned.
function flatten(obj, prefix = '', out = new Map()) {
  Object.keys(obj).forEach((name) => {
    const value = obj[name];
    const dotted = prefix ? `${prefix}.${name}` : name;
    const isBranch = typeof value === 'object' && value !== null && !Array.isArray(value);
    if (isBranch) {
      flatten(value, dotted, out);
    } else {
      out.set(dotted, value);
    }
  });
  return out;
}
// Returns every match of `re` in String(s) as a sorted array (a multiset in
// canonical order). All whitespace inside each token is stripped first, so
// '{{ count }}' and '{{count}}' compare equal.
function tokenMultiset(s, re) {
  const found = String(s).match(re) ?? [];
  const normalised = [];
  for (const token of found) {
    normalised.push(token.replace(/\s+/g, ''));
  }
  normalised.sort();
  return normalised;
}
// Compares two arrays position by position with strict equality. Both are
// expected to be already sorted (as produced by tokenMultiset), which makes
// this a multiset comparison.
function multisetsEqual(a, b) {
  if (a.length !== b.length) return false;
  for (const [position, item] of a.entries()) {
    if (item !== b[position]) return false;
  }
  return true;
}
// Validates one translated leaf against its English counterpart.
//   - non-string translation  -> error "missing translation"
//   - blank translation       -> error "empty translation"
//   - identical to English in a suspicious way -> warning
//   - placeholders / HTML tags that differ as multisets -> one error each
// Findings are reported through fail() / warn(); nothing is returned.
function checkLeafConsistency(path, enValue, trValue, lang) {
  const where = `[${lang}] ${path}`;

  if (typeof trValue !== 'string') {
    fail(`${where}: missing translation`);
    return;
  }
  if (trValue.trim() === '') {
    fail(`${where}: empty translation`);
    return;
  }
  if (isSuspiciousSameAsEnglish(path, enValue, trValue)) {
    warn(`${where}: same as English; verify this is intentional`);
  }

  const tokenChecks = [
    { pattern: PLACEHOLDER_RE, label: 'placeholder' },
    { pattern: HTML_TAG_RE, label: 'HTML tag' },
  ];
  tokenChecks.forEach(({ pattern, label }) => {
    const want = tokenMultiset(enValue, pattern);
    const got = tokenMultiset(trValue, pattern);
    if (multisetsEqual(want, got)) return;
    fail(`${where}: ${label} mismatch — en=${JSON.stringify(want)} ${lang}=${JSON.stringify(got)}`);
  });
}
// Decides whether a translation that equals the English text (after trimming)
// looks like an untranslated string worth a warning.
//
// Returns false when either value is not a string, when the texts differ, or
// when the key/value is exempt: allowlisted path or path pattern, allowlisted
// value, or any key under `server.`. Otherwise returns true only for text that
// looks like a phrase: at least 8 characters, containing a lowercase ASCII
// letter and whitespace, and not starting with an http(s) URL.
function isSuspiciousSameAsEnglish(path, enValue, trValue) {
  if (typeof enValue !== 'string' || typeof trValue !== 'string') return false;

  const text = enValue.trim();
  if (text !== trValue.trim()) return false;

  // Explicit exemptions.
  if (SAME_AS_EN_PATH_ALLOWLIST.has(path)) return false;
  if (SAME_AS_EN_PATH_ALLOWLIST_RE.some((re) => re.test(path))) return false;
  if (SAME_AS_EN_VALUE_ALLOWLIST.has(text)) return false;
  if (path.startsWith('server.')) return false;

  // Heuristics: short strings, acronyms / codes, single words and URLs are
  // commonly identical across languages. Requiring a lowercase letter already
  // rules out letter-free and all-uppercase text, so no separate check for
  // those cases is needed.
  if (text.length < 8) return false;
  if (!/[a-z]/.test(text)) return false;
  if (!/\s/.test(text)) return false;
  if (/^https?:\/\//i.test(text)) return false;
  return true;
}
// Verifies that the lazy `localeLoaders` map has exactly one entry per
// non-English supported language: none missing, no "en", no unknown codes.
// A failure to read the map is itself recorded as an error.
function checkLocaleLoaders(supportedCodes) {
  let loaderCodes;
  try {
    loaderCodes = readLocaleLoaderCodes();
  } catch (e) {
    fail(e.message);
    return;
  }

  const expected = supportedCodes.filter((code) => code !== 'en');

  expected
    .filter((code) => !loaderCodes.includes(code))
    .forEach((code) => {
      fail(`localeLoaders is missing non-English supported language "${code}"`);
    });

  loaderCodes.forEach((code) => {
    if (code === 'en') {
      fail('localeLoaders should not include "en"; English is imported eagerly');
      return;
    }
    if (!expected.includes(code)) {
      fail(`localeLoaders includes "${code}" but it is not in SUPPORTED_LANGUAGES`);
    }
  });
}
// Cross-checks SUPPORTED_LANGUAGES against the files in locales/ and validates
// every supported non-English locale against en.ts:
//   - supported code without a locales/<code>.ts file -> error
//   - locale file not listed in SUPPORTED_LANGUAGES    -> warning
//   - every English leaf is checked with checkLeafConsistency()
//   - keys present in a translation but absent from en.ts -> warning
//
// A translation file that cannot be read is recorded as an error and skipped,
// so the remaining locales are still validated. A failure to read en.ts
// propagates to the caller: nothing can be compared without it.
function checkLocales(supportedCodes) {
  const localeFiles = new Set(
    readdirSync(LOCALES_DIR)
      .filter((f) => f.endsWith('.ts'))
      .map((f) => f.replace(/\.ts$/, ''))
  );
  const supported = new Set(supportedCodes);

  for (const code of supportedCodes) {
    if (!localeFiles.has(code)) {
      fail(`SUPPORTED_LANGUAGES lists "${code}" but locales/${code}.ts is missing`);
    }
  }
  for (const code of localeFiles) {
    if (!supported.has(code)) {
      warn(`locales/${code}.ts exists but is not listed in SUPPORTED_LANGUAGES`);
    }
  }

  const en = flatten(readLocale('en'));
  for (const code of supportedCodes) {
    // English is the reference; a missing file was already reported above.
    if (code === 'en' || !localeFiles.has(code)) continue;

    let tr;
    try {
      tr = flatten(readLocale(code));
    } catch (e) {
      fail(e.message);
      continue;
    }

    for (const [path, enValue] of en) {
      checkLeafConsistency(path, enValue, tr.get(path), code);
    }
    for (const path of tr.keys()) {
      if (!en.has(path)) warn(`[${code}] ${path}: extra key not in en.ts`);
    }
  }
}
// Validates a per-language record file (e.g. descriptions.ts / details.ts).
//   - every non-English supported language must have a record (error);
//     records for other codes produce a warning
//   - the union of keys across the expected languages is canonical: each
//     language must provide a non-blank string for every key in it
//   - every canonical key must also exist in `serverKeys`
//
// If the record cannot be read at all, that is recorded as a single error and
// the function returns, so checks on other files still run.
function checkRecordCoverage(file, varName, supportedCodes, serverKeys) {
  let record;
  try {
    record = readNamedRecord(file, varName);
  } catch (e) {
    fail(e.message);
    return;
  }

  const expected = supportedCodes.filter((c) => c !== 'en');
  const expectedSet = new Set(expected);
  const present = Object.keys(record);
  const presentSet = new Set(present);

  for (const code of expected) {
    if (!presentSet.has(code)) {
      fail(`${file}: missing language record "${code}"`);
    }
  }
  for (const code of present) {
    if (!expectedSet.has(code)) {
      warn(`${file}: unexpected language record "${code}"`);
    }
  }

  // Use the union of feature-name keys across languages as canonical.
  const union = new Set();
  for (const code of expected) {
    if (record[code]) for (const k of Object.keys(record[code])) union.add(k);
  }

  for (const code of expected) {
    const langKeys = new Set(Object.keys(record[code] ?? {}));
    for (const key of union) {
      if (!langKeys.has(key)) {
        fail(`${file} [${code}]: missing translation for "${key}"`);
        continue;
      }
      const v = record[code][key];
      if (typeof v !== 'string' || v.trim() === '') {
        fail(`${file} [${code}]: empty translation for "${key}"`);
      }
    }
  }

  // Every key here must also be a translatable feature name in en.ts > server.
  // Otherwise the description is unreachable — ts() looks up server.${name}.
  for (const key of union) {
    if (!serverKeys.has(key)) {
      fail(`${file}: key "${key}" has no matching entry in en.ts > server`);
    }
  }
}
// Recursively gathers the .ts / .tsx files under `dir` into `out` (returned).
// Skipped: the top-level i18n directory (relative to SRC_DIR), any __tests__
// directory, declaration files (.d.ts) and *.test / *.spec files.
function collectSourceFiles(dir, out = []) {
  const isSourceFile = (name) =>
    /\.(ts|tsx)$/.test(name) &&
    !/\.d\.ts$/.test(name) &&
    !/\.(test|spec)\.(ts|tsx)$/.test(name);

  readdirSync(dir, { withFileTypes: true }).forEach((entry) => {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      // Normalise separators so the comparisons also work on Windows paths.
      const rel = relative(SRC_DIR, full).replace(/\\/g, '/');
      const skipped = rel === 'i18n' || rel.includes('/__tests__') || entry.name === '__tests__';
      if (!skipped) collectSourceFiles(full, out);
      return;
    }
    if (entry.isFile() && isSourceFile(entry.name)) out.push(full);
  });
  return out;
}
// Returns the 1-based line number of character offset `index` in `src`,
// counting \r\n, \r and \n each as a single line break.
function lineNumberAt(src, index) {
  const before = src.slice(0, index);
  const breaks = before.match(/\r\n|\r|\n/g);
  return (breaks ? breaks.length : 0) + 1;
}
// Scans every collected source file (parsed as TSX) for hardcoded UI text
// listed in FORBIDDEN_VISIBLE_STRINGS. String literals, template literal
// chunks and JSX text are inspected; each hit is reported as an error with
// the file path (relative to the directory above this script's) and line.
function checkForbiddenVisibleStrings() {
  const projectRoot = join(__dirname, '..');

  for (const file of collectSourceFiles(SRC_DIR)) {
    const rel = relative(projectRoot, file).replace(/\\/g, '/');
    const src = readFileSync(file, 'utf8');
    const sf = ts.createSourceFile(file, src, ts.ScriptTarget.Latest, true, ts.ScriptKind.TSX);

    // Reports every forbidden string contained in `value`, located at `textNode`.
    const report = (textNode, value) => {
      FORBIDDEN_VISIBLE_STRINGS.filter(([text]) => value.includes(text)).forEach(([text, key]) => {
        fail(
          `${rel}:${lineNumberAt(src, textNode.getStart(sf))}: hardcoded visible string ` +
            `"${text}" should use "${key}"`
        );
      });
    };

    // Block-bodied so it returns undefined and ts.forEachChild keeps iterating.
    const walk = (node) => {
      const hasCookedText =
        ts.isStringLiteral(node) ||
        ts.isNoSubstitutionTemplateLiteral(node) ||
        ts.isTemplateHead(node) ||
        ts.isTemplateMiddle(node) ||
        ts.isTemplateTail(node);
      if (hasCookedText) {
        report(node, node.text);
      } else if (ts.isJsxText(node)) {
        // JSX text is checked against its source text.
        report(node, node.getText(sf));
      }
      ts.forEachChild(node, walk);
    };
    walk(sf);
  }
}
// Runs every check, prints the collected warnings and errors, and sets the
// process exit status:
//   2 — the inputs every check depends on (SUPPORTED_LANGUAGES, en.ts) could
//       not be read
//   1 — at least one translation error was recorded
//   0 — otherwise (an "i18n OK" summary is printed)
function main() {
  let supportedCodes;
  let en;
  try {
    supportedCodes = readSupportedLanguages();
    // Load English up front: it is the reference for all checks, so a broken
    // en.ts is reported as a fatal error instead of an uncaught stack trace.
    en = readLocale('en');
  } catch (e) {
    console.error(`fatal: ${e.message}`);
    process.exit(2);
  }

  checkLocales(supportedCodes);
  checkLocaleLoaders(supportedCodes);

  const serverKeys = new Set(Object.keys(en.server ?? {}));
  checkRecordCoverage('descriptions.ts', 'descriptions', supportedCodes, serverKeys);
  checkRecordCoverage('details.ts', 'details', supportedCodes, serverKeys);
  checkForbiddenVisibleStrings();

  for (const w of warnings) console.warn(`warn: ${w}`);
  if (errors.length > 0) {
    for (const e of errors) console.error(`error: ${e}`);
    console.error(`\n${errors.length} translation error(s).`);
    process.exit(1);
  }
  console.log(`i18n OK — ${supportedCodes.length} languages, ${warnings.length} warning(s).`);
}
// Script entry point: run all checks as soon as the module is executed.
main();