Fix data pipelines once and for all

This commit is contained in:
Andras Schmelczer 2026-06-10 21:27:32 +01:00
parent 08560476c5
commit 4012e4e047
46 changed files with 4508 additions and 855 deletions

View file

@ -107,6 +107,20 @@ const ROUTES = [
description:
'Learn how Perfect Postcode treats saved searches, account data and property research workflows with privacy and security in mind.',
},
{
path: '/terms',
output: 'terms/index.html',
title: 'Terms of Service | Perfect Postcode',
description:
'The terms that govern your use of Perfect Postcode, including lifetime access, acceptable use, data accuracy, payments and refunds.',
},
{
path: '/privacy',
output: 'privacy/index.html',
title: 'Privacy Policy | Perfect Postcode',
description:
'How Perfect Postcode collects, uses and protects your data: account details, payments, saved searches, AI queries, analytics and your UK GDPR rights.',
},
];
const FAQ_SCHEMA_ITEMS = [
@ -325,11 +339,16 @@ async function prerender() {
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
try {
const baseIndexHtml = cleanBaseIndexHtml(readFileSync(INDEX_PATH, 'utf-8'));
// Minimum length of the extracted HTML that is accepted for a route.
// Every real page renders tens of kB; a few hundred chars means the SPA
// raced hydration and we captured a loading shell.
const MIN_HTML_CHARS = 1000;
// Maximum number of render attempts per route; when the final attempt
// fails, its error is rethrown rather than retried.
const MAX_ATTEMPTS = 3;
for (const route of ROUTES) {
const page = await browser.newPage();
async function renderRoute(route) {
// A fresh context per attempt: pages otherwise share cache/storage, and a
// poisoned chunk-fetch in the shared cache makes a route fail every retry.
const context = await browser.createBrowserContext();
const page = await context.newPage();
// Intercept API requests to prevent real fetches and retry loops.
await page.setRequestInterception(true);
@ -374,15 +393,16 @@ async function prerender() {
}
});
await page.goto(`http://127.0.0.1:${port}${route.path}`, {
waitUntil: 'networkidle0',
timeout: 30000,
});
try {
await page.goto(`http://127.0.0.1:${port}${route.path}`, {
waitUntil: 'networkidle0',
timeout: 30000,
});
await page.waitForSelector('h1', { timeout: 10000 });
await page.waitForSelector('h1', { timeout: 10000 });
// Extract and clean the rendered HTML.
const html = await page.evaluate(() => {
// Extract and clean the rendered HTML.
const html = await page.evaluate(() => {
const root = document.getElementById('root');
if (!root) return '';
@ -400,10 +420,33 @@ async function prerender() {
});
return root.innerHTML;
});
});
if (!html || html.length < 100) {
throw new Error(`Prerender produced too little HTML for ${route.path}`);
if (!html || html.length < MIN_HTML_CHARS) {
throw new Error(
`Prerender produced too little HTML for ${route.path} (${html?.length ?? 0} chars)`
);
}
return html;
} finally {
await context.close().catch(() => {});
}
}
try {
const baseIndexHtml = cleanBaseIndexHtml(readFileSync(INDEX_PATH, 'utf-8'));
for (const route of ROUTES) {
// Render the route, retrying on any thrown error for up to MAX_ATTEMPTS
// attempts in total. `html` holds the first successful result.
let html = null;
for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt += 1) {
try {
html = await renderRoute(route);
break;
} catch (err) {
// Last attempt: rethrow so the failure propagates instead of continuing
// with `html` still null.
if (attempt === MAX_ATTEMPTS) throw err;
console.warn(`Retrying ${route.path} (attempt ${attempt} failed: ${err.message})`);
}
}
const updated = updateHead(baseIndexHtml, route).replace(
@ -418,7 +461,6 @@ async function prerender() {
const outputPath = join(DIST_DIR, route.output);
mkdirSync(dirname(outputPath), { recursive: true });
writeFileSync(outputPath, updated);
await page.close();
console.log(`Prerendered ${route.path} (${html.length} chars) into ${route.output}`);
}
} finally {