schmelczer-dev/src/pages/rss.xml.ts

103 lines
3.4 KiB
TypeScript

import rss from '@astrojs/rss';
import type { APIRoute } from 'astro';
import { experimental_AstroContainer as AstroContainer } from 'astro/container';
import { render } from 'astro:content';
import ogDefault from '../assets/og-default.jpg';
import {
absoluteUrl,
articlePath,
getPublishedPosts,
optimizeOgImage,
site,
} from '../lib/site';
/**
 * Escapes the five XML-reserved characters so `value` can be placed safely
 * inside an XML text node or a quoted attribute value.
 *
 * This is needed for the `customData` strings built in this file, which are
 * hand-written XML fragments rather than values the feed library escapes.
 *
 * @param value - Raw text to embed in XML.
 * @returns `value` with `&`, `<`, `>`, `"` and `'` replaced by their
 *   predefined XML entities.
 */
function escapeXml(value: string): string {
  return (
    value
      // The ampersand must be handled first; otherwise the `&` introduced by
      // the entities below would be escaped a second time.
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;')
  );
}
/**
 * Rewrites root-relative URLs in rendered HTML to absolute ones so RSS readers
 * (which load the HTML outside any page context) can still resolve assets and
 * links.
 *
 * Only double-quoted `href` (on `<a>`/`<link>`), `src` (on `<img>`/`<source>`/
 * `<video>`/`<audio>`) and `srcset` attributes are rewritten. Values that are
 * already absolute, relative to the current path, or protocol-relative
 * (`//host/path`) are left untouched.
 *
 * @param html - HTML fragment to rewrite.
 * @param baseUrl - Site origin to prepend; trailing slashes are ignored.
 * @returns The HTML with root-relative URLs prefixed by `baseUrl`.
 */
function absolutizeUrls(html: string, baseUrl: string): string {
  // Drop trailing slashes so `https://site/` + `/path` does not become
  // `https://site//path`.
  const origin = baseUrl.replace(/\/+$/, '');

  // A replacer function (rather than a `$1…$2$3` pattern string) keeps any `$`
  // in the base URL from being interpreted as a replacement token.
  const prependOrigin = (
    _match: string,
    head: string,
    path: string,
    tail: string
  ): string => head + origin + path + tail;

  return html
    // `\/(?!\/)` accepts "/path" but rejects protocol-relative "//host/path".
    .replace(/(<(?:a|link)\b[^>]*\bhref=")(\/(?!\/)[^"]*)(")/g, prependOrigin)
    .replace(
      /(<(?:img|source|video|audio)\b[^>]*\bsrc=")(\/(?!\/)[^"]*)(")/g,
      prependOrigin
    )
    .replace(
      /(\bsrcset=")([^"]+)(")/g,
      (_match: string, head: string, value: string, tail: string): string => {
        // A srcset is a comma-separated list of "<url> <descriptor>" entries;
        // each entry is rewritten independently.
        const rewritten = value
          .split(',')
          .map((candidate) => {
            const trimmed = candidate.trim();
            const isRootRelative =
              trimmed.startsWith('/') && !trimmed.startsWith('//');
            return isRootRelative ? origin + trimmed : trimmed;
          })
          .join(', ');
        return head + rewritten + tail;
      }
    );
}
/**
 * Astro endpoint that serves the site's RSS feed.
 *
 * Each published post is rendered to an HTML string through an Astro
 * container, its root-relative URLs are made absolute, and the result is
 * passed to `@astrojs/rss` together with channel-level metadata (language,
 * build date, self link and channel image).
 */
export const GET: APIRoute = async () => {
  const publishedPosts = await getPublishedPosts();
  const selfLink = absoluteUrl('/rss.xml');
  const optimizedImage = await optimizeOgImage(ogDefault);
  const imageUrl = absoluteUrl(optimizedImage.src);
  const escapedSiteName = escapeXml(site.name);
  const renderer = await AstroContainer.create();

  const items = await Promise.all(
    publishedPosts.map(async (entry) => {
      const link = absoluteUrl(articlePath(entry));

      // Per-item extension elements: the creator is always present, the
      // update timestamp only when the post declares one.
      let customData = `<dc:creator>${escapedSiteName}</dc:creator>`;
      if (entry.data.updated) {
        const updatedTag = `<atom:updated>${entry.data.updated.toISOString()}</atom:updated>`;
        customData = `${customData}\n${updatedTag}`;
      }

      const { Content } = await render(entry);
      const renderedHtml = await renderer.renderToString(Content);

      // @astrojs/rss XML-escapes the `content` string and emits it inside
      // <content:encoded>. RSS readers decode the escaped HTML the same as if
      // it were wrapped in CDATA, so escaping is fine and safer to author.
      return {
        title: entry.data.title,
        description: entry.data.description,
        pubDate: entry.data.date,
        link,
        author: `${site.email} (${site.name})`,
        categories: [...entry.data.tags],
        content: absolutizeUrls(renderedHtml, site.url),
        customData,
      };
    })
  );

  // Channel image block; the site name is the same escaped value used for
  // the per-item creator element.
  const channelImage = [
    '<image>',
    ` <url>${imageUrl}</url>`,
    ` <title>${escapedSiteName}</title>`,
    ` <link>${site.url}</link>`,
    '</image>',
  ];

  const channelCustomData = [
    '<language>en-us</language>',
    `<lastBuildDate>${new Date().toUTCString()}</lastBuildDate>`,
    `<atom:link href="${selfLink}" rel="self" type="application/rss+xml" />`,
    ...channelImage,
  ].join('\n');

  return rss({
    title: site.name,
    description: site.description,
    site: site.url,
    xmlns: {
      atom: 'http://www.w3.org/2005/Atom',
      content: 'http://purl.org/rss/1.0/modules/content/',
      dc: 'http://purl.org/dc/elements/1.1/',
    },
    customData: channelCustomData,
    items,
  });
};