improve

2026-05-31 20:20:41 +01:00 · 2026-05-31 20:20:41 +01:00 · e8345cbdc1
commit e8345cbdc1
parent 8688b7475e
40 changed files with 1980 additions and 904 deletions
--- a/Makefile.data
+++ b/Makefile.data
@ -64,8 +64,6 @@ PBF             := $(DATA_DIR)/england-latest.osm.pbf
 FR_TOW          := $(DATA_DIR)/FR_TOW_V1_ALL.zip
 NFI             := $(DATA_DIR)/NFI_WOODLAND_ENGLAND.zip
 TREE_DENSITY_PC := $(DATA_DIR)/tree_density_by_postcode.parquet
-TREE_DENSITY_STREETS := $(DATA_DIR)/tree_density_by_street.parquet
-TREE_DENSITY_ADDR := $(DATA_DIR)/tree_density_by_address.parquet
 OFS_REGISTER    := $(DATA_DIR)/ofs_register.xlsx
 PLACES          := $(DATA_DIR)/places.parquet
 MEDIAN_AGE      := $(DATA_DIR)/median_age.parquet
@ -183,6 +181,7 @@ $(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(ARCGI
 		--oa-boundaries $(OA_BOUNDARIES) \
 		--inspire $(INSPIRE_DIR) \
 		--output $(PC_BOUNDARIES)
+	$(VALIDATE_OUTPUTS) --active-postcode-boundary-match "$(ARCGIS)::$(PC_BOUNDARIES)"
 	@touch $@
 generate-travel-times: $(ARCGIS) $(PLACES) $(PBF) download-transit-network
 	@if [ -f "$(R5_NETWORK_CACHE)" ] && { [ "$(PBF)" -nt "$(R5_NETWORK_CACHE)" ] || [ "$(TRANSIT_STAMP)" -nt "$(R5_NETWORK_CACHE)" ]; }; then \
@ -358,7 +357,7 @@ $(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(GIAS) $(OFSTE
 $(EPC_PP): $(PRICE_PAID) $(EPC) pipeline/transform/join_epc_pp.py pipeline/utils/fuzzy_join.py
 	uv run python -m pipeline.transform.join_epc_pp --epc $(EPC) --price-paid $(PRICE_PAID) --output $@

-$(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(PC_BOUNDARIES) pipeline/transform/crime_spatial.py pipeline/transform/postcode_boundaries/loader.py pipeline/transform/crime.py
+$(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(PC_BOUNDARIES_STAMP) pipeline/transform/crime_spatial.py pipeline/transform/postcode_boundaries/loader.py pipeline/transform/crime.py
 	$(VALIDATE_OUTPUTS) --file $(CRIME_DIR)/archive_manifest.json --glob "$(CRIME_DIR)::**/*-street.csv"
 	uv run python -m pipeline.transform.crime_spatial --input $(CRIME_DIR) --boundaries $(PC_BOUNDARIES)/units --output $(CRIME) --output-by-year $(CRIME_BY_YEAR)

@ -368,15 +367,12 @@ $(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) $(POI_PROXIMITY_DE
 $(SCHOOL_PROX): $(OFSTED) $(ARCGIS) pipeline/transform/school_proximity.py pipeline/utils/poi_counts.py
 	uv run python -m pipeline.transform.school_proximity --ofsted $(OFSTED) --arcgis $(ARCGIS) --output $@

-$(TREE_DENSITY_PC): $(FR_TOW) $(NFI) $(ARCGIS) $(PRICE_PAID) $(TREE_DENSITY_DEPS)
+$(TREE_DENSITY_PC): $(FR_TOW) $(NFI) $(ARCGIS) $(TREE_DENSITY_DEPS)
 	uv run python -m pipeline.transform.tree_density \
 		--tow-zip $(FR_TOW) \
 		--nfi-zip $(NFI) \
 		--arcgis $(ARCGIS) \
-		--price-paid $(PRICE_PAID) \
-		--output-postcodes $(TREE_DENSITY_PC) \
-		--output-streets $(TREE_DENSITY_STREETS) \
-		--output-addresses $(TREE_DENSITY_ADDR)
+		--output-postcodes $(TREE_DENSITY_PC)

 # Postcode boundaries require manual generation — fail with instructions
 $(PC_BOUNDARIES):
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@ -81,6 +81,15 @@ function isProtectedPage(page: Page): boolean {
  return page === 'account' || page === 'saved';
 }

+/** True when the current URL has a `share` query parameter made of 1-20 alphanumeric characters. */
+function isSharedDashboardUrl(): boolean {
+  return /^[a-z0-9]{1,20}$/i.test(new URLSearchParams(window.location.search).get('share') ?? '');
+}
+/** True if `page` needs a session: account/saved always, dashboard unless it is a share URL. */
+function isAuthRequiredRoute(page: Page): boolean {
+  return page === 'dashboard' ? !isSharedDashboardUrl() : isProtectedPage(page);
+}
+
 function buildPageUrl(page: Page, inviteCode?: string, search = '', hash = ''): string {
  const normalizedHash = normalizeHash(hash);
  return `${pageToPath(page, inviteCode)}${search}${normalizedHash ? `#${normalizedHash}` : ''}`;
@ -235,6 +244,7 @@ export default function App() {
  const postAuthCheckoutReturnPathRef = useRef<string | null>(null);
  const authCompletedRef = useRef(false);
  const [licenseSuccessStatus, setLicenseSuccessStatus] = useState<LicenseSuccessStatus>('hidden');
+  const [dashboardReady, setDashboardReady] = useState(false);

  // Keep a ref to the latest refreshAuth so the mount-only startup effect always
  // calls the current implementation without re-running when the callback identity changes.
@ -266,7 +276,7 @@ export default function App() {
    if (!completed) {
      setPostAuthIntent(null);
      postAuthCheckoutReturnPathRef.current = null;
-      if (isProtectedPage(activePageRef.current)) {
+      if (isAuthRequiredRoute(activePageRef.current)) {
        window.history.replaceState({ page: 'home', hash: '' }, '', '/');
        setRouteHash('');
        setActivePage('home');
@ -517,7 +527,10 @@ export default function App() {
    }
  }, [activePage, fetchSearches]);

-  const isAuthRequiredPage = activePage === 'account' || activePage === 'saved';
+  // Same gate as isAuthRequiredRoute(): account/saved always need a session, the dashboard
+  // only when mapUrlState has no share token (presumably parsed from ?share= — confirm).
+  const isAuthRequiredPage =
+    isProtectedPage(activePage) || (activePage === 'dashboard' && !mapUrlState.share);
  useEffect(() => {
    if (authLoading) return;
    if (isAuthRequiredPage && !user) {
@ -530,6 +543,13 @@ export default function App() {

  const [exportState, setExportState] = useState<ExportState | null>(null);

+  useEffect(() => {
+    if (activePage !== 'dashboard' || !user) {
+      setDashboardReady(false);
+      setExportState(null);
+    }
+  }, [activePage, user]);
+
  if ((isScreenshotMode || isOgMode) && inviteCode) {
    return (
      <Suspense fallback={<PageFallback />}>
@ -584,8 +604,9 @@ export default function App() {
        onPageChange={navigateTo}
        theme={theme}
        onToggleTheme={toggleTheme}
-        exportState={activePage === 'dashboard' ? exportState : null}
+        exportState={activePage === 'dashboard' && user ? exportState : null}
        dashboardParams={activePage === 'dashboard' ? dashboardParams : ''}
+        dashboardActionsDisabled={activePage === 'dashboard' && !dashboardReady}
        onSaveSearch={
          activePage === 'dashboard' && user
            ? editingSearch
@ -675,6 +696,7 @@ export default function App() {
            onNavigateTo={navigateTo}
            onExportStateChange={setExportState}
            onDashboardParamsChange={setDashboardParams}
+            onDashboardReadyChange={setDashboardReady}
            isMobile={isMobile}
            initialTravelTime={mapUrlState.travelTime}
            initialPostcode={mapUrlState.postcode}
--- a/frontend/src/components/account/AccountPage.tsx
+++ b/frontend/src/components/account/AccountPage.tsx
@ -461,6 +461,24 @@ interface ShareLinkListItem {
  created: string;
 }

+/**
+ * Picks, per invite type, the URL of the newest unused invite that still has one.
+ * Invites whose `created` value does not parse are treated as oldest (epoch 0).
+ */
+function latestPendingInviteUrls(invites: InviteListItem[]): Record<string, string> {
+  // Map, not {}: an invite_type such as "constructor" would otherwise hit Object.prototype.
+  const latestByType = new Map<string, { url: string; createdMs: number }>();
+  for (const invite of invites) {
+    if (invite.used || !invite.url) continue;
+    const createdMs = Date.parse(invite.created) || 0;
+    const existing = latestByType.get(invite.invite_type);
+    if (!existing || createdMs > existing.createdMs) {
+      latestByType.set(invite.invite_type, { url: invite.url, createdMs });
+    }
+  }
+  return Object.fromEntries([...latestByType].map(([type, { url }]) => [type, url]));
+}
+
 function InviteTable({
  invites,
  loading,
@ -673,7 +691,16 @@ function InviteSection({ user }: { user: AuthUser }) {
      const res = await fetch(apiUrl('invites'), authHeaders());
      assertOk(res, 'Fetch invites');
      const data = await res.json();
-      setInviteHistory(data.invites);
+      const invites: InviteListItem[] = Array.isArray(data.invites) ? data.invites : [];
+      setInviteHistory(invites);
+      const pendingInviteUrls = latestPendingInviteUrls(invites);
+      setInviteUrl((prev) => {
+        const next = { ...prev };
+        for (const [type, url] of Object.entries(pendingInviteUrls)) {
+          if (!next[type]) next[type] = url;
+        }
+        return next;
+      });
    } catch {
      // Silent — non-critical
    } finally {
--- a/frontend/src/components/map/LocationSearch.test.tsx
+++ b/frontend/src/components/map/LocationSearch.test.tsx
@ -8,8 +8,11 @@ const RECENT_SEARCHES_STORAGE_KEY = 'perfect-postcode.locationSearch.recent';

 vi.mock('react-i18next', () => ({
  useTranslation: () => ({
-    t: (key: string) =>
-      key === 'locationSearch.placeholder' ? 'Search places or postcodes...' : key,
+    t: (key: string) => {
+      if (key === 'locationSearch.placeholder') return 'Search places or postcodes...';
+      if (key === 'locationSearch.noResults') return 'No matching places or postcodes';
+      return key;
+    },
  }),
 }));

@ -226,6 +229,91 @@ describe('LocationSearch', () => {
    );
  });

+  it('selects the first place suggestion with Enter when none is highlighted', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn((input: string | URL | Request) => {
+        const url = new URL(String(input), 'http://localhost');
+        if (url.pathname === '/api/places') {
+          return Promise.resolve(
+            jsonResponse({
+              places: [
+                {
+                  type: 'place',
+                  name: 'London',
+                  slug: 'london',
+                  place_type: 'city',
+                  lat: 51.5074,
+                  lon: -0.1278,
+                },
+              ],
+              postcodes: [],
+              addresses: [],
+            })
+          );
+        }
+        if (url.pathname === '/api/nearest-postcode') {
+          return Promise.resolve(
+            jsonResponse({
+              postcode: 'SW1A 1AA',
+              latitude: 51.501,
+              longitude: -0.141,
+              geometry: postcodeGeometry,
+            })
+          );
+        }
+        return Promise.resolve(new Response(null, { status: 404 }));
+      })
+    );
+
+    const onFlyTo = vi.fn();
+    const onLocationSearched = vi.fn();
+    render(<LocationSearch onFlyTo={onFlyTo} onLocationSearched={onLocationSearched} />);
+
+    const input = screen.getByRole('textbox');
+    fireEvent.change(input, { target: { value: 'London' } });
+
+    await screen.findByRole('button', { name: 'London' });
+    fireEvent.keyDown(input, { key: 'Enter' });
+
+    await waitFor(() => {
+      expect(onLocationSearched).toHaveBeenCalledTimes(1);
+    });
+    expect(onFlyTo).toHaveBeenCalledWith(51.5074, -0.1278, 10);
+    expect(onLocationSearched).toHaveBeenCalledWith({
+      postcode: 'SW1A 1AA',
+      geometry: postcodeGeometry,
+      latitude: 51.501,
+      longitude: -0.141,
+      zoom: 10,
+      markerLatitude: 51.5074,
+      markerLongitude: -0.1278,
+    });
+  });
+
+  it('shows an empty state for invalid place queries', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn(() =>
+        Promise.resolve(
+          jsonResponse({
+            places: [],
+            postcodes: [],
+            addresses: [],
+          })
+        )
+      )
+    );
+
+    render(<LocationSearch onFlyTo={vi.fn()} onLocationSearched={vi.fn()} />);
+
+    fireEvent.change(screen.getByRole('textbox'), { target: { value: '!!!!zzzzzz!!!!' } });
+
+    await waitFor(() => {
+      expect(screen.getByText('No matching places or postcodes')).toBeTruthy();
+    });
+  });
+
  it('keeps only the three most recent local searches', async () => {
    vi.stubGlobal(
      'fetch',
--- a/frontend/src/components/map/LocationSearch.tsx
+++ b/frontend/src/components/map/LocationSearch.tsx
@ -333,6 +333,8 @@ export default function LocationSearch({
          onSelect={selectResult}
          loading={loading}
          placeholder={t('locationSearch.placeholder')}
+          ariaLabel={t('locationSearch.searchLabel')}
+          name="location-search"
          size="sm"
          inputClassName={
            inputClassName ??
--- a/frontend/src/components/map/MapPage.tsx
+++ b/frontend/src/components/map/MapPage.tsx
@ -91,6 +91,7 @@ export default function MapPage({
  onNavigateTo,
  onExportStateChange,
  onDashboardParamsChange,
+  onDashboardReadyChange,
  screenshotMode,
  ogMode,
  isMobile = false,
@ -642,6 +643,23 @@ export default function MapPage({
    onDashboardParamsChange?.(dashboardParams);
  }, [dashboardParams, onDashboardParamsChange]);

+  const dashboardReady =
+    !initialLoading &&
+    !mapData.loading &&
+    !mapData.licenseRequired &&
+    mapData.bounds != null &&
+    mapData.currentView != null;
+
+  useEffect(() => {
+    onDashboardReadyChange?.(dashboardReady);
+  }, [dashboardReady, onDashboardReadyChange]);
+
+  useEffect(() => {
+    return () => {
+      onDashboardReadyChange?.(false);
+    };
+  }, [onDashboardReadyChange]);
+
  useEffect(() => {
    if (mapData.licenseRequired) trackEvent('Upgrade Modal Shown');
  }, [mapData.licenseRequired]);
@ -830,8 +848,8 @@ export default function MapPage({
        </button>
        <button
          onClick={() => onUpdateEdit?.(dashboardParams)}
-          disabled={savingSearch}
-          className="shrink-0 cursor-pointer px-2.5 py-1 rounded text-xs font-medium bg-teal-600 text-white hover:bg-teal-700 disabled:opacity-50 disabled:cursor-wait flex items-center gap-1.5"
+          disabled={savingSearch || !dashboardReady}
+          className="shrink-0 cursor-pointer px-2.5 py-1 rounded text-xs font-medium bg-teal-600 text-white hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5"
        >
          {savingSearch ? t('savedPage.updating') : t('common.update')}
        </button>
--- a/frontend/src/components/map/POIPane.tsx
+++ b/frontend/src/components/map/POIPane.tsx
@ -186,7 +186,7 @@ export default function POIPane({
              </div>
              {!isCollapsed && (
                <div className="px-3 py-2">
-                  <PillGroup>
+                  <PillGroup wrap>
                    {group.categories.map((category) => {
                      const logo = getPoiCategoryLogoUrl(category);
                      return (
--- a/frontend/src/components/map/map-page/DesktopMapPage.tsx
+++ b/frontend/src/components/map/map-page/DesktopMapPage.tsx
@ -269,7 +269,10 @@ export function DesktopMapPage({
          </div>
        )}
        {poiPaneOpen && (
-          <div className="absolute bottom-28 right-4 z-10 flex h-[60vh] min-h-0 w-80 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
+          <div
+            className="absolute bottom-28 right-4 z-10 flex min-h-0 w-80 max-w-[calc(100%_-_2rem)] flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900"
+            style={{ height: 'min(30rem, calc(100vh - 10rem))' }}
+          >
            {poiPane}
          </div>
        )}
--- a/frontend/src/components/map/map-page/MobileMapPage.tsx
+++ b/frontend/src/components/map/map-page/MobileMapPage.tsx
@ -132,6 +132,10 @@ export function MobileMapPage({
  upgradeModal,
  editingBar,
 }: MobileMapPageProps) {
+  const floatingPaneAvailableHeight = `max(12rem, calc(100dvh - ${Math.ceil(
+    bottomScreenInset
+  )}px - 7rem))`;
+
  return (
    <div className="flex-1 overflow-hidden relative">
      <LoadingOverlay show={initialLoading} />
@ -219,7 +223,13 @@ export function MobileMapPage({
      )}

      {poiPaneOpen && (
-        <div className="absolute top-24 right-3 left-3 z-20 flex h-[45dvh] min-h-0 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
+        <div
+          className="absolute top-24 right-3 left-3 z-20 flex min-h-0 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900"
+          style={{
+            height: `min(22rem, ${floatingPaneAvailableHeight})`,
+            maxHeight: floatingPaneAvailableHeight,
+          }}
+        >
          {poiPane}
        </div>
      )}
--- a/frontend/src/components/map/map-page/types.ts
+++ b/frontend/src/components/map/map-page/types.ts
@ -39,6 +39,7 @@ export interface MapPageProps {
  onNavigateTo: (page: Page, hash?: string, infoFeature?: string) => void;
  onExportStateChange?: (state: ExportState) => void;
  onDashboardParamsChange?: (params: string) => void;
+  onDashboardReadyChange?: (ready: boolean) => void;
  screenshotMode?: boolean;
  ogMode?: boolean;
  isMobile?: boolean;
--- a/frontend/src/components/ui/AuthModal.tsx
+++ b/frontend/src/components/ui/AuthModal.tsx
@ -1,4 +1,4 @@
-import { useState, useCallback, useEffect } from 'react';
+import { useState, useCallback, useEffect, useId } from 'react';
 import { useTranslation } from 'react-i18next';
 import { CloseIcon } from './icons/CloseIcon';
 import { GoogleIcon } from './icons/GoogleIcon';
@ -36,6 +36,9 @@ export default function AuthModal({
  const [password, setPassword] = useState('');
  const [resetSent, setResetSent] = useState(false);
  const dialogRef = useModalA11y();
+  const fieldId = useId();
+  const emailInputId = `${fieldId}-email`;
+  const passwordInputId = `${fieldId}-password`;

  useEffect(() => {
    trackEvent('Auth Modal Open', { tab: initialTab });
@ -194,14 +197,20 @@ export default function AuthModal({
          {/* Email form */}
          <form onSubmit={handleSubmit} className="space-y-4">
            <div>
-              <label className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1">
+              <label
+                htmlFor={emailInputId}
+                className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1"
+              >
                {t('auth.email')}
              </label>
              <input
+                id={emailInputId}
+                name="email"
                type="email"
                value={email}
                onChange={(e) => setEmail(e.target.value)}
                required
+                autoComplete="email"
                className="w-full px-3 py-2 text-sm rounded border border-warm-200 dark:border-warm-700 bg-white dark:bg-warm-800 text-navy-950 dark:text-white placeholder-warm-400 dark:placeholder-warm-500 outline-none focus:ring-2 ring-teal-400 dark:ring-teal-500"
                placeholder={t('auth.emailPlaceholder')}
              />
@ -209,15 +218,21 @@ export default function AuthModal({

            {view !== 'forgot' && (
              <div>
-                <label className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1">
+                <label
+                  htmlFor={passwordInputId}
+                  className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1"
+                >
                  {t('auth.password')}
                </label>
                <input
+                  id={passwordInputId}
+                  name="password"
                  type="password"
                  value={password}
                  onChange={(e) => setPassword(e.target.value)}
                  required
                  minLength={8}
+                  autoComplete={view === 'register' ? 'new-password' : 'current-password'}
                  className="w-full px-3 py-2 text-sm rounded border border-warm-200 dark:border-warm-700 bg-white dark:bg-warm-800 text-navy-950 dark:text-white placeholder-warm-400 dark:placeholder-warm-500 outline-none focus:ring-2 ring-teal-400 dark:ring-teal-500"
                  placeholder={
                    view === 'register'
--- a/frontend/src/components/ui/Header.tsx
+++ b/frontend/src/components/ui/Header.tsx
@ -78,6 +78,7 @@ export default function Header({
  onToggleTheme,
  exportState,
  dashboardParams,
+  dashboardActionsDisabled = false,
  onSaveSearch,
  savingSearch,
  editingSearch,
@ -96,6 +97,7 @@ export default function Header({
  onToggleTheme: () => void;
  exportState: HeaderExportState | null;
  dashboardParams: string;
+  dashboardActionsDisabled?: boolean;
  onSaveSearch: (() => void) | null;
  savingSearch: boolean;
  editingSearch: EditingSearchState | null;
@ -116,6 +118,7 @@ export default function Header({
    () => window.matchMedia(DASHBOARD_TABLET_SIDEBAR_QUERY).matches
  );
  const useSidebarNav = isMobile || (activePage === 'dashboard' && isDashboardTabletSidebarWidth);
+  const dashboardActionsBlocked = activePage === 'dashboard' && (!user || dashboardActionsDisabled);

  useEffect(() => {
    const mql = window.matchMedia(DASHBOARD_TABLET_SIDEBAR_QUERY);
@ -139,6 +142,10 @@ export default function Header({
    if (!useSidebarNav) setMenuOpen(false);
  }, [useSidebarNav]);

+  useEffect(() => {
+    if (dashboardActionsBlocked) setExportMenuOpen(false);
+  }, [dashboardActionsBlocked]);
+
  const doCopy = useCallback((text: string) => {
    copyToClipboard(text, () => {
      setCopied(true);
@ -147,6 +154,7 @@ export default function Header({
  }, []);

  const handleShare = useCallback(async () => {
+    if (dashboardActionsBlocked) return;
    const params =
      activePage === 'dashboard' ? dashboardParams : window.location.search.replace(/^\?/, '');
    if (!params) {
@ -167,7 +175,7 @@ export default function Header({
    } finally {
      setSharing(false);
    }
-  }, [activePage, dashboardParams, doCopy, i18n.language]);
+  }, [activePage, dashboardActionsBlocked, dashboardParams, doCopy, i18n.language]);

  const navLink = (page: Page, e: React.MouseEvent, hash?: string) => {
    if (e.metaKey || e.ctrlKey || e.shiftKey || e.button !== 0) return;
@ -206,8 +214,8 @@ export default function Header({
              </button>
              <button
                onClick={onUpdateEdit}
-                disabled={savingSearch}
-                className="cursor-pointer px-3 py-1.5 rounded bg-teal-600 hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-wait flex items-center gap-1.5"
+                disabled={savingSearch || dashboardActionsBlocked}
+                className="cursor-pointer px-3 py-1.5 rounded bg-teal-600 hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5"
              >
                {savingSearch && <SpinnerIcon className="w-4 h-4 animate-spin" />}
                {savingSearch ? t('savedPage.updating') : t('common.update')}
@ -216,14 +224,16 @@ export default function Header({
          </div>
        )}
        {/* Left: Logo + nav */}
-        <div className="flex items-center gap-4">
+        <div className="flex min-w-0 items-center gap-4">
          <a
            href="/"
-            className="flex cursor-pointer items-center gap-2 hover:opacity-80 transition-opacity"
+            className="flex min-w-0 cursor-pointer items-center gap-2 hover:opacity-80 transition-opacity"
            onClick={(e) => navLink('home', e)}
          >
-            <LogoIcon className="w-5 h-5 text-teal-400" />
-            <span className="text-lg font-semibold text-teal-300">{t('header.appName')}</span>
+            <LogoIcon className="w-5 h-5 shrink-0 text-teal-400" />
+            <span className="max-w-[9rem] truncate whitespace-nowrap text-lg font-semibold text-teal-300 sm:max-w-none">
+              {t('header.appName')}
+            </span>
          </a>

          {/* Desktop nav */}
@ -266,14 +276,14 @@ export default function Header({
        </div>

        {/* Right side */}
-        <div className="flex items-center gap-2 ml-auto">
+        <div className="ml-auto flex shrink-0 items-center gap-2">
          {/* Desktop-only dashboard actions */}
-          {!useSidebarNav && activePage === 'dashboard' && (
+          {!useSidebarNav && activePage === 'dashboard' && user && (
            <>
              <button
                onClick={handleShare}
-                disabled={sharing}
-                className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50"
+                disabled={sharing || dashboardActionsBlocked}
+                className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:cursor-not-allowed disabled:opacity-50"
              >
                {sharing ? (
                  <>
@ -295,8 +305,8 @@ export default function Header({
              {exportState && (
                <button
                  onClick={() => setExportMenuOpen(true)}
-                  disabled={exportState.exporting}
-                  className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50"
+                  disabled={exportState.exporting || dashboardActionsBlocked}
+                  className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:cursor-not-allowed disabled:opacity-50"
                  title={t('header.exportToExcel')}
                >
                  <DownloadIcon className="w-4 h-4" />
@ -306,8 +316,8 @@ export default function Header({
              {onSaveSearch && !editingSearch && (
                <button
                  onClick={onSaveSearch}
-                  disabled={savingSearch}
-                  className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50"
+                  disabled={savingSearch || dashboardActionsBlocked}
+                  className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:cursor-not-allowed disabled:opacity-50"
                >
                  {savingSearch ? (
                    <SpinnerIcon className="w-4 h-4 animate-spin" />
@ -363,7 +373,7 @@ export default function Header({
          {useSidebarNav && !user && (
            <button
              onClick={onRegisterClick}
-              className="cursor-pointer px-4 py-1.5 rounded bg-teal-600 hover:bg-teal-700 transition-colors text-sm font-semibold"
+              className="flex h-8 max-w-[8.5rem] shrink-0 cursor-pointer items-center justify-center truncate whitespace-nowrap rounded bg-teal-600 px-2.5 text-xs font-semibold leading-none transition-colors hover:bg-teal-700 sm:max-w-none sm:px-3 sm:text-sm"
            >
              {t('header.createAccount')}
            </button>
@ -410,6 +420,7 @@ export default function Header({
          onToggleTheme={onToggleTheme}
          exportState={exportState}
          onOpenExportMenu={() => setExportMenuOpen(true)}
+          dashboardActionsDisabled={dashboardActionsBlocked}
          onSaveSearch={onSaveSearch}
          savingSearch={savingSearch}
          isEditingSearch={!!editingSearch}
--- a/frontend/src/components/ui/MobileMenu.tsx
+++ b/frontend/src/components/ui/MobileMenu.tsx
@ -20,6 +20,7 @@ interface MobileMenuProps {
  onToggleTheme: () => void;
  exportState: HeaderExportState | null;
  onOpenExportMenu: () => void;
+  dashboardActionsDisabled?: boolean;
  onSaveSearch: (() => void) | null;
  savingSearch: boolean;
  isEditingSearch: boolean;
@ -41,6 +42,7 @@ export default function MobileMenu({
  onToggleTheme,
  exportState,
  onOpenExportMenu,
+  dashboardActionsDisabled = false,
  onSaveSearch,
  savingSearch,
  isEditingSearch,
@ -101,7 +103,7 @@ export default function MobileMenu({
    </a>
  );

-  const dashboardActions = activePage === 'dashboard' && (
+  const dashboardActions = activePage === 'dashboard' && user && (
    <div className="px-2 py-2 border-b border-navy-700">
      <div className="grid grid-cols-2 gap-2">
        <button
@ -109,7 +111,7 @@ export default function MobileMenu({
            onShare();
            onClose();
          }}
-          disabled={sharing}
+          disabled={sharing || dashboardActionsDisabled}
          className={dashboardActionClass}
        >
          {sharing ? (
@ -127,8 +129,8 @@ export default function MobileMenu({
              onClose();
              onOpenExportMenu();
            }}
-            disabled={exportState.exporting}
            className={dashboardActionClass}
+            disabled={exportState.exporting || dashboardActionsDisabled}
          >
            <DownloadIcon className="w-4 h-4" />
            {exportState.exporting ? t('header.exporting') : t('header.exportLabel')}
@ -140,7 +142,7 @@ export default function MobileMenu({
              onSaveSearch();
              onClose();
            }}
-            disabled={savingSearch}
+            disabled={savingSearch || dashboardActionsDisabled}
            className={dashboardActionClass}
          >
            {savingSearch ? (
@ -199,7 +201,7 @@ export default function MobileMenu({
          </button>

          {/* Language selector */}
-          <div className="flex max-w-full gap-1 overflow-x-auto overflow-y-hidden px-3 pb-1 scrollbar-hide">
+          <div className="grid max-w-full grid-cols-3 gap-1 px-3 pb-1">
            {SUPPORTED_LANGUAGES.map((lang) => (
              <button
                key={lang.code}
@ -208,7 +210,7 @@ export default function MobileMenu({
                  localStorage.setItem('language', lang.code);
                  void changeAppLanguage(lang.code);
                }}
-                className={`flex-none min-w-[2.5rem] flex cursor-pointer items-center justify-center gap-1.5 px-2 py-1.5 rounded text-sm ${
+                className={`flex min-w-0 cursor-pointer items-center justify-center gap-1.5 rounded px-2 py-1.5 text-sm ${
                  i18n.language === lang.code
                    ? 'bg-navy-700 text-white font-medium'
                    : 'text-warm-400 hover:bg-navy-800 hover:text-white'
--- a/frontend/src/components/ui/PillGroup.tsx
+++ b/frontend/src/components/ui/PillGroup.tsx
@ -3,12 +3,17 @@ import type { ReactNode } from 'react';
 interface PillGroupProps {
  children: ReactNode;
  className?: string;
+  wrap?: boolean;
 }

-export function PillGroup({ children, className = '' }: PillGroupProps) {
+export function PillGroup({ children, className = '', wrap = false }: PillGroupProps) {
  return (
    <div
-      className={`flex min-w-0 max-w-full flex-nowrap gap-1.5 overflow-x-auto overscroll-x-contain touch-pan-x touch-pan-y scrollbar-hide md:flex-wrap md:overflow-x-visible ${className}`}
+      className={`flex min-w-0 max-w-full gap-1.5 ${
+        wrap
+          ? 'flex-wrap overflow-x-visible'
+          : 'flex-nowrap overflow-x-auto overscroll-x-contain touch-pan-x touch-pan-y scrollbar-hide md:flex-wrap md:overflow-x-visible'
+      } ${className}`}
    >
      {children}
    </div>
--- a/frontend/src/components/ui/PlaceSearchInput.tsx
+++ b/frontend/src/components/ui/PlaceSearchInput.tsx
@ -1,5 +1,6 @@
 import { useRef } from 'react';
 import { createPortal } from 'react-dom';
+import { useTranslation } from 'react-i18next';
 import type React from 'react';
 import type { SearchResult } from '../../hooks/useLocationSearch';
 import { useDropdownPosition } from '../../hooks/useDropdownPosition';
@ -13,6 +14,7 @@ interface SearchHook {
  activeIndex: number;
  setActiveIndex: (idx: number) => void;
  open: boolean;
+  searching?: boolean;
  handleInputChange: (value: string) => void;
  handleKeyDown: (e: React.KeyboardEvent, onSelect: (result: SearchResult) => void) => void;
  showEmptySearches: () => void;
@ -23,6 +25,8 @@ interface PlaceSearchInputProps {
  onSelect: (result: SearchResult) => void;
  loading?: boolean;
  placeholder?: string;
+  ariaLabel?: string;
+  name?: string;
  size?: 'sm' | 'xs';
  inputClassName?: string;
  inputRef?: React.Ref<HTMLInputElement>;
@ -35,19 +39,28 @@ export function PlaceSearchInput({
  onSelect,
  loading,
  placeholder,
+  ariaLabel,
+  name,
  size = 'sm',
  inputClassName,
  inputRef,
  onInputChange,
  portal,
 }: PlaceSearchInputProps) {
+  const { t } = useTranslation();
  const sm = size === 'sm';
  const iconSize = sm ? 'w-4 h-4' : 'w-3 h-3';
  const spinnerSize = sm ? 'w-4 h-4' : 'w-3 h-3';
  const wrapperRef = useRef<HTMLDivElement>(null);
  const dropdownPos = useDropdownPosition(wrapperRef, portal ? search.open : false);

-  const showDropdown = search.open && search.results.length > 0;
+  const showEmptyResults =
+    search.open &&
+    !search.searching &&
+    search.query.trim().length >= 2 &&
+    search.results.length === 0;
+  const showDropdown = search.open && (search.results.length > 0 || showEmptyResults);
+  const showSpinner = loading || search.searching;

  const dropdown = showDropdown && (
    <div
@ -64,57 +77,66 @@ export function PlaceSearchInput({
          : undefined
      }
    >
-      {search.results.map((result, idx) => (
-        <button
-          key={
-            result.type === 'postcode'
-              ? `pc-${result.label}`
-              : result.type === 'address'
-                ? `addr-${result.postcode}-${result.address}-${result.lat}`
-                : `pl-${result.name}-${result.lat}`
-          }
-          type="button"
-          className={`w-full text-left flex items-center cursor-pointer ${
-            sm ? 'px-3 py-2 gap-2 text-sm' : 'px-2 py-1.5 gap-1.5 text-xs'
-          } ${
-            idx === search.activeIndex
-              ? 'bg-teal-50 dark:bg-teal-900/30'
-              : 'hover:bg-warm-50 dark:hover:bg-warm-700'
-          }`}
-          onMouseEnter={() => search.setActiveIndex(idx)}
-          onMouseDown={(e) => {
-            e.preventDefault();
-            onSelect(result);
-          }}
+      {showEmptyResults ? (
+        <div
+          className={`text-warm-500 dark:text-warm-400 ${sm ? 'px-3 py-2 text-sm' : 'px-2 py-1.5 text-xs'}`}
+          role="status"
        >
-          {result.type === 'postcode' ? (
-            <>
-              <SearchIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
-              <span className="text-warm-700 dark:text-warm-200">{result.label}</span>
-            </>
-          ) : result.type === 'address' ? (
-            <>
-              <HouseIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
-              <span className="min-w-0 text-warm-700 dark:text-warm-200">
-                <span className="block truncate">{result.address}</span>
-                <span className="block truncate text-warm-400 dark:text-warm-500">
-                  {result.postcode}
+          {t('locationSearch.noResults')}
+        </div>
+      ) : (
+        search.results.map((result, idx) => (
+          <button
+            key={
+              result.type === 'postcode'
+                ? `pc-${result.label}`
+                : result.type === 'address'
+                  ? `addr-${result.postcode}-${result.address}-${result.lat}`
+                  : `pl-${result.name}-${result.lat}`
+            }
+            type="button"
+            className={`w-full text-left flex items-center cursor-pointer ${
+              sm ? 'px-3 py-2 gap-2 text-sm' : 'px-2 py-1.5 gap-1.5 text-xs'
+            } ${
+              idx === search.activeIndex
+                ? 'bg-teal-50 dark:bg-teal-900/30'
+                : 'hover:bg-warm-50 dark:hover:bg-warm-700'
+            }`}
+            onMouseEnter={() => search.setActiveIndex(idx)}
+            onMouseDown={(e) => {
+              e.preventDefault();
+              onSelect(result);
+            }}
+          >
+            {result.type === 'postcode' ? (
+              <>
+                <SearchIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
+                <span className="text-warm-700 dark:text-warm-200">{result.label}</span>
+              </>
+            ) : result.type === 'address' ? (
+              <>
+                <HouseIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
+                <span className="min-w-0 text-warm-700 dark:text-warm-200">
+                  <span className="block truncate">{result.address}</span>
+                  <span className="block truncate text-warm-400 dark:text-warm-500">
+                    {result.postcode}
+                  </span>
                </span>
-              </span>
-            </>
-          ) : (
-            <>
-              <MapPinIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
-              <span className="text-warm-700 dark:text-warm-200">
-                {result.name}
-                {result.city && (
-                  <span className="text-warm-400 dark:text-warm-500"> ({result.city})</span>
-                )}
-              </span>
-            </>
-          )}
-        </button>
-      ))}
+              </>
+            ) : (
+              <>
+                <MapPinIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
+                <span className="text-warm-700 dark:text-warm-200">
+                  {result.name}
+                  {result.city && (
+                    <span className="text-warm-400 dark:text-warm-500"> ({result.city})</span>
+                  )}
+                </span>
+              </>
+            )}
+          </button>
+        ))
+      )}
    </div>
  );

@ -123,6 +145,7 @@ export function PlaceSearchInput({
      <input
        ref={inputRef}
        type="text"
+        name={name}
        value={search.query}
        onChange={(e) => {
          search.handleInputChange(e.target.value);
@ -132,11 +155,12 @@ export function PlaceSearchInput({
          search.showEmptySearches();
        }}
        onKeyDown={(e) => search.handleKeyDown(e, onSelect)}
+        aria-label={ariaLabel ?? placeholder}
        placeholder={placeholder}
        className={inputClassName}
      />

-      {loading && (
+      {showSpinner && (
        <div
          className={`absolute right-2 top-1/2 -translate-y-1/2 ${spinnerSize} border-2 border-warm-300 dark:border-warm-600 border-t-teal-500 rounded-full animate-spin`}
        />
--- a/frontend/src/components/ui/SearchInput.tsx
+++ b/frontend/src/components/ui/SearchInput.tsx
@ -4,18 +4,27 @@ interface SearchInputProps {
  value: string;
  onChange: (value: string) => void;
  placeholder?: string;
+  ariaLabel?: string;
  className?: string;
 }

-export function SearchInput({ value, onChange, placeholder, className = '' }: SearchInputProps) {
+export function SearchInput({
+  value,
+  onChange,
+  placeholder,
+  ariaLabel,
+  className = '',
+}: SearchInputProps) {
  const { t } = useTranslation();
+  const inputPlaceholder = placeholder ?? t('common.search');

  return (
    <input
      type="text"
      value={value}
      onChange={(e) => onChange(e.target.value)}
-      placeholder={placeholder ?? t('common.search')}
+      placeholder={inputPlaceholder}
+      aria-label={ariaLabel ?? inputPlaceholder}
      className={`w-full px-2 py-1 text-sm border rounded bg-white dark:bg-navy-800 dark:text-warm-200 border-warm-200 dark:border-navy-700 placeholder-warm-400 dark:placeholder-warm-500 focus:outline-none focus:ring-1 focus:ring-teal-400 ${className}`}
    />
  );
--- a/frontend/src/hooks/useListingLayers.ts
+++ b/frontend/src/hooks/useListingLayers.ts
@ -13,6 +13,7 @@ const LISTING_CLUSTER_MAX_ZOOM = 24;
 const LISTING_CLUSTER_POPUP_LIMIT = 30;
 const LISTING_SPIDERFY_LIMIT = 12;
 const TILE_SIZE = 512;
+const PRICE_LABEL_CHARACTER_SET = '£0123456789.kM';

 interface SingleListingPopupInfo {
  mode: 'single';
@ -472,6 +473,7 @@ export function useListingLayers({ listings, zoom, isDark }: UseListingLayersPro
      outlineWidth: 3,
      outlineColor: isDark ? [10, 10, 10, 220] : [255, 255, 255, 230],
      fontSettings: { sdf: true },
+      characterSet: PRICE_LABEL_CHARACTER_SET,
      sizeUnits: 'pixels',
      sizeMinPixels: 10,
      sizeMaxPixels: 14,
--- a/frontend/src/hooks/useLocationSearch.ts
+++ b/frontend/src/hooks/useLocationSearch.ts
@ -160,6 +160,7 @@ export function useLocationSearch(mode?: string) {
  const [recentSearches, setRecentSearches] = useState<SearchResult[]>(readRecentSearches);
  const [activeIndex, setActiveIndex] = useState(-1);
  const [open, setOpen] = useState(false);
+  const [searching, setSearching] = useState(false);
  const abortRef = useRef<AbortController | null>(null);
  const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const latestQueryRef = useRef('');
@ -176,6 +177,7 @@ export function useLocationSearch(mode?: string) {

      const trimmed = value.trim();
      if (!trimmed) {
+        setSearching(false);
        setResults(recentSearches);
        lastResultsRef.current = [];
        setOpen(recentSearches.length > 0);
@ -183,6 +185,7 @@ export function useLocationSearch(mode?: string) {
      }

      if (!mode && looksLikePostcode(trimmed)) {
+        setSearching(false);
        const postcodeResults: SearchResult[] = [
          { type: 'postcode', label: normalizePostcode(trimmed) },
        ];
@ -192,6 +195,7 @@ export function useLocationSearch(mode?: string) {
      }

      if (trimmed.length < 2) {
+        setSearching(false);
        setResults([]);
        setOpen(false);
        return;
@ -200,6 +204,7 @@ export function useLocationSearch(mode?: string) {
      const locallyFilteredResults = filterResultsForQuery(lastResultsRef.current, trimmed);
      setResults(locallyFilteredResults);
      setOpen(locallyFilteredResults.length > 0);
+      setSearching(true);

      debounceRef.current = setTimeout(async () => {
        const controller = new AbortController();
@ -211,7 +216,13 @@ export function useLocationSearch(mode?: string) {
            `/api/places?${params}`,
            authHeaders({ signal: controller.signal })
          );
-          if (!res.ok) return;
+          if (!res.ok) {
+            if (!controller.signal.aborted && latestQueryRef.current.trim() === trimmed) {
+              setResults([]);
+              setOpen(true);
+            }
+            return;
+          }
          const json: {
            places: PlaceResult[];
            postcodes?: string[];
@ -253,9 +264,17 @@ export function useLocationSearch(mode?: string) {
          lastResultsRef.current = combinedResults;
          const matchingResults = filterResultsForQuery(combinedResults, trimmed);
          setResults(matchingResults);
-          setOpen(matchingResults.length > 0);
+          setOpen(true);
        } catch (err) {
          logNonAbortError('places search', err);
+          if (!controller.signal.aborted && latestQueryRef.current.trim() === trimmed) {
+            setResults([]);
+            setOpen(true);
+          }
+        } finally {
+          if (!controller.signal.aborted && latestQueryRef.current.trim() === trimmed) {
+            setSearching(false);
+          }
        }
      }, 200);
    },
@ -264,7 +283,7 @@ export function useLocationSearch(mode?: string) {

  const showEmptySearches = useCallback(() => {
    if (latestQueryRef.current.trim()) {
-      setOpen(results.length > 0);
+      setOpen(results.length > 0 || latestQueryRef.current.trim().length >= 2);
      return;
    }

@ -278,6 +297,7 @@ export function useLocationSearch(mode?: string) {
  const clear = useCallback(() => {
    setQuery('');
    latestQueryRef.current = '';
+    setSearching(false);
    setResults([]);
    lastResultsRef.current = [];
    setOpen(false);
@ -308,6 +328,8 @@ export function useLocationSearch(mode?: string) {
        e.preventDefault();
        if (activeIndex >= 0 && activeIndex < results.length) {
          onSelect(results[activeIndex]);
+        } else if (results.length > 0) {
+          onSelect(results[0]);
        } else if (looksLikePostcode(query)) {
          onSelect({ type: 'postcode', label: normalizePostcode(query) });
        }
@ -332,6 +354,7 @@ export function useLocationSearch(mode?: string) {
    activeIndex,
    setActiveIndex,
    open,
+    searching,
    setOpen,
    handleInputChange,
    handleKeyDown,
--- a/frontend/src/i18n/locales/de.ts
+++ b/frontend/src/i18n/locales/de.ts
@ -916,6 +916,7 @@ const de: Translations = {
  // ── Location Search ────────────────────────────────
  locationSearch: {
    placeholder: 'Orte oder Postcodes suchen...',
+    noResults: 'Keine passenden Orte oder Postcodes',
    postcodeNotFound: 'Postcode nicht gefunden',
    lookupFailed: 'Suche fehlgeschlagen',
    searchLabel: 'Orte oder Postcodes suchen',
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@ -892,6 +892,7 @@ const en = {
  // ── Location Search ────────────────────────────────
  locationSearch: {
    placeholder: 'Search places or postcodes...',
+    noResults: 'No matching places or postcodes',
    postcodeNotFound: 'Postcode not found',
    lookupFailed: 'Lookup failed',
    searchLabel: 'Search places or postcodes',
--- a/frontend/src/i18n/locales/fr.ts
+++ b/frontend/src/i18n/locales/fr.ts
@ -924,6 +924,7 @@ const fr: Translations = {
  // ── Location Search ────────────────────────────────
  locationSearch: {
    placeholder: 'Rechercher des lieux ou codes postaux...',
+    noResults: 'Aucun lieu ni code postal correspondant',
    postcodeNotFound: 'Code postal introuvable',
    lookupFailed: 'Échec de la recherche',
    searchLabel: 'Rechercher des lieux ou codes postaux',
--- a/frontend/src/i18n/locales/hi.ts
+++ b/frontend/src/i18n/locales/hi.ts
@ -876,6 +876,7 @@ const hi: Translations = {

  locationSearch: {
    placeholder: 'स्थान या पोस्टकोड खोजें...',
+    noResults: 'कोई मिलती-जुलती जगह या पोस्टकोड नहीं मिला',
    postcodeNotFound: 'पोस्टकोड नहीं मिला',
    lookupFailed: 'खोज विफल रही',
    searchLabel: 'स्थान या पोस्टकोड खोजें',
--- a/frontend/src/i18n/locales/hu.ts
+++ b/frontend/src/i18n/locales/hu.ts
@ -910,6 +910,7 @@ const hu: Translations = {
  // ── Location Search ────────────────────────────────
  locationSearch: {
    placeholder: 'Helyek vagy irányítószámok keresése...',
+    noResults: 'Nincs egyező hely vagy irányítószám',
    postcodeNotFound: 'Irányítószám nem található',
    lookupFailed: 'A keresés sikertelen',
    searchLabel: 'Helyek vagy irányítószámok keresése',
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@ -850,6 +850,7 @@ const zh: Translations = {
  // ── Location Search ────────────────────────────────
  locationSearch: {
    placeholder: '搜索地点或邮编...',
+    noResults: '未找到匹配的地点或邮编',
    postcodeNotFound: '未找到该邮编',
    lookupFailed: '查询失败',
    searchLabel: '搜索地点或邮编',
--- a/pipeline/test_validate_outputs.py
+++ b/pipeline/test_validate_outputs.py
@ -1,19 +1,33 @@
 from __future__ import annotations

-import zipfile
 import json
+import zipfile

 import polars as pl

 from pipeline.validate_outputs import main


-def write_boundary(path, postcodes):
+def polygon(offset=0):
+    x = float(offset)
+    return {
+        "type": "Polygon",
+        "coordinates": [
+            [(x, 0.0), (x + 0.001, 0.0), (x + 0.001, 0.001), (x, 0.001), (x, 0.0)]
+        ],
+    }
+
+
+def write_boundary(path, postcodes, geometries=None):
    units = path / "units"
    units.mkdir(parents=True)
    features = [
-        {"type": "Feature", "properties": {"postcodes": postcode}, "geometry": None}
-        for postcode in postcodes
+        {
+            "type": "Feature",
+            "properties": {"postcodes": postcode},
+            "geometry": (geometries[index] if geometries else polygon(index)),
+        }
+        for index, postcode in enumerate(postcodes)
    ]
    (units / "AA1.geojson").write_text(
        json.dumps({"type": "FeatureCollection", "features": features})
@ -111,3 +125,100 @@ def test_rejects_postcode_boundary_mismatch(tmp_path, monkeypatch, capsys):
    stderr = capsys.readouterr().err
    assert "missing boundaries" in stderr
    assert "boundary postcodes are absent" in stderr
+
+
+def test_rejects_invalid_postcode_boundary_features(tmp_path, monkeypatch, capsys):
+    postcodes_path = tmp_path / "postcodes.parquet"
+    boundaries_path = tmp_path / "postcode_boundaries"
+    units = boundaries_path / "units"
+    units.mkdir(parents=True)
+    pl.DataFrame({"postcode": ["AA1 1AA"]}).write_parquet(postcodes_path)
+    bowtie = {
+        "type": "Polygon",
+        "coordinates": [[(0, 0), (1, 1), (1, 0), (0, 1), (0, 0)]],
+    }
+    features = [
+        {
+            "type": "Feature",
+            "properties": {"postcodes": "AA1 1AA"},
+            "geometry": polygon(),
+        },
+        {
+            "type": "Feature",
+            "properties": {"postcodes": "AA1 1AA"},
+            "geometry": polygon(1),
+        },
+        {"type": "Feature", "properties": {}, "geometry": polygon(2)},
+        {"type": "Feature", "properties": {"postcodes": "AA1 1AB"}, "geometry": None},
+        {"type": "Feature", "properties": {"postcodes": "AA1 1AC"}, "geometry": bowtie},
+    ]
+    (units / "AA1.geojson").write_text(
+        json.dumps({"type": "FeatureCollection", "features": features})
+    )
+
+    monkeypatch.setattr(
+        "sys.argv",
+        [
+            "validate_outputs",
+            "--postcode-boundary-match",
+            f"{postcodes_path}::{boundaries_path}",
+        ],
+    )
+
+    assert main() == 1
+    stderr = capsys.readouterr().err
+    assert "duplicate boundary postcode features" in stderr
+    assert "missing properties.postcodes" in stderr
+    assert "missing or empty geometry" in stderr
+    assert "invalid boundary geometries" in stderr
+
+
+def test_validates_active_english_arcgis_boundary_matches(tmp_path, monkeypatch):
+    arcgis_path = tmp_path / "arcgis.parquet"
+    boundaries_path = tmp_path / "postcode_boundaries"
+    pl.DataFrame(
+        {
+            "pcds": ["AA1 1AA", "AA1 1AB", "CF1 1AA"],
+            "ctry25cd": ["E92000001", "E92000001", "W92000004"],
+            "doterm": [None, "2020-01-01", None],
+        }
+    ).write_parquet(arcgis_path)
+    write_boundary(boundaries_path, ["AA1 1AA"])
+
+    monkeypatch.setattr(
+        "sys.argv",
+        [
+            "validate_outputs",
+            "--active-postcode-boundary-match",
+            f"{arcgis_path}::{boundaries_path}",
+        ],
+    )
+
+    assert main() == 0
+
+
+def test_rejects_active_english_arcgis_boundary_mismatch(tmp_path, monkeypatch, capsys):
+    arcgis_path = tmp_path / "arcgis.parquet"
+    boundaries_path = tmp_path / "postcode_boundaries"
+    pl.DataFrame(
+        {
+            "pcds": ["AA1 1AA", "AA1 1AB", "CF1 1AA"],
+            "ctry25cd": ["E92000001", "E92000001", "W92000004"],
+            "doterm": [None, None, None],
+        }
+    ).write_parquet(arcgis_path)
+    write_boundary(boundaries_path, ["AA1 1AA", "CF1 1AA"])
+
+    monkeypatch.setattr(
+        "sys.argv",
+        [
+            "validate_outputs",
+            "--active-postcode-boundary-match",
+            f"{arcgis_path}::{boundaries_path}",
+        ],
+    )
+
+    assert main() == 1
+    stderr = capsys.readouterr().err
+    assert "active English postcodes" in stderr
+    assert "not active English postcodes" in stderr
--- a/pipeline/transform/crime_hotspot_tiles.py
+++ b/pipeline/transform/crime_hotspot_tiles.py
@ -46,8 +46,21 @@ def _require_tippecanoe() -> str:
    return executable


-def _write_geojsonseq(csvs: list[Path], output_path: Path) -> int:
-    df = (
+def _write_geojsonseq(csvs: list[Path], output_path: Path) -> tuple[int, int]:
+    """Write one weighted GeoJSON point per distinct (anchor, month, type).
+
+    Returns ``(feature_count, incident_count)``. police.uk snaps every incident
+    to a shared "map point" anchor, so many incidents land on the exact same
+    coordinate. Collapsing them into one feature carrying ``count`` (the number
+    of incidents) keeps the per-crime-type and per-month filters intact while
+    turning each hotspot into a single high-weight point. That matters because
+    tippecanoe's ``--drop-densest-as-needed`` thins *feature density*, not
+    weight: with one feature per row the busiest streets were silently deleted;
+    with one weighted feature per anchor those hotspots survive and the dropped
+    detail is only redundant duplicate points. The heatmap reads ``count`` as
+    its weight.
+    """
+    grouped = (
        pl.scan_csv(
            csvs,
            schema_overrides={
@ -67,11 +80,15 @@ def _write_geojsonseq(csvs: list[Path], output_path: Path) -> int:
        .drop_nulls(["lon", "lat"])
        .filter(pl.col("lon").is_between(-9.5, 5.0))
        .filter(pl.col("lat").is_between(49.0, 57.0))
+        .group_by("lon", "lat", "month", "crime_type")
+        .len()
+        .rename({"len": "count"})
        .collect(engine="streaming")
    )

+    incident_count = int(grouped["count"].sum())
    with output_path.open("w") as file:
-        for row in df.iter_rows(named=True):
+        for row in grouped.iter_rows(named=True):
            feature = {
                "type": "Feature",
                "geometry": {
@ -79,15 +96,15 @@ def _write_geojsonseq(csvs: list[Path], output_path: Path) -> int:
                    "coordinates": [row["lon"], row["lat"]],
                },
                "properties": {
-                    "count": 1,
-                    "weight": 1,
+                    "count": row["count"],
+                    "weight": row["count"],
                    "month": row["month"],
                    "crime_type": row["crime_type"],
                },
            }
            file.write(json.dumps(feature, separators=(",", ":")) + "\n")

-    return df.height
+    return grouped.height, incident_count


 def build_crime_hotspot_tiles(
@ -104,9 +121,10 @@ def build_crime_hotspot_tiles(

    with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as tmp:
        ndjson_path = Path(tmp) / "crime_hotspots.geojsonseq"
-        feature_count = _write_geojsonseq(csvs, ndjson_path)
+        feature_count, incident_count = _write_geojsonseq(csvs, ndjson_path)
        print(
-            f"Writing {feature_count:,} approximate crime heatmap points "
+            f"Writing {feature_count:,} weighted crime heatmap points "
+            f"({incident_count:,} incidents) "
            f"from {min(selected_months)} to {max(selected_months)}"
        )

--- a/pipeline/transform/crime_spatial.py
+++ b/pipeline/transform/crime_spatial.py
@ -1,14 +1,25 @@
-"""Aggregate police.uk street crime to postcodes by 50m spatial proximity.
+"""Aggregate police.uk street crime to postcodes by spatial proximity.

 Instead of attributing each incident to its published LSOA code, this transform
-counts the anonymised incident *points* that fall within 50m of each postcode's
-boundary polygon (the polygon buffered outward by 50m). A point inside several
-overlapping buffers counts for each postcode -- the same multiplicity the
-tree-density filter uses for features near more than one postcode.
+counts the anonymised incident *points* that fall within ``buffer_m`` (default
+100m) of each postcode's boundary polygon (the polygon buffered outward). A point
+inside several overlapping buffers counts for each postcode -- the same
+multiplicity the tree-density filter uses for features near more than one
+postcode. The wide 100m buffer deliberately smooths police.uk's snap-to-grid
+coordinates, which would otherwise make the count hypersensitive to which side of
+a narrow line a shared "map point" anchor happened to land on.

-The metric is a raw annualised count ("incidents/year within 50m"); there is no
-per-capita denominator. Outputs mirror the old LSOA transform's shape but are
-keyed on ``postcode`` instead of ``LSOA code``:
+Counts are **area-normalised**: each postcode's count is divided by its buffered
+catchment area and rescaled by the median catchment area, so the metric reflects
+crime *density* rather than how much ground the buffer sweeps (a median-sized
+catchment is left unchanged; a large rural postcode is no longer inflated simply
+for covering more of the map). Normalising by the buffered area -- the region
+that actually collects points -- rather than the raw polygon keeps tiny unit
+postcodes from being over-inflated by the fixed buffer-ring floor. The headline
+``"{type} (avg/yr)"`` is the mean of the per-year annualised counts over the
+years in which that type is recorded anywhere, matching the by-year chart bars.
+
+Outputs mirror the old LSOA transform's shape but are keyed on ``postcode``:

 * ``crime_by_postcode.parquet`` -- ``postcode`` + ``"{type} (avg/yr)"`` columns.
 * ``crime_by_postcode_by_year.parquet`` -- ``postcode`` + ``"{type} (by year)"``
@ -16,14 +27,15 @@ keyed on ``postcode`` instead of ``LSOA code``:

 Caveat: police.uk coordinates are snapped to a fixed set of anonymous "map
 points", not true locations, and a share of rows have no coordinate at all
-(dropped here). Spatial totals are therefore lower than, and fuzzier than, the
-old LSOA-tagged counts -- by design, not a regression.
+(dropped here). Spatial totals are therefore fuzzier than the old LSOA-tagged
+counts -- by design, not a regression.
 """

 from __future__ import annotations

 import argparse
 import re
+import sys
 from pathlib import Path

 import numpy as np
@ -41,7 +53,7 @@ from pipeline.transform.postcode_boundaries.loader import load_postcode_polygons
 # Serious types first so column order is stable and self-documenting.
 ALL_CRIME_TYPES: tuple[str, ...] = SERIOUS_CRIME_TYPES + MINOR_CRIME_TYPES

-DEFAULT_BUFFER_M = 50.0
+DEFAULT_BUFFER_M = 100.0
 MONTH_DIR_RE = re.compile(r"^\d{4}-\d{2}$")

 # Generous GB bounds; points outside fall in no English postcode anyway, but
@ -108,10 +120,11 @@ def _accumulate_counts(
        "Month": pl.Utf8,
        "Crime type": pl.Utf8,
    }
-    known_types = list(type_to_idx)
+    years = list(year_to_idx)
    total_points = 0
    total_matches = 0
    total_dropped = 0
+    unknown_type_counts: dict[str, int] = {}

    for start in range(0, len(csvs), _CSV_BATCH):
        batch = csvs[start : start + _CSV_BATCH]
@ -122,31 +135,47 @@ def _accumulate_counts(
                ignore_errors=True,
            )
            .select("Longitude", "Latitude", "Month", "Crime type")
-            .with_columns(pl.col("Month").str.slice(0, 4).cast(pl.Int32).alias("year"))
+            # strict=False: a single malformed Month drops only that row instead
+            # of aborting the whole build (a non-numeric year becomes null and is
+            # filtered out by the year membership check below).
+            .with_columns(
+                pl.col("Month").str.slice(0, 4).cast(pl.Int32, strict=False).alias("year")
+            )
            .filter(
                pl.col("Longitude").is_not_null()
                & pl.col("Latitude").is_not_null()
                & pl.col("Longitude").is_between(*LON_BOUNDS)
                & pl.col("Latitude").is_between(*LAT_BOUNDS)
-                & pl.col("Crime type").is_in(known_types)
-                & pl.col("year").is_in(list(year_to_idx))
+                & pl.col("Crime type").is_not_null()
+                & (pl.col("Crime type") != "")
+                & pl.col("year").is_in(years)
            )
+            # Map crime types to indices with default=None so an unrecognised
+            # type yields a null index we can *report* rather than silently drop
+            # (the legacy LSOA path surfaced unknown types via its dynamic pivot).
            .with_columns(
                pl.col("Crime type")
-                .replace_strict(type_to_idx, return_dtype=pl.Int32)
+                .replace_strict(type_to_idx, default=None, return_dtype=pl.Int32)
                .alias("tidx"),
                pl.col("year")
                .replace_strict(year_to_idx, return_dtype=pl.Int32)
                .alias("yidx"),
            )
-            .select("Longitude", "Latitude", "tidx", "yidx")
+            .select("Longitude", "Latitude", "Crime type", "tidx", "yidx")
            .collect(engine="streaming")
        )

-        rows_in = frame.height
-        if rows_in == 0:
+        if frame.height == 0:
            continue

+        unknown = frame.filter(pl.col("tidx").is_null())
+        if unknown.height:
+            for name, cnt in unknown.group_by("Crime type").len().iter_rows():
+                unknown_type_counts[name] = unknown_type_counts.get(name, 0) + cnt
+            frame = frame.filter(pl.col("tidx").is_not_null())
+            if frame.height == 0:
+                continue
+
        lon = frame["Longitude"].to_numpy()
        lat = frame["Latitude"].to_numpy()
        tidx = frame["tidx"].to_numpy()
@ -177,6 +206,20 @@ def _accumulate_counts(

    if total_dropped:
        print(f"Dropped {total_dropped:,} points outside the BNG transform domain")
+    if unknown_type_counts:
+        total_unknown = sum(unknown_type_counts.values())
+        listed = ", ".join(
+            f"{name!r} ({cnt:,})"
+            for name, cnt in sorted(
+                unknown_type_counts.items(), key=lambda kv: kv[1], reverse=True
+            )
+        )
+        print(
+            f"WARNING: dropped {total_unknown:,} incidents with crime types not in "
+            f"ALL_CRIME_TYPES (taxonomy is stale -- update SERIOUS/MINOR_CRIME_TYPES): "
+            f"{listed}",
+            file=sys.stderr,
+        )


 def _rollup_long(
@ -195,12 +238,27 @@ def _rollup_long(
 def _write_avg_yr(
    postcodes: np.ndarray,
    counts: np.ndarray,
-    valid_month_count: int,
+    years: list[int],
+    months_in_year: dict[int, int],
+    norm: np.ndarray,
    output_path: Path,
 ) -> None:
-    """Write ``postcode`` + ``"{type} (avg/yr)"`` annualised totals."""
-    totals = counts.sum(axis=2)  # (n_postcodes, n_types)
-    avg = np.round(totals / valid_month_count * 12.0, 1).astype(np.float32)
+    """Write ``postcode`` + ``"{type} (avg/yr)"`` density-normalised averages.
+
+    The headline figure is the **mean of the per-year annualised counts** (each
+    year scaled to a 12-month equivalent) over the years in which the type occurs
+    anywhere, matching the by-year chart bars rather than a month-weighted pooled
+    rate. Each postcode's value is then multiplied by ``norm`` (median_area /
+    buffered catchment area): a density, not a footprint-inflated raw count.
+    """
+    months = np.array([months_in_year[year] for year in years], dtype=np.float64)
+    per_year = counts.astype(np.float64) * 12.0 / months[None, None, :]
+    # Average over the years each type is actually observed anywhere -- the same
+    # per-type x-span the by-year chart plots (server-rs/.../crime_by_year.rs).
+    type_year_present = counts.sum(axis=0) > 0  # (n_types, n_years)
+    years_per_type = np.clip(type_year_present.sum(axis=1), 1, None).astype(np.float64)
+    avg = per_year.sum(axis=2) / years_per_type[None, :]  # (n_postcodes, n_types)
+    avg = np.round(avg * norm[:, None], 1).astype(np.float32)

    data: dict[str, np.ndarray] = {"postcode": postcodes}
    for type_idx, name in enumerate(ALL_CRIME_TYPES):
@ -216,11 +274,20 @@ def _write_by_year(
    counts: np.ndarray,
    years: list[int],
    months_in_year: dict[int, int],
+    norm: np.ndarray,
    output_path: Path,
 ) -> None:
-    """Write nested ``"{type} (by year)"`` series plus Serious/Minor rollups."""
+    """Write nested ``"{type} (by year)"`` series plus Serious/Minor rollups.
+
+    Per-year counts are area-normalised by the same ``norm`` (median_area /
+    buffered catchment area) factor applied to the avg/yr headline, so the chart
+    bars and the headline figure remain mutually consistent.
+    """
    months = np.array([months_in_year[year] for year in years], dtype=np.float64)
-    annual = np.round(counts.astype(np.float64) * 12.0 / months[None, None, :], 1)
+    annual = np.round(
+        counts.astype(np.float64) * 12.0 / months[None, None, :] * norm[:, None, None],
+        1,
+    )

    pc_i, ty_i, yr_i = np.nonzero(counts)
    if pc_i.size == 0:
@ -278,8 +345,27 @@ def transform_crime_spatial(
    )

    postcodes, polygons = load_postcode_polygons(boundaries_dir, max_postcodes)
+
    print(f"Buffering {len(postcodes):,} postcode polygons by {buffer_m:g}m...")
-    _buffers, tree = _build_tree(polygons, buffer_m)
+    buffers, tree = _build_tree(polygons, buffer_m)
+
+    # Area-normalisation factor (median_area / catchment_area): divides out the
+    # size of each postcode's catchment so the count measures crime density, not
+    # how much ground the buffer sweeps. We normalise by the *buffered* area --
+    # the region that actually collects points -- rather than the raw polygon, so
+    # a tiny unit postcode isn't over-inflated by the fixed buffer-ring floor.
+    # Buffers are in EPSG:27700, so shapely.area is in m^2.
+    areas = shapely.area(buffers).astype(np.float64)
+    usable_area = np.isfinite(areas) & (areas > 0)
+    if not usable_area.any():
+        raise ValueError("No postcode buffers have a positive area to normalise by")
+    median_area = float(np.median(areas[usable_area]))
+    norm = np.zeros(len(postcodes), dtype=np.float64)
+    norm[usable_area] = median_area / areas[usable_area]
+    print(
+        f"Area-normalising to median catchment area {median_area:,.0f} m^2 "
+        f"({int(usable_area.sum()):,}/{len(areas):,} postcodes have usable area)"
+    )

    type_to_idx = {name: idx for idx, name in enumerate(ALL_CRIME_TYPES)}
    year_to_idx = {year: idx for idx, year in enumerate(years)}
@ -288,8 +374,8 @@ def transform_crime_spatial(
    transformer = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
    _accumulate_counts(csvs, tree, type_to_idx, year_to_idx, transformer, counts)

-    _write_avg_yr(postcodes, counts, valid_month_count, output_path)
-    _write_by_year(postcodes, counts, years, months_in_year, by_year_output_path)
+    _write_avg_yr(postcodes, counts, years, months_in_year, norm, output_path)
+    _write_by_year(postcodes, counts, years, months_in_year, norm, by_year_output_path)


 def main() -> None:
--- a/pipeline/transform/merge.py
+++ b/pipeline/transform/merge.py
@ -116,6 +116,66 @@ TREE_DENSITY_FEATURE = "Street tree density percentile"
 _POSTCODE_TREE_DENSITY_PERCENTILE_RE = re.compile(
    r"^Tree canopy density percentile within \d+m$"
 )
+# Columns removed by `_finalize_merged_columns`. The drop is non-strict, so an
+# entry that is absent from a given frame is simply ignored.
+_FINAL_DROP_COLUMNS = [
+    "inspection_date",
+    "_bedrooms",
+    "LSOA name (2021)",
+    "Local Authority District code (2024)",
+    "Local Authority District name (2024)",
+    "Wider Barriers Sub-domain Score",
+    "Geographical Barriers Sub-domain Score",
+    "Adult Skills Sub-domain Score",
+    "Children and Young People Sub-domain Score",
+    "Crime Score",
+    "Living Environment Score",
+    "Index of Multiple Deprivation (IMD) Score",
+    "Income Deprivation Affecting Older People (IDAOPI) Score (rate)",
+    "Income Deprivation Affecting Children Index (IDACI) Score (rate)",
+    "Barriers to Housing and Services Score",
+    "oa21",
+    "pcon",
+    "epc_property_type",
+    "pp_property_type",
+    "built_form",
+]
+# Internal column name -> final output column name, applied by
+# `_finalize_merged_columns`. The rename is non-strict, so keys missing from a
+# given frame are skipped rather than raising.
+_FINAL_RENAME_COLUMNS = {
+    "date_of_transfer": "Date of last transaction",
+    "construction_age_band": "Construction year",
+    "is_construction_date_approximate": "Is construction date approximate",
+    "Income Score (rate)": "Income Score",
+    "Employment Score (rate)": "Employment Score",
+    "Indoors Sub-domain Score": "Housing Conditions Score",
+    "Outdoors Sub-domain Score": "Air Quality and Road Safety Score",
+    "pp_address": "Address per Property Register",
+    "epc_address": "Address per EPC",
+    "postcode": "Postcode",
+    "duration": "Leasehold/Freehold",
+    "current_energy_rating": "Current energy rating",
+    "potential_energy_rating": "Potential energy rating",
+    "total_floor_area": "Total floor area (sqm)",
+    "property_type": "Property type",
+    "restaurants_2km": "Number of restaurants within 2km",
+    "groceries_2km": "Number of grocery shops and supermarkets within 2km",
+    "latest_price": "Last known price",
+    "number_habitable_rooms": "Number of bedrooms & living rooms",
+    "noise_lden_db": "Noise (dB)",
+    "good_primary_5km": "Good+ primary schools within 5km",
+    "good_secondary_5km": "Good+ secondary schools within 5km",
+    "good_primary_2km": "Good+ primary schools within 2km",
+    "good_secondary_2km": "Good+ secondary schools within 2km",
+    "outstanding_primary_5km": "Outstanding primary schools within 5km",
+    "outstanding_secondary_5km": "Outstanding secondary schools within 5km",
+    "outstanding_primary_2km": "Outstanding primary schools within 2km",
+    "outstanding_secondary_2km": "Outstanding secondary schools within 2km",
+    "max_download_speed": "Max available download speed (Mbps)",
+    "serious_crime_avg_yr": "Serious crime (avg/yr)",
+    "minor_crime_avg_yr": "Minor crime (avg/yr)",
+    "mean_monthly_rent": "Estimated monthly rent",
+    "floor_height": "Interior height (m)",
+    "was_council_house": "Former council house",
+    "median_age": "Median age",
+    "turnout_pct": "Voter turnout (%)",
+}
 _RENT_SOURCE_UNAVAILABLE_LADS = {
    # ONS PIPR does not publish LAD-level private-rent estimates for these
    # small authorities. Keep rent null there, but fail on any other LAD miss.
@ -707,6 +767,181 @@ def _validate_property_postcodes(df: pl.DataFrame) -> None:
    )


+def _active_english_postcode_area(arcgis_raw: pl.LazyFrame) -> pl.LazyFrame:
+    """Return the supported postcode universe with geography join keys.
+
+    Keeps only rows with ``ctry25cd == "E92000001"`` (England) and a null
+    ``doterm`` (postcode not terminated), renames the source columns to the
+    short names used by later joins (``pcds`` -> ``postcode``, ``long`` ->
+    ``lon``, ``lsoa21cd`` -> ``lsoa21``, ``oa21cd`` -> ``oa21``, ``pcon24cd`` ->
+    ``pcon``), drops rows without a postcode and de-duplicates on ``postcode``.
+    """
+    return (
+        arcgis_raw.filter(pl.col("ctry25cd") == "E92000001")
+        .filter(pl.col("doterm").is_null())
+        .select(
+            pl.col("pcds").alias("postcode"),
+            "lat",
+            pl.col("long").alias("lon"),
+            "ctry25cd",
+            pl.col("lsoa21cd").alias("lsoa21"),
+            pl.col("oa21cd").alias("oa21"),
+            pl.col("pcon24cd").alias("pcon"),
+        )
+        .drop_nulls(["postcode"])
+        # NOTE(review): no `keep=` is given, so if a postcode ever appeared twice
+        # the surviving row would be arbitrary -- presumably `pcds` is unique.
+        .unique(["postcode"])
+    )
+
+
+def _remap_terminated_postcodes(
+    wide: pl.LazyFrame, postcode_mapping: pl.LazyFrame
+) -> pl.LazyFrame:
+    """Replace each row's ``postcode`` with its mapped successor, if any.
+
+    ``postcode_mapping`` is left-joined on ``postcode == old_postcode``. Rows
+    with a match take ``new_postcode``; rows without one keep their original
+    ``postcode``. The helper ``new_postcode`` column is dropped afterwards.
+    """
+    return (
+        wide.join(
+            postcode_mapping,
+            left_on="postcode",
+            right_on="old_postcode",
+            how="left",
+        )
+        .with_columns(
+            # Prefer the mapped successor; fall back to the existing value.
+            pl.coalesce("new_postcode", "postcode").alias("postcode"),
+        )
+        .drop("new_postcode")
+    )
+
+
+def _filter_to_active_english_postcodes(
+    wide: pl.LazyFrame, active_postcodes: pl.LazyFrame
+) -> pl.LazyFrame:
+    """Keep only rows of ``wide`` whose ``postcode`` is in ``active_postcodes``.
+
+    Uses a semi join, so rows are filtered without adding any columns from
+    ``active_postcodes`` to the result.
+    """
+    return wide.join(active_postcodes, on="postcode", how="semi")
+
+
+def _join_area_side_tables(
+    base: pl.LazyFrame,
+    *,
+    iod: pl.LazyFrame,
+    ethnicity: pl.LazyFrame,
+    crime: pl.LazyFrame,
+    median_age: pl.LazyFrame,
+    election: pl.LazyFrame,
+    poi_counts: pl.LazyFrame,
+    noise: pl.LazyFrame,
+    school_proximity: pl.LazyFrame,
+    conservation_areas: pl.LazyFrame,
+    tree_density: pl.LazyFrame | None,
+    broadband: pl.LazyFrame,
+) -> pl.LazyFrame:
+    """Left-join every area-level side table onto ``base`` and return the result.
+
+    ``base`` must carry the ``postcode``, ``lsoa21`` and ``pcon`` join keys.
+    All joins are left joins, so no ``base`` row is dropped. Besides the joined
+    columns this adds ``serious_crime_avg_yr`` and ``minor_crime_avg_yr``
+    (row-wise sums of the per-type ``(avg/yr)`` crime columns) and fills missing
+    ``CONSERVATION_AREA_FEATURE`` values with ``"No"``.
+
+    ``tree_density`` is optional; when it is ``None`` that join is skipped.
+    """
+    base = base.join(iod, left_on="lsoa21", right_on="LSOA code (2021)", how="left")
+    # The district code used as the key here is not one of the arcgis-derived
+    # keys -- presumably it is supplied by the `iod` join above; confirm.
+    base = base.join(
+        ethnicity,
+        left_on="Local Authority District code (2024)",
+        right_on="Geography_code",
+        how="left",
+    )
+
+    # Crime is counted spatially per postcode (incidents within 50m of the
+    # postcode boundary), so it joins on postcode rather than LSOA.
+    base = base.join(crime, on="postcode", how="left")
+    # NOTE(review): `pl.sum_horizontal` skips nulls by default, so a postcode
+    # with no crime row presumably totals 0 rather than null -- confirm intended.
+    base = base.with_columns(
+        pl.sum_horizontal(
+            "Violence and sexual offences (avg/yr)",
+            "Robbery (avg/yr)",
+            "Burglary (avg/yr)",
+            "Possession of weapons (avg/yr)",
+        ).alias("serious_crime_avg_yr"),
+        pl.sum_horizontal(
+            "Anti-social behaviour (avg/yr)",
+            "Criminal damage and arson (avg/yr)",
+            "Shoplifting (avg/yr)",
+            "Bicycle theft (avg/yr)",
+            "Theft from the person (avg/yr)",
+            "Other theft (avg/yr)",
+            "Vehicle crime (avg/yr)",
+            "Public order (avg/yr)",
+            "Drugs (avg/yr)",
+            "Other crime (avg/yr)",
+        ).alias("minor_crime_avg_yr"),
+    )
+
+    base = base.join(median_age, on="lsoa21", how="left")
+    base = base.join(election, on="pcon", how="left")
+    base = base.join(poi_counts, on="postcode", how="left")
+    base = base.join(noise, on="postcode", how="left")
+    base = base.join(school_proximity, on="postcode", how="left")
+    # Postcodes absent from the conservation-area table are reported as "No".
+    base = base.join(conservation_areas, on="postcode", how="left").with_columns(
+        pl.col(CONSERVATION_AREA_FEATURE).fill_null("No")
+    )
+    if tree_density is not None:
+        base = base.join(tree_density, on="postcode", how="left")
+    # The broadband table names its key `bb_postcode` rather than `postcode`.
+    return base.join(broadband, left_on="postcode", right_on="bb_postcode", how="left")
+
+
+def _finalize_merged_columns(frame: pl.LazyFrame) -> pl.LazyFrame:
+    """Drop ``_FINAL_DROP_COLUMNS`` and apply ``_FINAL_RENAME_COLUMNS``.
+
+    Both steps pass ``strict=False``, so columns that are not present in
+    ``frame`` are ignored instead of raising.
+    """
+    return frame.drop(_FINAL_DROP_COLUMNS, strict=False).rename(
+        _FINAL_RENAME_COLUMNS, strict=False
+    )
+
+
+def _area_columns_from(columns: list[str]) -> list[str]:
+    """Return, in input order, the names that are postcode-level columns.
+
+    A name qualifies if it is in ``_AREA_COLUMNS`` or is accepted by
+    ``_is_dynamic_poi_metric_column``.
+    """
+    return [
+        c for c in columns if c in _AREA_COLUMNS or _is_dynamic_poi_metric_column(c)
+    ]
+
+
+def _property_columns_from(columns: list[str]) -> list[str]:
+    """Return, in input order, the names that are property-level columns.
+
+    A name qualifies if it is neither in ``_AREA_COLUMNS`` nor accepted by
+    ``_is_dynamic_poi_metric_column``. ``"Postcode"`` is always kept, whatever
+    those two checks say about it.
+    """
+    return [
+        c
+        for c in columns
+        if (c not in _AREA_COLUMNS and not _is_dynamic_poi_metric_column(c))
+        or c == "Postcode"
+    ]
+
+
+def _validate_postcode_feature_output(
+    postcode_df: pl.DataFrame, expected_postcode_count: int
+) -> None:
+    """Check that ``postcode_df`` has exactly one valid row per expected postcode.
+
+    Args:
+        postcode_df: Postcode-level output frame to validate.
+        expected_postcode_count: Number of postcodes the frame must contain.
+
+    Raises:
+        ValueError: If ``Postcode``, ``lat``, ``lon`` or ``ctry25cd`` is missing;
+            if the row count or the number of distinct ``Postcode`` values
+            differs from ``expected_postcode_count``; or if any row has a null
+            or blank ``Postcode``, a null ``lat``/``lon``, or a ``ctry25cd``
+            that is null or not ``"E92000001"``.
+    """
+    required = {"Postcode", "lat", "lon", "ctry25cd"}
+    missing = sorted(required - set(postcode_df.columns))
+    if missing:
+        raise ValueError(f"Postcode feature output missing columns: {missing}")
+
+    # Comparing both the height and the distinct count to the expected size
+    # catches duplicated postcodes as well as missing or extra ones.
+    unique_count = postcode_df["Postcode"].n_unique()
+    if (
+        postcode_df.height != expected_postcode_count
+        or unique_count != expected_postcode_count
+    ):
+        raise ValueError(
+            "Postcode feature output no longer matches the active England "
+            "postcode universe: "
+            f"rows={postcode_df.height:,}, unique={unique_count:,}, "
+            f"expected={expected_postcode_count:,}"
+        )
+
+    invalid = postcode_df.filter(
+        pl.col("Postcode").is_null()
+        | (pl.col("Postcode").cast(pl.Utf8).str.strip_chars() == "")
+        | pl.col("lat").is_null()
+        | pl.col("lon").is_null()
+        | pl.col("ctry25cd").is_null()
+        | (pl.col("ctry25cd") != "E92000001")
+    )
+    if invalid.height > 0:
+        # Include at most 10 offending rows so the error stays readable.
+        sample = (
+            invalid.select("Postcode", "ctry25cd", "lat", "lon").head(10).to_dicts()
+        )
+        raise ValueError(
+            "Postcode feature output contains unsupported or ungeocoded rows: "
+            f"{invalid.height} rows. Sample: {sample}"
+        )
+
+
+def _split_normal_outputs(
+    df: pl.DataFrame,
+    postcode_features: pl.DataFrame,
+    *,
+    expected_postcode_count: int,
+) -> tuple[pl.DataFrame, pl.DataFrame]:
+    """Build the postcode-level and property-level output frames.
+
+    Args:
+        df: Collected property frame.
+        postcode_features: Collected postcode-level frame.
+        expected_postcode_count: Postcode count the postcode output must match.
+
+    Returns:
+        ``(postcode_df, properties_df)``: the area columns of
+        ``postcode_features`` and the property columns of ``df``.
+
+    Raises:
+        ValueError: If the postcode output fails
+            ``_validate_postcode_feature_output``.
+    """
+    postcode_df = postcode_features.select(
+        _area_columns_from(postcode_features.columns)
+    )
+    _validate_postcode_feature_output(postcode_df, expected_postcode_count)
+    properties_df = df.select(_property_columns_from(df.columns))
+    return postcode_df, properties_df
+
+
 # Map listings-parquet source columns to the `_actual_*` overlay columns
 # carried alongside the wide frame through the postcode-keyed joins. After the
 # rest of the pipeline finalises, listing rows pick their canonical dashboard
@ -927,9 +1162,7 @@ def _best_listing_match(
    return best, float(best_score), "address", best_field


-def _load_listings_for_merge(
-    listings_path: Path, arcgis_path: Path
-) -> pl.DataFrame:
+def _load_listings_for_merge(listings_path: Path, arcgis_path: Path) -> pl.DataFrame:
    """Read the listings parquet and prepare it for the wide-frame merge.

    Output is keyed by `_listing_idx` and carries:
@ -1032,7 +1265,11 @@ def _load_direct_epc_candidates(
        "_direct_epc_outcode": pl.Utf8,
        "_direct_epc_canonical_property_type": pl.Utf8,
        "_direct_epc_uprn": pl.Utf8,
-        **{column: dtype for column, dtype in _DIRECT_EPC_COLUMNS if column.startswith("_direct_")},
+        **{
+            column: dtype
+            for column, dtype in _DIRECT_EPC_COLUMNS
+            if column.startswith("_direct_")
+        },
    }
    if not listing_outcodes:
        return pl.DataFrame(schema=schema)
@ -1089,9 +1326,7 @@ def _load_direct_epc_candidates(
            pl.col("epc_address").alias("_direct_epc_address"),
            pl.col("uprn").alias("_direct_epc_uprn"),
            pl.col("total_floor_area").alias("_direct_total_floor_area"),
-            pl.col("number_habitable_rooms").alias(
-                "_direct_number_habitable_rooms"
-            ),
+            pl.col("number_habitable_rooms").alias("_direct_number_habitable_rooms"),
            pl.col("floor_height").alias("_direct_floor_height"),
            pl.col("_direct_was_council_house").fill_null("No"),
        )
@ -1141,9 +1376,7 @@ def _listing_match_frame(listings: pl.DataFrame) -> pl.DataFrame:
    )


-def _optional_lazy_col(
-    schema: pl.Schema, column: str, dtype: pl.DataType
-) -> pl.Expr:
+def _optional_lazy_col(schema: pl.Schema, column: str, dtype: pl.DataType) -> pl.Expr:
+    """Return an expression for ``column`` that works whether or not it exists.
+
+    If ``column`` is in ``schema`` it is cast to ``dtype`` non-strictly;
+    otherwise a null literal of ``dtype`` is produced. Either way the result is
+    aliased to ``column``.
+    """
     if column in schema:
         return pl.col(column).cast(dtype, strict=False).alias(column)
     return pl.lit(None, dtype=dtype).alias(column)
@ -1640,27 +1873,18 @@ def _build(
        | (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
    )

-    # Remap terminated postcodes to nearest active successor
+    # Remap terminated postcodes to nearest active successor before filtering to
+    # the supported active-English postcode universe. Historical properties from
+    # terminated English postcodes are retained under their successor postcode.
    postcode_mapping = build_postcode_mapping(arcgis_path)
-    wide = (
-        wide.join(
-            postcode_mapping.lazy(),
-            left_on="postcode",
-            right_on="old_postcode",
-            how="left",
-        )
-        .with_columns(
-            pl.coalesce("new_postcode", "postcode").alias("postcode"),
-        )
-        .drop("new_postcode")
-    )
-
+    wide = _remap_terminated_postcodes(wide, postcode_mapping.lazy())
    arcgis_raw = pl.scan_parquet(arcgis_path)
-    postcode_country = arcgis_raw.select(
-        pl.col("pcds").alias("postcode"),
-        pl.col("ctry25cd"),
-    ).unique(["postcode"])
-    wide = wide.join(postcode_country, on="postcode", how="left")
+    arcgis = _active_english_postcode_area(arcgis_raw)
+    active_postcodes = arcgis.select("postcode").unique()
+    active_postcode_count = (
+        active_postcodes.select(pl.len()).collect(engine="streaming").item()
+    )
+    wide = _filter_to_active_english_postcodes(wide, active_postcodes)

    if listed_buildings_path is not None:
        active_postcodes_for_listed = (
@ -1691,92 +1915,25 @@ def _build(
            arcgis_path,
            epc_path=actual_listings_epc_path,
        )
+        wide = _filter_to_active_english_postcodes(wide, active_postcodes)

    wide = wide.with_columns(pl.col(LISTED_BUILDING_FEATURE).fill_null("No"))

-    arcgis = (
-        arcgis_raw.filter(pl.col("ctry25cd") == "E92000001")  # England only
-        .filter(pl.col("doterm").is_null())  # Active postcodes only
-        # NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
-        # Alias them back to the short canonical names used across the
-        # pipeline so downstream joins don't need to know about NSPL's
-        # versioning scheme.
-        .select(
-            pl.col("pcds").alias("postcode"),
-            "lat",
-            pl.col("long").alias("lon"),
-            pl.col("lsoa21cd").alias("lsoa21"),
-            pl.col("oa21cd").alias("oa21"),
-            pl.col("pcon24cd").alias("pcon"),
-        )
-    )
+    # NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
+    # `_active_english_postcode_area` aliases them back to the short canonical
+    # names used across the pipeline so downstream joins don't need to know
+    # about NSPL's versioning scheme.
    wide = wide.join(arcgis, on="postcode", how="left")
+    postcode_area = arcgis

    iod = pl.scan_parquet(iod_path).with_columns(
        *(_less_deprived_percentile_expr(c) for c in _IOD_PERCENTILE_COLUMNS)
    )
-    wide = wide.join(iod, left_on="lsoa21", right_on="LSOA code (2021)", how="left")
-
    ethnicity = pl.scan_parquet(ethnicity_path)
-    wide = wide.join(
-        ethnicity,
-        left_on="Local Authority District code (2024)",
-        right_on="Geography_code",
-        how="left",
-    )
-
-    # Derive bedroom count: habitable rooms - 1 (assuming 1 reception room), clipped to 0..4
-    wide = wide.with_columns(
-        (pl.col("number_habitable_rooms") - 1)
-        .clip(0, 4)
-        .cast(pl.UInt8)
-        .alias("_bedrooms"),
-    )
-    rental = pl.scan_parquet(rental_prices_path).select(
-        "area_code", "bedrooms", "mean_monthly_rent"
-    )
-    wide = wide.join(
-        rental,
-        left_on=["Local Authority District code (2024)", "_bedrooms"],
-        right_on=["area_code", "bedrooms"],
-        how="left",
-    )
-
-    # Crime is counted spatially per postcode (incidents within 50m of the
-    # postcode boundary), so it joins on postcode rather than LSOA.
    crime = pl.scan_parquet(crime_path)
-    wide = wide.join(crime, on="postcode", how="left")
-
-    wide = wide.with_columns(
-        pl.sum_horizontal(
-            "Violence and sexual offences (avg/yr)",
-            "Robbery (avg/yr)",
-            "Burglary (avg/yr)",
-            "Possession of weapons (avg/yr)",
-        ).alias("serious_crime_avg_yr"),
-        pl.sum_horizontal(
-            "Anti-social behaviour (avg/yr)",
-            "Criminal damage and arson (avg/yr)",
-            "Shoplifting (avg/yr)",
-            "Bicycle theft (avg/yr)",
-            "Theft from the person (avg/yr)",
-            "Other theft (avg/yr)",
-            "Vehicle crime (avg/yr)",
-            "Public order (avg/yr)",
-            "Drugs (avg/yr)",
-            "Other crime (avg/yr)",
-        ).alias("minor_crime_avg_yr"),
-    )
-
    median_age = pl.scan_parquet(median_age_path)
-    wide = wide.join(median_age, on="lsoa21", how="left")
-
    election = pl.scan_parquet(election_results_path)
-    wide = wide.join(election, on="pcon", how="left")
-
    poi_counts = pl.scan_parquet(poi_proximity_path)
-    wide = wide.join(poi_counts, on="postcode", how="left")
-
    noise_cols = ["road_noise_lden_db", "rail_noise_lden_db", "airport_noise_lden_db"]
    noise = (
        pl.scan_parquet(noise_path)
@ -1789,21 +1946,13 @@ def _build(
        )
        .select("postcode", "noise_lden_db")
    )
-    wide = wide.join(noise, on="postcode", how="left")
-
    school_proximity = pl.scan_parquet(school_proximity_path)
-    wide = wide.join(school_proximity, on="postcode", how="left")
-
    conservation_areas = _conservation_area_by_postcode(
        arcgis.select("postcode", "lat", "lon"), conservation_areas_path
    )
-    wide = wide.join(conservation_areas, on="postcode", how="left").with_columns(
-        pl.col(CONSERVATION_AREA_FEATURE).fill_null("No")
-    )
-
+    tree_density = None
    if tree_density_postcodes_path is not None:
        tree_density = _tree_density_by_postcode(tree_density_postcodes_path)
-        wide = wide.join(tree_density, on="postcode", how="left")

    # Broadband: derive max available download speed tier per postcode from
    # Ofcom availability percentages.  Tiers: Gigabit ≥1000, UFBB ≥300,
@ -1828,7 +1977,38 @@ def _build(
        .agg(pl.col("max_download_speed").max())
        .with_columns(pl.col("max_download_speed").cast(pl.Utf8))
    )
-    wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left")
+    area_side_tables = {
+        "iod": iod,
+        "ethnicity": ethnicity,
+        "crime": crime,
+        "median_age": median_age,
+        "election": election,
+        "poi_counts": poi_counts,
+        "noise": noise,
+        "school_proximity": school_proximity,
+        "conservation_areas": conservation_areas,
+        "tree_density": tree_density,
+        "broadband": broadband,
+    }
+    wide = _join_area_side_tables(wide, **area_side_tables)
+    postcode_area = _join_area_side_tables(postcode_area, **area_side_tables)
+
+    # Derive bedroom count: habitable rooms - 1 (assuming 1 reception room), clipped to 0..4
+    wide = wide.with_columns(
+        (pl.col("number_habitable_rooms") - 1)
+        .clip(0, 4)
+        .cast(pl.UInt8)
+        .alias("_bedrooms"),
+    )
+    rental = pl.scan_parquet(rental_prices_path).select(
+        "area_code", "bedrooms", "mean_monthly_rent"
+    )
+    wide = wide.join(
+        rental,
+        left_on=["Local Authority District code (2024)", "_bedrooms"],
+        right_on=["area_code", "bedrooms"],
+        how="left",
+    )

    # Derive property_type: prefer EPC data, fall back to price-paid.
    # For Houses, use built_form (e.g. Semi-Detached, Mid-Terrace) for finer detail.
@ -1862,112 +2042,40 @@ def _build(
        .alias("property_type")
    )

-    wide = (
-        wide.with_columns(
-            pl.when(pl.col("duration") == "U")
-            .then(None)
-            .otherwise(pl.col("duration"))
-            .alias("duration"),
-            pl.when(pl.col("current_energy_rating") == "INVALID!")
-            .then(None)
-            .otherwise(pl.col("current_energy_rating"))
-            .alias("current_energy_rating"),
-        )
-        .with_columns(
-            (pl.col("latest_price") / pl.col("total_floor_area"))
-            .round(0)
-            .cast(pl.Int32)
-            .alias("Price per sqm"),
-        )
-        .drop(
-            "inspection_date",
-            "_bedrooms",
-            "LSOA name (2021)",
-            "Local Authority District code (2024)",
-            "Local Authority District name (2024)",
-            "Wider Barriers Sub-domain Score",
-            "Geographical Barriers Sub-domain Score",
-            "Adult Skills Sub-domain Score",
-            "Children and Young People Sub-domain Score",
-            "Crime Score",
-            "Living Environment Score",
-            "Index of Multiple Deprivation (IMD) Score",
-            "Income Deprivation Affecting Older People (IDAOPI) Score (rate)",
-            "Income Deprivation Affecting Children Index (IDACI) Score (rate)",
-            "Barriers to Housing and Services Score",
-            "oa21",
-            "pcon",
-            "epc_property_type",
-            "pp_property_type",
-            "built_form",
-        )
-        .rename(
-            {
-                "date_of_transfer": "Date of last transaction",
-                "construction_age_band": "Construction year",
-                "is_construction_date_approximate": "Is construction date approximate",
-                "Income Score (rate)": "Income Score",
-                "Employment Score (rate)": "Employment Score",
-                "Indoors Sub-domain Score": "Housing Conditions Score",
-                "Outdoors Sub-domain Score": "Air Quality and Road Safety Score",
-                "pp_address": "Address per Property Register",
-                "epc_address": "Address per EPC",
-                "postcode": "Postcode",
-                "duration": "Leasehold/Freehold",
-                "current_energy_rating": "Current energy rating",
-                "potential_energy_rating": "Potential energy rating",
-                "total_floor_area": "Total floor area (sqm)",
-                "property_type": "Property type",
-                "restaurants_2km": "Number of restaurants within 2km",
-                "groceries_2km": "Number of grocery shops and supermarkets within 2km",
-                "latest_price": "Last known price",
-                "number_habitable_rooms": "Number of bedrooms & living rooms",
-                "noise_lden_db": "Noise (dB)",
-                "good_primary_5km": "Good+ primary schools within 5km",
-                "good_secondary_5km": "Good+ secondary schools within 5km",
-                "good_primary_2km": "Good+ primary schools within 2km",
-                "good_secondary_2km": "Good+ secondary schools within 2km",
-                "outstanding_primary_5km": "Outstanding primary schools within 5km",
-                "outstanding_secondary_5km": "Outstanding secondary schools within 5km",
-                "outstanding_primary_2km": "Outstanding primary schools within 2km",
-                "outstanding_secondary_2km": "Outstanding secondary schools within 2km",
-                "max_download_speed": "Max available download speed (Mbps)",
-                "serious_crime_avg_yr": "Serious crime (avg/yr)",
-                "minor_crime_avg_yr": "Minor crime (avg/yr)",
-                "mean_monthly_rent": "Estimated monthly rent",
-                "floor_height": "Interior height (m)",
-                "was_council_house": "Former council house",
-                "median_age": "Median age",
-                "turnout_pct": "Voter turnout (%)",
-            }
-        )
+    wide = wide.with_columns(
+        pl.when(pl.col("duration") == "U")
+        .then(None)
+        .otherwise(pl.col("duration"))
+        .alias("duration"),
+        pl.when(pl.col("current_energy_rating") == "INVALID!")
+        .then(None)
+        .otherwise(pl.col("current_energy_rating"))
+        .alias("current_energy_rating"),
+    ).with_columns(
+        (pl.col("latest_price") / pl.col("total_floor_area"))
+        .round(0)
+        .cast(pl.Int32)
+        .alias("Price per sqm"),
    )
+    wide = _finalize_merged_columns(wide)
+    postcode_area = _finalize_merged_columns(postcode_area)

    print("Collecting with streaming engine...")
-    df = wide.collect(engine="streaming")

    if mode == "listings":
+        df = wide.collect(engine="streaming")
        enriched_listings = _finalize_listings(df)
        _validate_property_postcodes(enriched_listings)
        print(f"Enriched listings rows: {enriched_listings.height}")
        return _BuildResult(listings=enriched_listings)

+    df, postcode_features = pl.collect_all([wide, postcode_area], engine="streaming")
    _validate_property_postcodes(df)

-    # Split into postcode-level and property-level dataframes
-    area_cols = [
-        c for c in df.columns if c in _AREA_COLUMNS or _is_dynamic_poi_metric_column(c)
-    ]
-    postcode_df = df.select(area_cols).group_by("Postcode").first()
+    postcode_df, properties_df = _split_normal_outputs(
+        df, postcode_features, expected_postcode_count=active_postcode_count
+    )
    print(f"Postcode rows: {postcode_df.height} (unique postcodes)")
-
-    property_cols = [
-        c
-        for c in df.columns
-        if (c not in _AREA_COLUMNS and not _is_dynamic_poi_metric_column(c))
-        or c == "Postcode"
-    ]
-    properties_df = df.select(property_cols)
    print(f"Property rows: {properties_df.height}")

    return _BuildResult(postcode=postcode_df, properties=properties_df)
--- a/pipeline/transform/postcode_boundaries/output.py
+++ b/pipeline/transform/postcode_boundaries/output.py
@ -1,10 +1,12 @@
 import json
+import shutil
 from collections import defaultdict
 from pathlib import Path

 from pyproj import Transformer
-from shapely import make_valid
-from shapely.geometry import MultiPolygon, Polygon
+from shapely import make_valid, set_precision
+from shapely.geometry import MultiPolygon, Polygon, mapping, shape
+from shapely.ops import transform as transform_geometry
 from shapely.ops import unary_union
 from tqdm import tqdm

@ -18,49 +20,47 @@ def _get_to_wgs84():
    return _to_wgs84


+def _largest_polygonal(geom) -> Polygon | None:
+    """Return the single largest-area Polygon inside ``geom``, or ``None``.
+
+    ``None`` and empty geometries yield ``None``. Invalid geometries are passed
+    through ``make_valid`` first. A Polygon is returned as-is, a MultiPolygon
+    yields its largest part, and a GeometryCollection is searched recursively.
+    Any other geometry type yields ``None``.
+    """
+    if geom is None or geom.is_empty:
+        return None
+    if not geom.is_valid:
+        geom = make_valid(geom)
+    if geom.geom_type == "Polygon":
+        return geom
+    if geom.geom_type == "MultiPolygon":
+        return max(geom.geoms, key=lambda g: g.area)
+    if geom.geom_type == "GeometryCollection":
+        # Recurse so nested MultiPolygons/collections also contribute their
+        # largest Polygon; non-polygonal parts come back as None and are skipped.
+        polygons = [
+            polygon
+            for part in geom.geoms
+            if (polygon := _largest_polygonal(part)) is not None
+        ]
+        if polygons:
+            return max(polygons, key=lambda g: g.area)
+    return None
+
+
 def to_wgs84_geojson(
     geom: Polygon | MultiPolygon, tolerance: float = 1.0
 ) -> dict | None:
     """Simplify geometry in BNG, convert to WGS84, return GeoJSON dict."""
-    if geom.is_empty:
+    # Reduce the input to one Polygon up front; None means there is no
+    # polygonal content to emit.
+    geom = _largest_polygonal(geom)
+    if geom is None:
         return None

     simplified = geom.simplify(tolerance, preserve_topology=True)
-    if simplified.is_empty:
+    # Re-reduce after simplification, which may change the geometry's shape.
+    simplified = _largest_polygonal(simplified)
+    if simplified is None:
         return None

     transformer = _get_to_wgs84()
-
-    def transform_ring(coords):
-        xs, ys = zip(*coords)
-        lons, lats = transformer.transform(list(xs), list(ys))
-        return [(round(lon, 6), round(lat, 6)) for lon, lat in zip(lons, lats)]
-
-    def transform_polygon(poly):
-        exterior = transform_ring(poly.exterior.coords)
-        holes = [transform_ring(h.coords) for h in poly.interiors]
-        return [exterior] + holes
-
-    # Force single Polygon — postcodes are contiguous delivery routes
-    if simplified.geom_type == "MultiPolygon":
-        simplified = max(simplified.geoms, key=lambda g: g.area)
-    elif simplified.geom_type == "GeometryCollection":
-        polys = [
-            g for g in simplified.geoms if g.geom_type in ("Polygon", "MultiPolygon")
-        ]
-        if not polys:
-            return None
-        simplified = max(polys, key=lambda g: g.area)
-        if simplified.geom_type == "MultiPolygon":
-            simplified = max(simplified.geoms, key=lambda g: g.area)
-
-    if simplified.geom_type != "Polygon" or simplified.is_empty:
+    # Reproject the whole geometry with the shared transformer, then snap it to
+    # a 0.000001 grid. The snapped result is reduced to a single Polygon once
+    # more before it is serialised.
+    wgs84 = transform_geometry(transformer.transform, simplified)
+    wgs84 = set_precision(wgs84, 0.000001, mode="valid_output")
+    wgs84 = _largest_polygonal(wgs84)
+    if wgs84 is None:
         return None

-    return {
-        "type": "Polygon",
-        "coordinates": transform_polygon(simplified),
-    }
+    return mapping(wgs84)


 def _fill_holes(geom):
@ -132,7 +132,11 @@ def write_district_geojson(
 ) -> int:
    """Group postcodes by district, write GeoJSON files. Returns file count."""
    units_dir = output_dir / "units"
-    units_dir.mkdir(parents=True, exist_ok=True)
+    tmp_units_dir = output_dir / "units.tmp"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    if tmp_units_dir.exists():
+        shutil.rmtree(tmp_units_dir)
+    tmp_units_dir.mkdir(parents=True)

    by_district: dict[str, list[tuple[str, Polygon | MultiPolygon]]] = defaultdict(list)
    for pc, geom in postcodes.items():
@ -141,14 +145,23 @@ def write_district_geojson(
        by_district[district].append((pc, geom))

    file_count = 0
+    seen_postcodes: set[str] = set()
    for district, entries in tqdm(
        sorted(by_district.items()), desc="Writing GeoJSON", unit="file"
    ):
        features = []
        for pc, geom in sorted(entries, key=lambda x: x[0]):
+            if pc in seen_postcodes:
+                raise ValueError(f"Duplicate postcode boundary feature: {pc}")
+            seen_postcodes.add(pc)
            geojson_geom = to_wgs84_geojson(geom)
            if geojson_geom is None:
-                continue
+                raise ValueError(f"Postcode boundary collapsed to empty geometry: {pc}")
+            written_geom = shape(geojson_geom)
+            if written_geom.is_empty or not written_geom.is_valid:
+                raise ValueError(
+                    f"Invalid postcode boundary geometry after output: {pc}"
+                )
            mapit_code = pc.replace(" ", "")
            features.append(
                {
@ -165,9 +178,12 @@ def write_district_geojson(
            continue

        collection = {"type": "FeatureCollection", "features": features}
-        out_path = units_dir / f"{district}.geojson"
+        out_path = tmp_units_dir / f"{district}.geojson"
        with open(out_path, "w") as f:
            json.dump(collection, f, separators=(",", ":"))
        file_count += 1

+    if units_dir.exists():
+        shutil.rmtree(units_dir)
+    tmp_units_dir.replace(units_dir)
    return file_count
--- a/pipeline/transform/postcode_boundaries/test_postcode_boundaries.py
+++ b/pipeline/transform/postcode_boundaries/test_postcode_boundaries.py
@ -3,6 +3,8 @@
 Each test targets a specific bug or edge case identified during code review.
 """

+import json
+
 import numpy as np
 import polars as pl
 import pytest
@ -11,7 +13,12 @@ from shapely.ops import unary_union

 from .oa_boundaries import parse_gpkg_geometry
 from .greenspace import subtract_greenspace
-from .output import _fill_holes, merge_fragments, to_wgs84_geojson
+from .output import (
+    _fill_holes,
+    merge_fragments,
+    to_wgs84_geojson,
+    write_district_geojson,
+)
 from .process_oa import _extract_polygonal, process_oa
 from .uprn import get_oa_uprns, load_uprns
 from .voronoi import _equal_split_fallback, compute_voronoi_regions
@ -154,6 +161,7 @@ class TestWhitespacePostcodes:
                "pcds": ["AA1 1AA", "AA1 1AB"],
                "east1m": [500010, 500030],
                "north1m": [180010, 180020],
+                "oa21cd": ["E00000001", "E00000001"],
                "doterm": ["2020-01-01", None],
                "ctry25cd": ["E92000001", "E92000001"],
            }
@ -165,6 +173,65 @@ class TestWhitespacePostcodes:

        assert loaded_df["PCDS"].to_list() == ["AA1 1AB"]

+    def test_arcgis_filters_to_active_english_postcodes(self, tmp_path):
+        uprns = pl.DataFrame(
+            {
+                "GRIDGB1E": [500010, 500020],
+                "GRIDGB1N": [180010, 180020],
+                "PCDS": ["AA1 1AA", "CF1 1AA"],
+                "OA21CD": ["E00000001", "E00000001"],
+            }
+        )
+        uprn_path = tmp_path / "uprn.parquet"
+        uprns.write_parquet(uprn_path)
+        arcgis = pl.DataFrame(
+            {
+                "pcds": ["AA1 1AA", "CF1 1AA"],
+                "east1m": [500010, 300010],
+                "north1m": [180010, 220010],
+                "oa21cd": ["E00000001", "W00000001"],
+                "doterm": [None, None],
+                "ctry25cd": ["E92000001", "W92000004"],
+            }
+        )
+        arcgis_path = tmp_path / "arcgis.parquet"
+        arcgis.write_parquet(arcgis_path)
+
+        loaded_df, _offsets = load_uprns(uprn_path, arcgis_path)
+
+        assert loaded_df["PCDS"].to_list() == ["AA1 1AA"]
+
+    def test_arcgis_adds_centroid_seed_for_active_postcode_without_uprn(self, tmp_path):
+        uprns = pl.DataFrame(
+            {
+                "GRIDGB1E": [500010],
+                "GRIDGB1N": [180010],
+                "PCDS": ["AA1 1AA"],
+                "OA21CD": ["E00000001"],
+            }
+        )
+        uprn_path = tmp_path / "uprn.parquet"
+        uprns.write_parquet(uprn_path)
+        arcgis = pl.DataFrame(
+            {
+                "pcds": ["AA1 1AA", "BB1 1BB"],
+                "east1m": [500010, 510000],
+                "north1m": [180010, 190000],
+                "oa21cd": ["E00000001", "E00000002"],
+                "doterm": [None, None],
+                "ctry25cd": ["E92000001", "E92000001"],
+            }
+        )
+        arcgis_path = tmp_path / "arcgis.parquet"
+        arcgis.write_parquet(arcgis_path)
+
+        loaded_df, offsets = load_uprns(uprn_path, arcgis_path)
+
+        assert set(loaded_df["PCDS"].to_list()) == {"AA1 1AA", "BB1 1BB"}
+        points, postcodes = get_oa_uprns(loaded_df, offsets, "E00000002")
+        assert postcodes == ["BB1 1BB"]
+        assert points.tolist() == [[510000.0, 190000.0]]
+

 # ---------------------------------------------------------------------------
 # Bug 3: Voronoi deduplication is first-seen-wins
@ -450,7 +517,9 @@ class TestProcessOAInspireParcelAssignment:
        )
        postcodes = ["A", "B"]

-        fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
+        fragments = process_oa(
+            oa_geom, points, postcodes, inspire_candidates=[left, right]
+        )
        frag_dict = dict(fragments)

        assert "A" in frag_dict and "B" in frag_dict
@ -494,7 +563,9 @@ class TestProcessOAInspireParcelAssignment:
        )
        postcodes = ["A", "B"]

-        fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
+        fragments = process_oa(
+            oa_geom, points, postcodes, inspire_candidates=[left, right]
+        )
        frag_dict = dict(fragments)

        assert "A" in frag_dict and "B" in frag_dict
@ -539,7 +610,9 @@ class TestProcessOAInspireParcelAssignment:
        )
        postcodes = ["A", "B"]

-        fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[straddling])
+        fragments = process_oa(
+            oa_geom, points, postcodes, inspire_candidates=[straddling]
+        )

        for _, geom in fragments:
            assert geom.difference(oa_geom).area < 0.01
@ -651,6 +724,22 @@ class TestToWgs84Geojson:
            assert lon_dp <= 6, f"Longitude {lon_s} has {lon_dp} decimal places"
            assert lat_dp <= 6, f"Latitude {lat_s} has {lat_dp} decimal places"

+    def test_write_district_geojson_replaces_stale_units(self, tmp_path):
+        """A rewrite must drop district files left over from an earlier run."""
+        units_dir = tmp_path / "units"
+        units_dir.mkdir()
+        # Seed a district file that the new run does not produce.
+        leftover = units_dir / "ZZ1.geojson"
+        leftover.write_text(json.dumps({"type": "FeatureCollection", "features": []}))
+
+        boundaries = {"AA1 1AA": box(530000, 180000, 530100, 180100)}
+        written_count = write_district_geojson(boundaries, tmp_path)
+
+        assert written_count == 1
+        assert not leftover.exists()
+        collection = json.loads((units_dir / "AA1.geojson").read_text())
+        assert collection["features"][0]["properties"]["postcodes"] == "AA1 1AA"
+

 # ---------------------------------------------------------------------------
 # Edge case: parse_gpkg_geometry rejects unknown envelope types
--- a/pipeline/transform/postcode_boundaries/uprn.py
+++ b/pipeline/transform/postcode_boundaries/uprn.py
@ -13,6 +13,33 @@ def _canonical_postcode_expr(name: str) -> pl.Expr:
    return pl.col(name).str.strip_chars().str.to_uppercase()


+def _active_english_arcgis_postcodes(arcgis_path: Path) -> pl.LazyFrame:
+    return (
+        pl.read_parquet(
+            arcgis_path,
+            columns=["pcds", "east1m", "north1m", "oa21cd", "ctry25cd", "doterm"],
+        )
+        .lazy()
+        .filter(pl.col("ctry25cd") == "E92000001")  # England rows only
+        .filter(pl.col("doterm").cast(pl.Utf8).is_null())  # null doterm = still active
+        .select(  # rename to the column names load_uprns selects
+            _canonical_postcode_expr("pcds").alias("PCDS"),
+            pl.col("east1m").cast(pl.Float64).alias("GRIDGB1E"),
+            pl.col("north1m").cast(pl.Float64).alias("GRIDGB1N"),
+            pl.col("oa21cd").alias("OA21CD"),
+        )
+        .filter(  # drop rows that cannot act as a seed point
+            pl.col("PCDS").is_not_null()
+            & (pl.col("PCDS") != "")
+            & pl.col("GRIDGB1E").is_not_null()
+            & pl.col("GRIDGB1N").is_not_null()
+            & pl.col("OA21CD").is_not_null()
+            & pl.col("OA21CD").str.starts_with("E")
+        )
+        .unique("PCDS", keep="first")  # default keep="any" is not reproducible
+    )
+
+
 def load_uprns(
    uprn_path: Path, arcgis_path: Path | None = None
 ) -> tuple[pl.DataFrame, dict[str, tuple[int, int]]]:
@ -25,6 +52,7 @@ def load_uprns(

    print("Loading UPRN lookup...")
    mapping = None
+    active_postcode_points = None
    if arcgis_path is not None:
        mapping = (
            build_postcode_mapping(arcgis_path)
@ -34,6 +62,7 @@ def load_uprns(
            )
            .unique("old_postcode")
        )
+        active_postcode_points = _active_english_arcgis_postcodes(arcgis_path)

    # Sort via streaming sink to avoid polars doubling memory during in-memory sort
    with tempfile.NamedTemporaryFile(
@ -51,11 +80,21 @@ def load_uprns(

    if mapping is not None and mapping.height > 0:
        uprns = (
-            uprns.join(mapping.lazy(), left_on="PCDS", right_on="old_postcode", how="left")
+            uprns.join(
+                mapping.lazy(), left_on="PCDS", right_on="old_postcode", how="left"
+            )
            .with_columns(pl.coalesce("new_postcode", "PCDS").alias("PCDS"))
            .select("GRIDGB1E", "GRIDGB1N", "PCDS", "OA21CD")
        )

+    if active_postcode_points is not None:
+        active_postcodes = active_postcode_points.select("PCDS").unique()
+        uprns = uprns.join(active_postcodes, on="PCDS", how="semi")
+        missing_active = active_postcode_points.join(
+            uprns.select("PCDS").unique(), on="PCDS", how="anti"
+        ).select("GRIDGB1E", "GRIDGB1N", "PCDS", "OA21CD")
+        uprns = pl.concat([uprns, missing_active], how="vertical_relaxed")
+
    uprns.sort("OA21CD").sink_parquet(tmp_path)
    release_memory()

--- a/pipeline/transform/test_crime_hotspot_tiles.py
+++ b/pipeline/transform/test_crime_hotspot_tiles.py
@ -0,0 +1,52 @@
+import json
+
+from pipeline.transform.crime_hotspot_tiles import _write_geojsonseq
+
+_HEADER = (
+    "Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,"
+    "LSOA code,LSOA name,Crime type,Last outcome category,Context"
+)
+
+
+def _row(lon, lat, month, crime_type):
+    return f",{month},F,F,{lon},{lat},On or near X,E01000001,L,{crime_type},U,"
+
+
+def _write_csv(path, rows):
+    path.write_text("\n".join([_HEADER, *rows]) + "\n")
+
+
+def test_write_geojsonseq_collapses_shared_anchors_into_weighted_features(tmp_path):
+    """Incidents sharing anchor, month and type collapse into one weighted feature."""
+    csv = tmp_path / "2024-01-test-street.csv"
+    _write_csv(
+        csv,
+        [
+            # Two incidents snapped to the exact same anchor/month/type -> one
+            # feature with count=2.
+            _row(-0.1, 51.5, "2024-01", "Burglary"),
+            _row(-0.1, 51.5, "2024-01", "Burglary"),
+            # Same coord, different crime type -> kept separate (per-type filter).
+            _row(-0.1, 51.5, "2024-01", "Robbery"),
+            # Out of bounds -> dropped entirely.
+            _row(-0.1, 80.0, "2024-01", "Burglary"),
+            # Missing coordinate -> dropped entirely.
+            _row("", "", "2024-01", "Burglary"),
+        ],
+    )
+
+    out = tmp_path / "hotspots.geojsonseq"
+    feature_count, incident_count = _write_geojsonseq([csv], out)
+
+    features = [json.loads(line) for line in out.read_text().splitlines()]
+    assert feature_count == len(features) == 2  # returned tally matches lines written
+    assert incident_count == 3  # 2 burglaries + 1 robbery, in-bounds only
+
+    by_type = {f["properties"]["crime_type"]: f["properties"] for f in features}
+    # The busy anchor is a single feature carrying its full incident weight,
+    # so tippecanoe's density thinning can no longer silently erase it.
+    assert by_type["Burglary"]["count"] == 2
+    assert by_type["Burglary"]["weight"] == 2
+    assert by_type["Robbery"]["count"] == 1
+    # Geometry preserved as [lon, lat].
+    assert all(f["geometry"]["coordinates"] == [-0.1, 51.5] for f in features)
--- a/pipeline/transform/test_crime_spatial.py
+++ b/pipeline/transform/test_crime_spatial.py
@ -1,9 +1,13 @@
 import json

+import numpy as np
 import polars as pl
+import pytest
+import shapely
 from pyproj import Transformer

 from pipeline.transform.crime_spatial import transform_crime_spatial
+from pipeline.transform.postcode_boundaries.loader import load_postcode_polygons

 _TO_WGS84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)

@ -82,7 +86,10 @@ def test_buffer_overlap_counts_for_each_postcode(tmp_path):

    output = tmp_path / "crime_by_postcode.parquet"
    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
-    transform_crime_spatial(crime, units, output, by_year)
+    # Pin the 50m buffer the geometry above was designed around (the production
+    # default is now 100m). The three squares are equal-area, so area
+    # normalisation leaves the counts unchanged.
+    transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)

    rows = {
        r["postcode"]: r
@ -127,7 +134,7 @@ def test_by_year_annualises_and_rolls_up(tmp_path):

    output = tmp_path / "crime_by_postcode.parquet"
    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
-    transform_crime_spatial(crime, units, output, by_year)
+    transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)

    by_year_df = pl.read_parquet(by_year)
    assert by_year_df.height == 1
@ -145,3 +152,130 @@ def test_by_year_annualises_and_rolls_up(tmp_path):
    # 2023 serious = Burglary(12) + Robbery(12) = 24; 2024 = Burglary(12).
    assert serious[2023] == 24.0
    assert serious[2024] == 12.0
+
+
+def test_area_normalisation_divides_out_buffered_catchment(tmp_path):
+    # Three postcodes of increasing footprint, each with exactly one incident in
+    # its buffer. Normalisation rescales by median_catchment / buffered_area, so
+    # the smallest scores highest and the median-sized one is unchanged -- i.e.
+    # the metric is a density. Dividing by the *buffered* catchment (not the raw
+    # polygon) means the fixed buffer-ring floor keeps the spread gentle, so the
+    # tiniest postcode is not blown up out of proportion.
+    units = tmp_path / "units"
+    _write_boundaries(
+        units,
+        {
+            "AB1": [
+                _square_feature("AB1 1AA", 1000, 1000, 1010, 1010),  # 10x10
+                _square_feature("AB1 1AB", 3000, 3000, 3010, 3020),  # 10x20 (median)
+                _square_feature("AB1 1AC", 5000, 5000, 5020, 5020),  # 20x20
+            ]
+        },
+    )
+
+    crime = tmp_path / "crime"
+    _write_month(
+        crime,
+        "2024-01",
+        [
+            _crime_row("2024-01", 1005, 1005, "Burglary"),
+            _crime_row("2024-01", 3005, 3010, "Burglary"),
+            _crime_row("2024-01", 5010, 5010, "Burglary"),
+        ],
+    )
+
+    output = tmp_path / "crime_by_postcode.parquet"
+    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
+    transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)
+
+    # Re-derive the expected values from the same buffered catchment areas: each
+    # postcode is 12/yr before normalisation, then x (median_buf / buffered_area).
+    postcodes, polygons = load_postcode_polygons(units)
+    buf_area = {
+        pc: float(shapely.area(shapely.buffer(poly, 50.0, quad_segs=8)))
+        for pc, poly in zip(postcodes, polygons)
+    }
+    median_buf = float(np.median(list(buf_area.values())))
+    expected = {pc: 12.0 * median_buf / buf_area[pc] for pc in buf_area}
+
+    rows = {r["postcode"]: r for r in pl.read_parquet(output).to_dicts()}
+    for pc, exp in expected.items():
+        assert rows[pc]["Burglary (avg/yr)"] == pytest.approx(exp, abs=0.1)
+
+    # Median catchment unchanged; ordering is by inverse buffered area, but the
+    # buffer-ring floor keeps the spread far below the ~4x raw-area ratio.
+    assert rows["AB1 1AB"]["Burglary (avg/yr)"] == pytest.approx(12.0, abs=0.05)
+    small = rows["AB1 1AA"]["Burglary (avg/yr)"]
+    big = rows["AB1 1AC"]["Burglary (avg/yr)"]
+    assert small > 12.0 > big
+    assert small / big < 1.5
+
+    # by-year series carries the same normalisation.
+    by_year_df = pl.read_parquet(by_year)
+    small_row = by_year_df.filter(pl.col("postcode") == "AB1 1AA").row(0, named=True)
+    assert small_row["Burglary (by year)"] == [
+        {"year": 2024, "count": pytest.approx(expected["AB1 1AA"], abs=0.1)}
+    ]
+
+
+def test_avg_yr_is_simple_mean_of_year_bars(tmp_path):
+    # Uneven month coverage across years: 2023 has 1 month (2 incidents -> 24/yr),
+    # 2024 has 2 months (2 incidents -> 12/yr). The headline must be the *simple*
+    # mean of the bars (24+12)/2 = 18, not the month-weighted pooled rate
+    # (4 incidents / 3 months * 12 = 16).
+    units = tmp_path / "units"
+    _write_boundaries(
+        units, {"AB1": [_square_feature("AB1 1AA", 1000, 1000, 1010, 1010)]}
+    )
+
+    crime = tmp_path / "crime"
+    _write_month(
+        crime,
+        "2023-01",
+        [
+            _crime_row("2023-01", 1005, 1005, "Burglary"),
+            _crime_row("2023-01", 1005, 1005, "Burglary"),
+        ],
+    )
+    _write_month(crime, "2024-01", [_crime_row("2024-01", 1005, 1005, "Burglary")])
+    _write_month(crime, "2024-02", [_crime_row("2024-02", 1005, 1005, "Burglary")])
+
+    output = tmp_path / "crime_by_postcode.parquet"
+    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
+    transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)
+
+    avg = pl.read_parquet(output).row(0, named=True)
+    assert avg["Burglary (avg/yr)"] == pytest.approx(18.0, abs=0.05)
+
+    row = pl.read_parquet(by_year).row(0, named=True)
+    bars = {p["year"]: p["count"] for p in row["Burglary (by year)"]}
+    assert bars == {2023: pytest.approx(24.0, abs=0.05), 2024: pytest.approx(12.0, abs=0.05)}
+
+
+def test_unknown_crime_type_is_dropped_with_warning(tmp_path, capsys):
+    units = tmp_path / "units"
+    _write_boundaries(
+        units, {"AB1": [_square_feature("AB1 1AA", 1000, 1000, 1010, 1010)]}
+    )
+
+    crime = tmp_path / "crime"
+    _write_month(
+        crime,
+        "2024-01",
+        [
+            _crime_row("2024-01", 1005, 1005, "Burglary"),
+            _crime_row("2024-01", 1005, 1005, "Cyber fraud"),
+        ],
+    )
+
+    output = tmp_path / "crime_by_postcode.parquet"
+    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
+    transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)
+
+    columns = pl.read_parquet(output).columns
+    # The unknown type is dropped (no column for it) but a warning is emitted.
+    assert "Cyber fraud (avg/yr)" not in columns
+    assert "Burglary (avg/yr)" in columns
+    err = capsys.readouterr().err
+    assert "Cyber fraud" in err
+    assert "WARNING" in err
--- a/pipeline/transform/test_merge.py
+++ b/pipeline/transform/test_merge.py
@ -10,8 +10,10 @@ from pipeline.transform.merge import (
    LISTED_BUILDING_FEATURE,
    TREE_DENSITY_FEATURE,
    _LISTING_OVERLAY_SOURCES,
+    _active_english_postcode_area,
    _build_unmatched_listing_seed_rows,
    _canonical_postcode_expr,
+    _filter_to_active_english_postcodes,
    _finalize_listings,
    _integrate_listings,
    _match_direct_epc,
@ -24,8 +26,11 @@ from pipeline.transform.merge import (
    _matched_listed_building_flags,
    _postcode_conservation_area_flags,
    _postcode_listed_building_candidates,
+    _remap_terminated_postcodes,
+    _split_normal_outputs,
    _tree_density_by_postcode,
    _validate_lad_source_coverage,
+    _validate_postcode_feature_output,
    _validate_property_postcodes,
 )

@ -79,6 +84,113 @@ def test_crime_columns_are_spatial_counts_not_per_capita() -> None:
    assert "Minor crime per 1k residents (avg/yr)" not in _AREA_COLUMNS


+def test_active_english_postcode_area_filters_to_active_england() -> None:
+    arcgis = pl.DataFrame(
+        {
+            "pcds": ["AA1 1AA", "AA1 1AB", "CF1 1AA"],
+            "ctry25cd": ["E92000001", "E92000001", "W92000004"],
+            "doterm": [None, "2020-01-01", None],
+            "lat": [51.0, 51.1, 52.0],
+            "long": [-0.1, -0.2, -3.0],
+            "lsoa21cd": ["L1", "L2", "L3"],
+            "oa21cd": ["O1", "O2", "O3"],
+            "pcon24cd": ["P1", "P2", "P3"],
+        }
+    )
+
+    result = _active_english_postcode_area(arcgis.lazy()).collect()
+
+    assert result.to_dicts() == [
+        {
+            "postcode": "AA1 1AA",
+            "lat": 51.0,
+            "lon": -0.1,
+            "ctry25cd": "E92000001",
+            "lsoa21": "L1",
+            "oa21": "O1",
+            "pcon": "P1",
+        }
+    ]
+
+
+def test_remap_then_active_filter_keeps_terminated_english_properties() -> None:
+    wide = pl.DataFrame(
+        {
+            "postcode": ["OLD 1AA", "NEW 1AA", "CF1 1AA"],
+            "row_id": [1, 2, 3],
+        }
+    ).lazy()
+    mapping = pl.DataFrame(
+        {"old_postcode": ["OLD 1AA"], "new_postcode": ["NEW 1AA"]}
+    ).lazy()
+    active_postcodes = pl.DataFrame({"postcode": ["NEW 1AA"]}).lazy()
+
+    result = (
+        _filter_to_active_english_postcodes(
+            _remap_terminated_postcodes(wide, mapping), active_postcodes
+        )
+        .collect()
+        .sort("row_id")
+    )
+
+    assert result.to_dicts() == [
+        {"postcode": "NEW 1AA", "row_id": 1},
+        {"postcode": "NEW 1AA", "row_id": 2},
+    ]
+
+
+def test_split_normal_outputs_uses_postcode_feature_universe() -> None:
+    df = pl.DataFrame(
+        {
+            "Postcode": ["AA1 1AA"],
+            "Address per Property Register": ["1 Example Road"],
+            "Last known price": [250_000],
+            "lat": [51.0],
+            "lon": [-0.1],
+            "ctry25cd": ["E92000001"],
+            "lsoa21": ["L1"],
+        }
+    )
+    postcode_features = pl.DataFrame(
+        {
+            "Postcode": ["AA1 1AA", "BB1 1BB"],
+            "lat": [51.0, 52.0],
+            "lon": [-0.1, -0.2],
+            "ctry25cd": ["E92000001", "E92000001"],
+            "lsoa21": ["L1", "L2"],
+            "Distance to nearest amenity (Park) (km)": [0.3, 0.8],
+        }
+    )
+
+    postcode_df, properties_df = _split_normal_outputs(
+        df, postcode_features, expected_postcode_count=2
+    )
+
+    assert postcode_df["Postcode"].to_list() == ["AA1 1AA", "BB1 1BB"]
+    assert "Distance to nearest amenity (Park) (km)" in postcode_df.columns
+    assert properties_df.to_dicts() == [
+        {
+            "Postcode": "AA1 1AA",
+            "Address per Property Register": "1 Example Road",
+            "Last known price": 250_000,
+        }
+    ]
+
+
+def test_postcode_feature_validation_rejects_unsupported_or_ungeocoded_rows() -> None:
+    postcode_df = pl.DataFrame(
+        {
+            "Postcode": ["AA1 1AA", "CF1 1AA"],
+            "lat": [51.0, None],
+            "lon": [-0.1, None],
+            "ctry25cd": ["E92000001", "W92000004"],
+        }
+    )
+
+    with pytest.raises(ValueError, match="unsupported or ungeocoded"):
+        _validate_postcode_feature_output(postcode_df, expected_postcode_count=2)
+
+
 def test_listed_building_feature_is_property_level() -> None:
    assert LISTED_BUILDING_FEATURE not in _AREA_COLUMNS

@ -418,9 +530,7 @@ def test_build_unmatched_listing_seed_rows_fills_property_shape_fields(
    )
    unmatched_idxs = listings.select("_listing_idx")

-    seed = _build_unmatched_listing_seed_rows(
-        unmatched_idxs, listings, template_schema
-    )
+    seed = _build_unmatched_listing_seed_rows(unmatched_idxs, listings, template_schema)

    assert seed.height == 1
    assert seed["postcode"].to_list() == ["SW1A 1AA"]
@ -550,7 +660,12 @@ def test_match_direct_epc_matches_by_uprn_across_postcodes() -> None:
            [{"_listing_uprn": "100000000001", "_listing_match_postcode": "ZZ99ZZ"}]
        ),
        _direct_epc_candidates(
-            [{"_direct_epc_uprn": "100000000001", "_direct_epc_match_postcode": "AA11AA"}]
+            [
+                {
+                    "_direct_epc_uprn": "100000000001",
+                    "_direct_epc_match_postcode": "AA11AA",
+                }
+            ]
        ),
    )

--- a/pipeline/transform/test_tree_density.py
+++ b/pipeline/transform/test_tree_density.py
@ -1,81 +1,105 @@
 import math
+import zipfile
 from pathlib import Path

 import numpy as np
 import polars as pl
+import pyogrio
 import pytest
 import shapely

 from pipeline.transform.tree_density import (
-    STREET_TREE_COVERAGE_COL,
-    STREET_TREE_DENSITY_COL,
-    _add_nfi_batch,
+    _accumulate_clipped_area,
    _coverage_percentile_expr,
+    _finalize_metrics,
+    _geometry_column,
+    _layers,
    _metric_columns,
+    _nfi_dataset_path,
    _postcode_buffers,
    _postcode_density_percentile_col,
+    _safe_extract_zip_dir,
    _with_postcode_density_percentiles,
-    _write_street_rollups,
 )


-def test_nfi_accumulation_adds_only_clipped_overlap_area() -> None:
+def test_accumulate_clipped_area_adds_only_in_buffer_overlap() -> None:
    radius_m = 50
    points = pl.DataFrame({"postcode": ["A", "B"], "x": [0.0, 1000.0], "y": [0.0, 0.0]})
    circles, tree = _postcode_buffers(points, radius_m)
    buffer_area = math.pi * radius_m * radius_m

-    # A large woodland square centred on postcode A fully covers A's circle.
+    # A large square centred on postcode A fully covers A's buffer circle.
    canopy_area = np.zeros(2)
-    feature_count = np.zeros(2, dtype=np.uint32)
    big = shapely.box(-500, -500, 500, 500)  # 1,000,000 sqm parcel
-    _add_nfi_batch(
-        np.array([big], dtype=object),
-        np.array(["Woodland"], dtype=object),
-        circles,
-        tree,
-        canopy_area,
-        feature_count,
-        radius_m,
-    )
+    _accumulate_clipped_area(np.array([big], dtype=object), circles, tree, canopy_area)
    # Only the clipped circle area is added (the 32-gon buffer approximates the
    # circle to ~1%), NOT the full 1,000,000 sqm polygon.
    assert canopy_area[0] == pytest.approx(buffer_area, rel=1e-2)
-    assert canopy_area[0] <= buffer_area  # never exceeds the buffer area
+    assert canopy_area[0] <= buffer_area  # never exceeds the true buffer area
    assert canopy_area[1] == 0.0  # postcode B is 1km away, no overlap
-    assert feature_count.tolist() == [1, 0]

    # A large parcel that only slivers into B's circle must add only the sliver,
-    # not its full area -- the failure mode the old centroid path could not avoid.
+    # not its full area -- the failure mode a centroid/full-area path could not avoid.
    canopy_area = np.zeros(2)
-    feature_count = np.zeros(2, dtype=np.uint32)
    sliver = shapely.box(1040, -500, 2000, 500)  # left edge 10m inside B's circle
-    _add_nfi_batch(
-        np.array([sliver], dtype=object),
-        np.array(["Woodland"], dtype=object),
-        circles,
-        tree,
-        canopy_area,
-        feature_count,
-        radius_m,
+    _accumulate_clipped_area(
+        np.array([sliver], dtype=object), circles, tree, canopy_area
    )
    assert canopy_area[0] == 0.0
    assert 0.0 < canopy_area[1] < buffer_area  # tiny segment, far below 1M sqm

-    # Non-woodland categories contribute nothing.
-    canopy_area = np.zeros(2)
-    feature_count = np.zeros(2, dtype=np.uint32)
-    _add_nfi_batch(
-        np.array([big], dtype=object),
-        np.array(["Non woodland"], dtype=object),
+
+def test_accumulate_clipped_area_drops_missing_and_empty_geometry() -> None:
+    radius_m = 50
+    points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
+    circles, tree = _postcode_buffers(points, radius_m)
+
+    canopy_area = np.zeros(1)
+    geoms = np.array(
+        [None, shapely.from_wkt("POLYGON EMPTY"), shapely.box(-10, -10, 10, 10)],
+        dtype=object,
+    )
+    # A None and an empty geometry must be skipped, not crash, and only the real
+    # 400 sqm box is accumulated (it is fully inside the buffer).
+    _accumulate_clipped_area(geoms, circles, tree, canopy_area)
+    assert canopy_area[0] == pytest.approx(400.0)
+
+
+def test_accumulate_clipped_area_height_weighted_by_overlap() -> None:
+    radius_m = 50
+    points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
+    circles, tree = _postcode_buffers(points, radius_m)
+
+    canopy_area = np.zeros(1)
+    height_weighted_sum = np.zeros(1)
+    height_weight = np.zeros(1)
+    geoms = np.array(
+        [
+            shapely.box(-10, -10, 0, 0),  # 100 sqm, fully inside
+            shapely.box(0, 0, 20, 20),  # 400 sqm, fully inside
+            shapely.box(-5, 0, 0, 5),  # 25 sqm, NaN height -> ignored for height
+        ],
+        dtype=object,
+    )
+    height = np.array([5.0, 10.0, np.nan])
+
+    _accumulate_clipped_area(
+        geoms,
        circles,
        tree,
        canopy_area,
-        feature_count,
-        radius_m,
+        height=height,
+        height_weighted_sum=height_weighted_sum,
+        height_weight=height_weight,
    )
-    assert canopy_area.tolist() == [0.0, 0.0]
-    assert feature_count.tolist() == [0, 0]
+
+    # All three clipped areas count toward canopy; only the finite-height ones
+    # contribute to the area-weighted mean height.
+    assert canopy_area[0] == pytest.approx(525.0)
+    assert height_weight[0] == pytest.approx(500.0)
+    mean_height = height_weighted_sum[0] / height_weight[0]
+    assert mean_height == pytest.approx((5.0 * 100 + 10.0 * 400) / 500)  # 9.0


 def test_coverage_percentile_expr_ranks_higher_coverage_higher() -> None:
@ -88,76 +112,142 @@ def test_coverage_percentile_expr_ranks_higher_coverage_higher() -> None:
    assert result["percentile"].to_list() == [0.0, 50.0, 100.0, None]


-def test_coverage_percentile_expr_uses_exact_scale_endpoints() -> None:
+def test_coverage_percentile_expr_uses_tie_consistent_average_rank() -> None:
+    # Tied extremes share their mean rank instead of being pinned to exact 0/100,
+    # so the whole scale runs on one consistent average-rank formula.
+    # Average ranks here are 1.5, 1.5, 3, 4.5, 4.5 over n = 5 values, and
+    # (rank - 1) / (n - 1) * 100 turns them into 12.5, 12.5, 50, 87.5, 87.5.
    df = pl.DataFrame({"coverage": [0.0, 0.0, 5.0, 10.0, 10.0]})

    result = df.lazy().with_columns(
        _coverage_percentile_expr("coverage", "percentile")
    ).collect()

-    assert result["percentile"].to_list() == [0.0, 0.0, 50.0, 100.0, 100.0]
+    assert result["percentile"].to_list() == [12.5, 12.5, 50.0, 87.5, 87.5]


-def test_street_rollup_percentiles_are_ranked_over_raw_street_coverage(
-    tmp_path: Path,
-) -> None:
+def test_coverage_percentile_expr_all_equal_is_neutral_midpoint() -> None:
+    """A column with no spread scores every non-null value at the midpoint (50)."""
+    # (coverage values, expected percentiles); a null input stays null.
+    cases = [
+        ([5.0, 5.0, 5.0], [50.0, 50.0, 50.0]),
+        ([7.0], [50.0]),
+        ([None, 5.0, 5.0, 5.0], [None, 50.0, 50.0, 50.0]),
+    ]
+    for coverage, expected in cases:
+        ranked = (
+            pl.DataFrame({"coverage": coverage})
+            .lazy()
+            .with_columns(_coverage_percentile_expr("coverage", "percentile"))
+            .collect()
+        )
+        assert ranked["percentile"].to_list() == expected
+
+
+def test_finalize_metrics_caps_density_keeps_raw_area_and_weights_height() -> None:
+    """Density is capped at 100, area stays raw, and height is null without weight."""
    radius_m = 50
-    density_col, area_col, count_col, height_col = _metric_columns(radius_m)
+    buffer_area = math.pi * radius_m * radius_m
+    density_col, area_col, height_col = _metric_columns(radius_m)
+
+    points = pl.DataFrame({"postcode": ["AA1 1AA", "AA1 1AB", "AA1 1AC"]})
+    # Canopy of 0x, 0.5x and 2x the buffer area: the last one must hit the cap.
+    canopy_area = np.array([0.0, buffer_area * 0.5, buffer_area * 2.0])
+    # Postcode 0: no height samples -> null. Postcode 1: area-weighted mean = 5.
+    # Postcode 2 likewise has zero height weight -> null.
+    height_weighted_sum = np.array([0.0, 500.0, 0.0])
+    height_weight = np.array([0.0, 100.0, 0.0])
+
+    metrics = _finalize_metrics(
+        points, canopy_area, height_weighted_sum, height_weight, radius_m
+    )
+
+    assert metrics[density_col].to_list() == [0.0, 50.0, 100.0]  # capped at 100
+    # area_col is the raw clipped accumulation, intentionally uncapped.
+    assert metrics[area_col].to_list() == pytest.approx(
+        [0.0, round(buffer_area * 0.5, 1), round(buffer_area * 2.0, 1)]
+    )
+    assert metrics[height_col].to_list() == [None, 5.0, None]
+    # The mixed-unit feature-count column has been removed entirely.
+    assert "Tree features within 50m" not in metrics.columns
+    assert set(metrics.columns) == {"postcode", density_col, area_col, height_col}
+
+
+def test_postcode_density_percentiles_rank_over_density() -> None:
+    """The percentile column is added and spans 0-100 across three distinct densities."""
+    radius_m = 50
+    density_col, area_col, height_col = _metric_columns(radius_m)
    percentile_col = _postcode_density_percentile_col(radius_m)

-    postcode_metrics = _with_postcode_density_percentiles(
+    metrics = _with_postcode_density_percentiles(
        pl.DataFrame(
            {
                "postcode": ["AA1 1AA", "AA1 1AB", "AA1 1AC"],
                density_col: [10.0, 30.0, 50.0],
                area_col: [100.0, 300.0, 500.0],
-                count_col: [1, 3, 5],
                height_col: [4.0, 6.0, 8.0],
            }
        ),
        radius_m,
    )

-    price_paid = pl.DataFrame(
-        {
-            "postcode": ["AA1 1AA", "AA1 1AA", "AA1 1AB", "AA1 1AC"],
-            "paon": ["1", "2", "3", "4"],
-            "saon": ["", "", "", ""],
-            "street": ["Oak Road", "Oak Road", "Oak Road", "Elm Street"],
-            "locality": ["", "", "", ""],
-            "town_city": ["Test Town", "Test Town", "Test Town", "Test Town"],
-            "district": ["Test District"] * 4,
-            "county": ["Test County"] * 4,
-            "date_of_transfer": [
-                "2024-01-01",
-                "2024-01-02",
-                "2024-01-03",
-                "2024-01-04",
-            ],
-        }
+    # Three untied densities rank 1, 2, 3 -> 0, 50 and 100 on the percentile scale.
+    assert percentile_col in metrics.columns
+    assert metrics[percentile_col].to_list() == [0.0, 50.0, 100.0]
+
+
+def test_safe_extract_zip_dir_rejects_path_traversal(tmp_path: Path) -> None:
+    """An archive member that climbs out of the target directory is refused."""
+    archive_path = tmp_path / "evil.zip"
+    target_dir = tmp_path / "extract"
+    with zipfile.ZipFile(archive_path, "w") as bundle:
+        # The "../" prefix points above the extraction directory.
+        bundle.writestr("../escape.txt", "pwned")
+
+    with pytest.raises(ValueError, match="Unsafe path"):
+        _safe_extract_zip_dir(archive_path, target_dir, force=True)
+
+
+def test_safe_extract_zip_dir_extracts_benign_archive(tmp_path: Path) -> None:
+    """A well-formed archive is unpacked and the extraction directory is returned."""
+    archive_path = tmp_path / "ok.zip"
+    target_dir = tmp_path / "extract"
+    with zipfile.ZipFile(archive_path, "w") as bundle:
+        bundle.writestr("data/x.txt", "hello")
+
+    returned = _safe_extract_zip_dir(archive_path, target_dir, force=True)
+
+    assert returned == target_dir
+    extracted_member = target_dir / "data" / "x.txt"
+    assert extracted_member.read_text() == "hello"
+
+
+def test_geometry_column_resolution() -> None:
+    """Resolution order: metadata name, then a well-known name, then the last column."""
+    cases = [
+        ({"geometry_name": "SHAPE"}, ["MEANHT", "SHAPE"], "SHAPE"),
+        ({}, ["a", "wkb_geometry", "b"], "wkb_geometry"),
+        ({"geometry_name": None}, ["x", "geom"], "geom"),
+        ({}, ["a", "b", "c"], "c"),  # last-column fallback
+    ]
+    for metadata, column_names, expected in cases:
+        assert _geometry_column(metadata, column_names) == expected
+
+
+def _zip_with_shapefiles(zip_path: Path, names: list[str]) -> None:
+    """Create ``zip_path`` containing one zero-byte member for each entry in ``names``."""
+    with zipfile.ZipFile(zip_path, mode="w") as bundle:
+        for member_name in names:
+            bundle.writestr(member_name, "")
+
+
+def test_nfi_dataset_path_requires_exactly_one_shapefile(tmp_path: Path) -> None:
+    """Two .shp members raise ValueError, none raises FileNotFoundError, one resolves."""
+    # Each case extracts into its own directory so leftovers cannot leak between them.
+    multi = tmp_path / "multi.zip"
+    _zip_with_shapefiles(multi, ["a.shp", "b.shp"])
+    with pytest.raises(ValueError, match="exactly one shapefile"):
+        _nfi_dataset_path(multi, tmp_path / "multi_x", force_extract=True, use_vsizip=False)
+
+    none = tmp_path / "none.zip"
+    _zip_with_shapefiles(none, ["readme.txt"])
+    with pytest.raises(FileNotFoundError):
+        _nfi_dataset_path(none, tmp_path / "none_x", force_extract=True, use_vsizip=False)
+
+    # Sidecar files such as .dbf do not count toward the shapefile total.
+    one = tmp_path / "one.zip"
+    _zip_with_shapefiles(one, ["woodland.shp", "woodland.dbf"])
+    resolved = _nfi_dataset_path(
+        one, tmp_path / "one_x", force_extract=True, use_vsizip=False
    )
-    price_paid_path = tmp_path / "price-paid.parquet"
-    output_streets = tmp_path / "streets.parquet"
-    output_addresses = tmp_path / "addresses.parquet"
-    price_paid.write_parquet(price_paid_path)
+    assert resolved.endswith("woodland.shp")

-    _write_street_rollups(
-        postcode_metrics=postcode_metrics,
-        price_paid_path=price_paid_path,
-        output_streets=output_streets,
-        output_addresses=output_addresses,
-        radius_m=radius_m,
+
+def test_layers_selection_and_unknown(monkeypatch: pytest.MonkeyPatch) -> None:
+    """No selection yields every layer, a selection filters, an unknown name raises."""
+    # Stub pyogrio so no dataset is opened; the path argument is ignored.
+    monkeypatch.setattr(
+        pyogrio,
+        "list_layers",
+        lambda _path: [("L1", "Polygon"), ("L2", "Polygon")],
    )
-
-    streets = pl.read_parquet(output_streets).sort("street")
-    addresses = pl.read_parquet(output_addresses)
-
-    assert streets["street"].to_list() == ["Elm Street", "Oak Road"]
-    assert streets[STREET_TREE_COVERAGE_COL].to_list() == pytest.approx([50.0, 16.7])
-    assert streets.select("street", STREET_TREE_DENSITY_COL).rows() == [
-        ("Elm Street", 100.0),
-        ("Oak Road", 0.0),
-    ]
-    assert percentile_col in addresses.columns
-    assert STREET_TREE_COVERAGE_COL in addresses.columns
-    assert STREET_TREE_DENSITY_COL in addresses.columns
+    assert _layers("ignored", None) == ["L1", "L2"]
+    assert _layers("ignored", ("L2",)) == ["L2"]
+    with pytest.raises(ValueError, match="Unknown TOW layer"):
+        _layers("ignored", ("L3",))
--- a/pipeline/transform/tree_density.py
+++ b/pipeline/transform/tree_density.py
@ -1,16 +1,28 @@
-"""Derive street-scale tree density metrics from Forest Research TOW + NFI data.
+"""Derive postcode-scale tree density metrics from Forest Research TOW + NFI data.

 The Forest Research Trees Outside Woodland release is an Esri File Geodatabase
 inside property-data/FR_TOW_V1_ALL.zip. This transformer computes a compact
-postcode-level metric from the tree polygons, then optionally rolls that up to
-Price Paid street names so the dashboard can answer "what is this address's
-street like?" without loading the full geodatabase at runtime.
+postcode-level metric from the tree polygons so the dashboard can answer "how
+green is this postcode?" without loading the full geodatabase at runtime.

-TOW only covers trees *outside* woodland, so the National Forest Inventory (NFI)
-woodland layer is optionally unioned in. TOW canopy is accumulated by centroid
-proximity (tiny crowns), while large NFI woodland parcels are accumulated by
-true buffer-clipped intersection area so they cannot saturate a postcode from
-mere centroid proximity.
+Every postcode centroid is expanded into a radius-r buffer ("extended area").
+Both TOW tree crowns and National Forest Inventory (NFI) woodland parcels are
+accumulated by *true buffer-clipped intersection area*: only the part of each
+polygon that falls inside a postcode's buffer is counted, never the area that
+spills outside it. A crown straddling the buffer edge therefore contributes only
+its inside portion, and a parcel reaching into the buffer from outside is still
+counted -- no polygon can saturate a postcode from mere proximity.
+
+TOW only covers trees *outside* woodland, so the NFI woodland layer is the
+geometric complement of TOW and is optionally unioned in. The two products are
+*assumed disjoint*: clipped TOW crown area and clipped NFI woodland area are
+summed into the same per-postcode accumulator, so any spatial overlap between a
+TOW crown and an NFI parcel (boundary slop where "groups of trees" meet
+"woodland") would be double-counted. The final density is capped at 100% and
+_finalize_metrics logs how many postcodes exceed 100% raw coverage, which is a
+direct symptom of such overlap (or of overlapping crowns within one buffer); if
+that count is material the products are not disjoint and the NFI clip should be
+taken against the complement of TOW.
 """

 from __future__ import annotations
@ -25,16 +37,12 @@ import numpy as np
 import polars as pl
 import pyogrio
 import shapely
-from scipy.spatial import cKDTree


 TOW_GDB_NAME = "FR_TOW_V1_ALL.gdb"
-STREET_TREE_DENSITY_COL = "Street tree density percentile"
-STREET_TREE_COVERAGE_COL = "Street tree coverage (%)"
 POSTCODE_DENSITY_COL = "Tree canopy density within {radius}m (%)"
 POSTCODE_DENSITY_PERCENTILE_COL = "Tree canopy density percentile within {radius}m"
 POSTCODE_AREA_COL = "Tree canopy area within {radius}m (sqm)"
-POSTCODE_COUNT_COL = "Tree features within {radius}m"
 POSTCODE_HEIGHT_COL = "Mean TOW height within {radius}m (m)"

 # National Forest Inventory (NFI) woodland — the geometric complement of TOW.
@ -131,13 +139,24 @@ def _safe_extract_zip_dir(zip_path: Path, extract_dir: Path, force: bool) -> Pat
 def _nfi_dataset_path(
    zip_path: Path, extract_dir: Path, force_extract: bool, use_vsizip: bool
 ) -> str:
-    """Resolve the NFI woodland shapefile path, extracting the zip if needed."""
+    """Resolve the NFI woodland shapefile path, extracting the zip if needed.
+
+    With ``use_vsizip`` a ``/vsizip/`` path to the archive itself is returned
+    straight away: nothing is extracted and none of the shapefile checks below
+    run.
+
+    Otherwise the archive is extracted and must contain exactly one ``.shp``
+    (searched recursively). Zero or several shapefiles raise rather than one
+    being picked silently, so an ambiguous NFI release fails loudly instead of
+    accumulating canopy from the wrong layer.
+
+    Raises:
+        FileNotFoundError: No ``.shp`` file was found after extraction.
+        ValueError: More than one ``.shp`` file was found after extraction.
+    """
    if use_vsizip:
        return f"/vsizip/{zip_path.resolve()}"
    extracted = _safe_extract_zip_dir(zip_path, extract_dir, force_extract)
    shapefiles = sorted(extracted.rglob("*.shp"))
    if not shapefiles:
        raise FileNotFoundError(f"No .shp found inside {zip_path}")
+    if len(shapefiles) > 1:
+        names = ", ".join(path.name for path in shapefiles)
+        raise ValueError(
+            f"Expected exactly one shapefile inside {zip_path}, found {len(shapefiles)} "
+            f"({names}); cannot unambiguously pick the NFI woodland layer"
+        )
    return str(shapefiles[0])


@ -146,7 +165,7 @@ def _geometry_column(metadata: dict, column_names: list[str]) -> str:
    geometry_name = metadata.get("geometry_name")
    if geometry_name:
        return str(geometry_name)
-    for name in ("wkb_geometry", "geometry", "geom"):
+    for name in ("wkb_geometry", "geometry", "geom", "SHAPE"):
        if name in column_names:
            return name
    return column_names[-1]
@ -184,11 +203,10 @@ def _layers(dataset_path: str, selected_layers: tuple[str, ...] | None) -> list[
    return [layer for layer in available if layer in selected_layers]


-def _metric_columns(radius_m: int) -> tuple[str, str, str, str]:
+def _metric_columns(radius_m: int) -> tuple[str, str, str]:
+    """Return the (density, area, mean-height) column names for ``radius_m``."""
    return (
        POSTCODE_DENSITY_COL.format(radius=radius_m),
        POSTCODE_AREA_COL.format(radius=radius_m),
-        POSTCODE_COUNT_COL.format(radius=radius_m),
        POSTCODE_HEIGHT_COL.format(radius=radius_m),
    )

@ -198,20 +216,23 @@ def _postcode_density_percentile_col(radius_m: int) -> str:


 def _coverage_percentile_expr(column: str, alias: str) -> pl.Expr:
-    """Rank higher tree coverage higher on a 0-100 England-wide percentile scale."""
+    """Rank tree coverage on a 0-100 England-wide percentile scale.
+
+    Every non-null value is scored with the same formula,
+    ``(average_rank - 1) / (non_null_count - 1) * 100`` rounded to one decimal,
+    so tied values share the percentile of their mean rank. The endpoints 0 and
+    100 are therefore reached only by an untied minimum or maximum.
+
+    NaN is treated as null, and null inputs stay null in the output. An
+    all-equal column lands on 50 through the formula itself; a column with a
+    single non-null value, where the formula would divide by zero, is mapped to
+    the neutral midpoint (50) explicitly.
+    """
    value = pl.col(column).fill_nan(None)
    non_null_count = value.count()
    rank = value.rank("average")
    return (
        pl.when(value.is_null())
        .then(None)
-        .when(value == value.min())
-        .then(0.0)
-        .when(value == value.max())
-        .then(100.0)
        .when(non_null_count > 1)
        .then(((rank - 1) / (non_null_count - 1) * 100).round(1))
-        .otherwise(100.0)
+        .otherwise(50.0)
        .cast(pl.Float32)
        .alias(alias)
    )
@ -220,7 +241,7 @@ def _coverage_percentile_expr(column: str, alias: str) -> pl.Expr:
 def _with_postcode_density_percentiles(
    postcode_metrics: pl.DataFrame, radius_m: int
 ) -> pl.DataFrame:
-    density_col, _area_col, _count_col, _height_col = _metric_columns(radius_m)
+    density_col, _area_col, _height_col = _metric_columns(radius_m)
    return postcode_metrics.with_columns(
        _coverage_percentile_expr(
            density_col,
@ -229,28 +250,88 @@ def _with_postcode_density_percentiles(
    )


-def _accumulate_tree_metrics(
+def _postcode_buffers(
+    points: pl.DataFrame, radius_m: int
+) -> tuple[np.ndarray, shapely.STRtree]:
+    """Return one radius-``radius_m`` buffer per postcode plus an STRtree over them.
+
+    Buffers are produced in the row order of ``points``, so buffer ``i`` -- and
+    any STRtree hit with index ``i`` -- belongs to postcode row ``i`` and can
+    index the per-postcode accumulator arrays directly.
+
+    Each buffer is a polygonal approximation of the circle (``quad_segs=8``),
+    so its area is slightly below pi * radius_m**2.
+    """
+    centroids = shapely.points(points.select("x", "y").to_numpy())
+    buffers = shapely.buffer(centroids, radius_m, quad_segs=8)
+    index = shapely.STRtree(buffers)
+    return buffers, index
+
+
+def _accumulate_clipped_area(
+    geoms: np.ndarray,
+    circles: np.ndarray,
+    tree: shapely.STRtree,
+    canopy_area: np.ndarray,
+    height: np.ndarray | None = None,
+    height_weighted_sum: np.ndarray | None = None,
+    height_weight: np.ndarray | None = None,
+) -> None:
+    """Add each polygon's in-buffer overlap area to every postcode it intersects.
+
+    Only area(polygon ∩ circle) is accumulated -- never the area of the polygon
+    that falls outside the postcode's extended buffer -- so a crown straddling
+    the buffer edge contributes only its inside portion and a large parcel cannot
+    saturate a postcode from mere proximity. When ``height`` is supplied the mean
+    feature height is accumulated weighted by that same clipped overlap area.
+
+    Args:
+        geoms: Object array of shapely geometries; missing (None) and empty
+            entries are skipped.
+        circles: Per-postcode buffer polygons, indexed like ``canopy_area``.
+        tree: STRtree built over ``circles``.
+        canopy_area: Per-postcode accumulator, updated in place.
+        height: Optional per-geometry height aligned with ``geoms``; non-finite
+            entries still add canopy area but are left out of the height sums.
+        height_weighted_sum: Per-postcode sum of height * clipped area, updated
+            in place. Required when ``height`` is given.
+        height_weight: Per-postcode sum of the clipped area that carried a
+            finite height, updated in place. Required when ``height`` is given.
+
+    Raises:
+        ValueError: ``height`` was given without both height accumulators, or
+            its length differs from ``geoms``.
+    """
+    # Validate before touching any accumulator: failing later would leave
+    # canopy_area already updated while the height sums were not.
+    if height is not None:
+        if height_weighted_sum is None or height_weight is None:
+            raise ValueError(
+                "height_weighted_sum and height_weight are required when height is given"
+            )
+        if len(height) != len(geoms):
+            raise ValueError("height must have exactly one entry per geometry")
+
+    keep = ~shapely.is_missing(geoms) & ~shapely.is_empty(geoms)
+    geoms = geoms[keep]
+    if height is not None:
+        # Keep heights aligned with the surviving geometries.
+        height = height[keep]
+    if geoms.size == 0:
+        return
+
+    # query(predicate="intersects") over the circle STRtree returns exactly the
+    # (polygon, circle) pairs whose clipped overlap can be positive -- i.e. the
+    # polygon overlaps that postcode's radius-r buffer.
+    geom_index, postcode_index = tree.query(geoms, predicate="intersects")
+    if geom_index.size == 0:
+        return
+
+    clipped_area = shapely.area(
+        shapely.intersection(geoms[geom_index], circles[postcode_index])
+    )
+    # Pairs that merely touch intersect with zero area; drop them.
+    positive = clipped_area > 0
+    geom_index = geom_index[positive]
+    postcode_index = postcode_index[positive]
+    clipped_area = clipped_area[positive]
+
+    # np.add.at accumulates correctly when a postcode index repeats.
+    np.add.at(canopy_area, postcode_index, clipped_area)
+
+    if height is not None:
+        feature_height = height[geom_index]
+        finite = np.isfinite(feature_height)
+        if finite.any():
+            np.add.at(
+                height_weighted_sum,
+                postcode_index[finite],
+                feature_height[finite] * clipped_area[finite],
+            )
+            np.add.at(height_weight, postcode_index[finite], clipped_area[finite])
+
+
+def _accumulate_tow_metrics(
    dataset_path: str,
-    points: pl.DataFrame,
-    radius_m: int,
+    circles: np.ndarray,
+    tree: shapely.STRtree,
+    canopy_area: np.ndarray,
+    height_weighted_sum: np.ndarray,
+    height_weight: np.ndarray,
    batch_size: int,
    layer_names: tuple[str, ...] | None,
    max_features_per_layer: int | None,
-    workers: int,
-    canopy_area: np.ndarray,
-    feature_count: np.ndarray,
-    height_weighted_sum: np.ndarray,
-    height_weight: np.ndarray,
 ) -> None:
-    xy = points.select("x", "y").to_numpy()
-    tree = cKDTree(xy)
-
    layers = _layers(dataset_path, layer_names)
    print(f"Processing {len(layers)} TOW layer(s): {', '.join(layers)}")

-    columns = ["Woodland_Type", "TOW_Area_M", "MEANHT"]
+    columns = ["MEANHT"]
    total_features_seen = 0
-    total_features_used = 0

    for layer in layers:
        info = pyogrio.read_info(dataset_path, layer=layer)
@ -263,7 +344,7 @@ def _accumulate_tree_metrics(
            columns=columns,
            batch_size=batch_size,
            use_pyarrow=True,
-        ) as (_meta, reader):
+        ) as (meta, reader):
            for batch_index, batch in enumerate(reader, start=1):
                if max_features_per_layer is not None:
                    remaining = max_features_per_layer - layer_features_seen
@ -275,135 +356,29 @@ def _accumulate_tree_metrics(
                layer_features_seen += batch.num_rows
                total_features_seen += batch.num_rows
                names = batch.schema.names
-                area = np.asarray(
-                    batch.column(names.index("TOW_Area_M")).to_numpy(zero_copy_only=False),
-                    dtype=np.float64,
-                )
+                geometry_column = _geometry_column(meta, names)
                height = np.asarray(
                    batch.column(names.index("MEANHT")).to_numpy(zero_copy_only=False),
                    dtype=np.float64,
                )
                geometry = np.asarray(
-                    batch.column(names.index("SHAPE")).to_numpy(zero_copy_only=False),
+                    batch.column(names.index(geometry_column)).to_numpy(
+                        zero_copy_only=False
+                    ),
                    dtype=object,
                )
-
-                valid = np.isfinite(area) & (area > 0)
-                if not valid.any():
-                    continue
-
-                geometry = geometry[valid]
-                area = area[valid]
-                height = height[valid]
-
-                centroids = shapely.centroid(shapely.from_wkb(geometry))
-                x = shapely.get_x(centroids)
-                y = shapely.get_y(centroids)
-                valid_xy = np.isfinite(x) & np.isfinite(y)
-                if not valid_xy.any():
-                    continue
-
-                x = x[valid_xy]
-                y = y[valid_xy]
-                area = area[valid_xy]
-                height = height[valid_xy]
-
-                nearby = tree.query_ball_point(
-                    np.column_stack((x, y)), radius_m, workers=workers
+                _accumulate_clipped_area(
+                    shapely.from_wkb(geometry),
+                    circles,
+                    tree,
+                    canopy_area,
+                    height=height,
+                    height_weighted_sum=height_weighted_sum,
+                    height_weight=height_weight,
                )
-                lengths = np.fromiter(
-                    (len(postcode_indexes) for postcode_indexes in nearby),
-                    dtype=np.int32,
-                    count=len(nearby),
-                )
-                matching_features = lengths > 0
-                if matching_features.any():
-                    postcode_indexes = np.concatenate(
-                        [indexes for indexes in nearby if indexes]
-                    ).astype(np.int64, copy=False)
-                    feature_indexes = np.repeat(
-                        np.flatnonzero(matching_features), lengths[matching_features]
-                    )

-                    np.add.at(canopy_area, postcode_indexes, area[feature_indexes])
-                    np.add.at(feature_count, postcode_indexes, 1)
-
-                    feature_height = height[feature_indexes]
-                    valid_height = np.isfinite(feature_height)
-                    if valid_height.any():
-                        height_area = area[feature_indexes][valid_height]
-                        np.add.at(
-                            height_weighted_sum,
-                            postcode_indexes[valid_height],
-                            feature_height[valid_height] * height_area,
-                        )
-                        np.add.at(
-                            height_weight,
-                            postcode_indexes[valid_height],
-                            height_area,
-                        )
-
-                total_features_used += len(area)
                if batch_index == 1 or batch_index % 25 == 0:
-                    print(
-                        f"  batch {batch_index:,}: "
-                        f"{total_features_seen:,} rows read, "
-                        f"{total_features_used:,} features with usable centroids"
-                    )
-
-
-def _postcode_buffers(
-    points: pl.DataFrame, radius_m: int
-) -> tuple[np.ndarray, shapely.STRtree]:
-    """Build a radius-r circle for every postcode plus an STRtree over them.
-
-    Circle index == postcode index, matching the order used by the cKDTree path.
-    """
-    xy = points.select("x", "y").to_numpy()
-    circles = shapely.buffer(shapely.points(xy), radius_m, quad_segs=8)
-    return circles, shapely.STRtree(circles)
-
-
-def _add_nfi_batch(
-    geoms: np.ndarray,
-    category: np.ndarray,
-    circles: np.ndarray,
-    tree: shapely.STRtree,
-    canopy_area: np.ndarray,
-    feature_count: np.ndarray,
-    radius_m: int,
-) -> None:
-    """Add NFI woodland into the shared arrays by true buffer-clipped area.
-
-    Unlike the TOW centroid path, this clips each woodland polygon to each
-    nearby postcode circle and adds only area(polygon ∩ circle); a large parcel
-    therefore cannot saturate a postcode from mere centroid proximity, and a
-    buffer-filling parcel whose centroid is outside the radius is not missed.
-    """
-    keep = (category == NFI_WOODLAND_VALUE) & ~shapely.is_missing(geoms)
-    geoms = geoms[keep]
-    if geoms.size:
-        geoms = geoms[~shapely.is_empty(geoms)]
-    if geoms.size == 0:
-        return
-
-    # dwithin(polygon, point, r) is true iff the radius-r circle around the
-    # point intersects the polygon -- exactly the candidate set we want.
-    nfi_index, postcode_index = tree.query(
-        geoms, predicate="dwithin", distance=radius_m
-    )
-    if nfi_index.size == 0:
-        return
-
-    clipped_area = shapely.area(
-        shapely.intersection(geoms[nfi_index], circles[postcode_index])
-    )
-    positive = clipped_area > 0
-    postcode_index = postcode_index[positive]
-    clipped_area = clipped_area[positive]
-
-    np.add.at(canopy_area, postcode_index, clipped_area)
-    np.add.at(feature_count, postcode_index, 1)
+                    print(f"  batch {batch_index:,}: {total_features_seen:,} rows read")


 def _accumulate_nfi_metrics(
@ -411,8 +386,6 @@ def _accumulate_nfi_metrics(
    circles: np.ndarray,
    tree: shapely.STRtree,
    canopy_area: np.ndarray,
-    feature_count: np.ndarray,
-    radius_m: int,
    batch_size: int,
    max_nfi_features: int | None,
 ) -> None:
@ -455,14 +428,12 @@ def _accumulate_nfi_metrics(
                    ),
                    dtype=object,
                )
-                _add_nfi_batch(
-                    shapely.from_wkb(geometry),
-                    category,
+                geoms = shapely.from_wkb(geometry)
+                _accumulate_clipped_area(
+                    geoms[category == NFI_WOODLAND_VALUE],
                    circles,
                    tree,
                    canopy_area,
-                    feature_count,
-                    radius_m,
                )
                if batch_index == 1 or batch_index % 25 == 0:
                    print(f"  NFI batch {batch_index:,}: {features_seen:,} rows read")
@ -471,15 +442,26 @@ def _accumulate_nfi_metrics(
 def _finalize_metrics(
    points: pl.DataFrame,
    canopy_area: np.ndarray,
-    feature_count: np.ndarray,
    height_weighted_sum: np.ndarray,
    height_weight: np.ndarray,
    radius_m: int,
 ) -> pl.DataFrame:
    n_points = points.height
-    density_col, area_col, count_col, height_col = _metric_columns(radius_m)
+    density_col, area_col, height_col = _metric_columns(radius_m)
    buffer_area = math.pi * radius_m * radius_m
-    density_pct = np.minimum(canopy_area / buffer_area * 100.0, 100.0)
+    raw_density = canopy_area / buffer_area * 100.0
+    density_pct = np.minimum(raw_density, 100.0)
+
+    # Symptom of the assumed-disjoint TOW/NFI union being violated (or of
+    # overlapping crowns inside one buffer): clipped areas alone cannot exceed the
+    # buffer unless polygons overlap. Surface it rather than hide it behind the cap.
+    over_count = int(np.count_nonzero(raw_density > 100.0))
+    if over_count:
+        print(
+            f"  note: {over_count:,} postcode(s) exceeded 100% raw canopy and were "
+            "capped — indicates overlapping TOW/NFI canopy within the buffer"
+        )
+
    mean_height = np.divide(
        height_weighted_sum,
        height_weight,
@ -492,7 +474,6 @@ def _finalize_metrics(
            "postcode": points["postcode"],
            area_col: canopy_area.round(1).astype(np.float32),
            density_col: density_pct.round(1).astype(np.float32),
-            count_col: feature_count.astype(np.uint32),
            height_col: np.round(mean_height, 1).astype(np.float32),
        }
    ).with_columns(
@ -500,181 +481,9 @@ def _finalize_metrics(
    )


-def _clean_key_expr(column: str) -> pl.Expr:
-    return (
-        pl.col(column)
-        .fill_null("")
-        .str.to_uppercase()
-        .str.replace_all(r"[^A-Z0-9]+", " ")
-        .str.replace_all(r"\s+", " ")
-        .str.strip_chars()
-    )
-
-
-def _latest_price_paid_addresses(price_paid_path: Path) -> pl.LazyFrame:
-    return (
-        pl.scan_parquet(price_paid_path)
-        .select(
-            pl.col("postcode").str.strip_chars().str.to_uppercase().alias("postcode"),
-            "paon",
-            "saon",
-            "street",
-            "locality",
-            "town_city",
-            "district",
-            "county",
-            "date_of_transfer",
-        )
-        .filter(pl.col("postcode").is_not_null())
-        .filter(pl.col("street").is_not_null())
-        .filter(_clean_key_expr("street") != "")
-        .with_columns(
-            pl.concat_str(
-                [pl.col("saon"), pl.col("paon"), pl.col("street")],
-                separator=" ",
-                ignore_nulls=True,
-            )
-            .str.replace_all(r"\s+", " ")
-            .str.strip_chars()
-            .alias("pp_address"),
-        )
-        .filter(pl.col("pp_address").is_not_null())
-        .sort("date_of_transfer")
-        .group_by("postcode", "pp_address", maintain_order=True)
-        .agg(
-            pl.col("street").last(),
-            pl.col("locality").last(),
-            pl.col("town_city").last(),
-            pl.col("district").last(),
-            pl.col("county").last(),
-        )
-        .with_columns(
-            pl.concat_str(
-                [
-                    _clean_key_expr("street"),
-                    _clean_key_expr("town_city"),
-                    _clean_key_expr("district"),
-                    _clean_key_expr("county"),
-                ],
-                separator="|",
-            ).alias("street_key")
-        )
-    )
-
-
-def _weighted_mean_expr(column: str, weight: str) -> pl.Expr:
-    valid = pl.col(column).is_not_null() & ~pl.col(column).is_nan()
-    numerator = pl.when(valid).then(pl.col(column) * pl.col(weight)).sum()
-    denominator = pl.when(valid).then(pl.col(weight)).sum()
-    return pl.when(denominator > 0).then(numerator / denominator).otherwise(None)
-
-
-def _write_street_rollups(
-    postcode_metrics: pl.DataFrame,
-    price_paid_path: Path,
-    output_streets: Path | None,
-    output_addresses: Path | None,
-    radius_m: int,
-) -> None:
-    if output_streets is None and output_addresses is None:
-        return
-
-    density_col, area_col, count_col, height_col = _metric_columns(radius_m)
-    metrics = postcode_metrics.lazy()
-    addresses = _latest_price_paid_addresses(price_paid_path).join(
-        metrics, on="postcode", how="inner"
-    )
-
-    per_postcode = (
-        addresses.group_by(
-            "street_key",
-            "postcode",
-            "street",
-            "locality",
-            "town_city",
-            "district",
-            "county",
-        )
-        .agg(
-            pl.len().alias("address_count"),
-            pl.col(density_col).first(),
-            pl.col(area_col).first(),
-            pl.col(count_col).first(),
-            pl.col(height_col).first(),
-        )
-        .collect()
-    )
-
-    streets = (
-        per_postcode.lazy()
-        .group_by("street_key")
-        .agg(
-            pl.col("street").first(),
-            pl.col("locality").first(),
-            pl.col("town_city").first(),
-            pl.col("district").first(),
-            pl.col("county").first(),
-            pl.col("postcode").n_unique().alias("postcode_count"),
-            pl.col("address_count").sum().alias("address_count"),
-            _weighted_mean_expr(density_col, "address_count")
-            .round(1)
-            .cast(pl.Float32)
-            .alias(STREET_TREE_COVERAGE_COL),
-            _weighted_mean_expr(area_col, "address_count")
-            .round(1)
-            .cast(pl.Float32)
-            .alias(f"Street average {area_col}"),
-            _weighted_mean_expr(count_col, "address_count")
-            .round(1)
-            .cast(pl.Float32)
-            .alias(f"Street average {count_col}"),
-            _weighted_mean_expr(height_col, "address_count")
-            .round(1)
-            .cast(pl.Float32)
-            .alias(f"Street average {height_col}"),
-        )
-        .with_columns(
-            _coverage_percentile_expr(
-                STREET_TREE_COVERAGE_COL,
-                STREET_TREE_DENSITY_COL,
-            )
-        )
-        .sort("street_key")
-        .collect()
-    )
-
-    if output_addresses is not None:
-        output_addresses.parent.mkdir(parents=True, exist_ok=True)
-        address_output = addresses.join(
-            streets.lazy().select(
-                "street_key",
-                STREET_TREE_COVERAGE_COL,
-                STREET_TREE_DENSITY_COL,
-            ),
-            on="street_key",
-            how="left",
-        )
-        address_output.sink_parquet(output_addresses, compression="zstd")
-        print(f"Wrote address tree-density join: {output_addresses}")
-
-    if output_streets is not None:
-        output_streets.parent.mkdir(parents=True, exist_ok=True)
-        streets.write_parquet(output_streets, compression="zstd")
-        print(f"Wrote street tree-density rollup: {output_streets}")
-
-
-def _parse_csv_arg(value: str | None) -> tuple[str, ...] | None:
-    if value is None:
-        return None
-    if value.lower() == "all":
-        return None
-    parts = tuple(part.strip() for part in value.split(",") if part.strip())
-    return parts or None
-
-
 def main() -> None:
    parser = argparse.ArgumentParser(
-        description="Build postcode and street tree-density metrics from FR_TOW_V1_ALL.zip"
+        description="Build postcode-level tree-density metrics from FR_TOW_V1_ALL.zip"
    )
    parser.add_argument(
        "--tow-zip",
@ -716,35 +525,17 @@ def main() -> None:
        default=Path("property-data/arcgis_data.parquet"),
        help="Postcode centroid parquet with east1m/north1m columns",
    )
-    parser.add_argument(
-        "--price-paid",
-        type=Path,
-        default=None,
-        help="Optional Price Paid parquet used to roll postcode metrics up to streets",
-    )
    parser.add_argument(
        "--output-postcodes",
        type=Path,
        required=True,
        help="Output postcode-level tree-density parquet",
    )
-    parser.add_argument(
-        "--output-streets",
-        type=Path,
-        default=None,
-        help="Optional output street-level tree-density parquet",
-    )
-    parser.add_argument(
-        "--output-addresses",
-        type=Path,
-        default=None,
-        help="Optional output address/street join parquet keyed by postcode and pp_address",
-    )
    parser.add_argument(
        "--radius-m",
        type=int,
        default=50,
-        help="Radius around each postcode centroid used as the street-scale buffer",
+        help="Radius around each postcode centroid used as the extended buffer",
    )
    parser.add_argument(
        "--layers",
@ -757,12 +548,6 @@ def main() -> None:
        default=65_536,
        help="Arrow batch size for reading TOW features",
    )
-    parser.add_argument(
-        "--workers",
-        type=int,
-        default=-1,
-        help="Worker count passed to scipy cKDTree.query_ball_point",
-    )
    parser.add_argument(
        "--max-postcodes",
        type=int,
@ -783,9 +568,6 @@ def main() -> None:
    )
    args = parser.parse_args()

-    if (args.output_streets or args.output_addresses) and args.price_paid is None:
-        raise SystemExit("--price-paid is required when writing street/address outputs")
-
    if args.radius_m <= 0:
        raise SystemExit("--radius-m must be greater than zero")

@ -797,36 +579,32 @@ def main() -> None:

    n_points = points.height
    canopy_area = np.zeros(n_points, dtype=np.float64)
-    feature_count = np.zeros(n_points, dtype=np.uint32)
    height_weighted_sum = np.zeros(n_points, dtype=np.float64)
    height_weight = np.zeros(n_points, dtype=np.float64)

-    _accumulate_tree_metrics(
+    circles, tree = _postcode_buffers(points, args.radius_m)
+
+    _accumulate_tow_metrics(
        dataset_path=dataset_path,
-        points=points,
-        radius_m=args.radius_m,
+        circles=circles,
+        tree=tree,
+        canopy_area=canopy_area,
+        height_weighted_sum=height_weighted_sum,
+        height_weight=height_weight,
        batch_size=args.batch_size,
        layer_names=layer_names,
        max_features_per_layer=args.max_features_per_layer,
-        workers=args.workers,
-        canopy_area=canopy_area,
-        feature_count=feature_count,
-        height_weighted_sum=height_weighted_sum,
-        height_weight=height_weight,
    )

    if args.nfi_zip is not None and args.nfi_zip.exists():
        nfi_path = _nfi_dataset_path(
            args.nfi_zip, args.nfi_extract_dir, args.force_extract, args.use_vsizip
        )
-        circles, nfi_tree = _postcode_buffers(points, args.radius_m)
        _accumulate_nfi_metrics(
            dataset_path=nfi_path,
            circles=circles,
-            tree=nfi_tree,
+            tree=tree,
            canopy_area=canopy_area,
-            feature_count=feature_count,
-            radius_m=args.radius_m,
            batch_size=args.batch_size,
            max_nfi_features=args.max_nfi_features,
        )
@ -836,7 +614,6 @@ def main() -> None:
    postcode_metrics = _finalize_metrics(
        points,
        canopy_area,
-        feature_count,
        height_weighted_sum,
        height_weight,
        args.radius_m,
@ -849,14 +626,14 @@ def main() -> None:
    postcode_metrics.write_parquet(args.output_postcodes, compression="zstd")
    print(f"\nWrote postcode tree-density metrics: {args.output_postcodes}")

-    if args.price_paid is not None:
-        _write_street_rollups(
-            postcode_metrics=postcode_metrics,
-            price_paid_path=args.price_paid,
-            output_streets=args.output_streets,
-            output_addresses=args.output_addresses,
-            radius_m=args.radius_m,
-        )
+
+def _parse_csv_arg(value: str | None) -> tuple[str, ...] | None:
+    if value is None:
+        return None
+    if value.lower() == "all":
+        return None
+    parts = tuple(part.strip() for part in value.split(",") if part.strip())
+    return parts or None


 if __name__ == "__main__":
--- a/pipeline/transform/tree_overlay_tiles.py
+++ b/pipeline/transform/tree_overlay_tiles.py
@ -90,7 +90,7 @@ def _write_tree_geojsonseq(
                columns=columns,
                batch_size=batch_size,
                use_pyarrow=True,
-            ) as (_meta, reader):
+            ) as (meta, reader):
                for batch in reader:
                    if max_features_per_layer is not None:
                        remaining = max_features_per_layer - layer_features_seen
@ -101,6 +101,7 @@ def _write_tree_geojsonseq(

                    layer_features_seen += batch.num_rows
                    names = batch.schema.names
+                    geometry_column = _geometry_column(meta, names)
                    area = np.asarray(
                        batch.column(names.index("TOW_Area_M")).to_numpy(
                            zero_copy_only=False
@ -108,7 +109,7 @@ def _write_tree_geojsonseq(
                        dtype=np.float64,
                    )
                    geometry = np.asarray(
-                        batch.column(names.index("SHAPE")).to_numpy(
+                        batch.column(names.index(geometry_column)).to_numpy(
                            zero_copy_only=False
                        ),
                        dtype=object,
@ -327,7 +328,7 @@ def build_tree_overlay_tiles(
                str(min_zoom),
                "--maximum-zoom",
                str(max_zoom),
-                "--drop-smallest-as-needed",
+                "--coalesce-smallest-as-needed",
                "--extend-zooms-if-still-dropping",
                "--temporary-directory",
                tmp,
--- a/pipeline/utils/postcode_mapping.py
+++ b/pipeline/utils/postcode_mapping.py
@ -13,7 +13,11 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
    Uses OS National Grid coordinates (east1m, north1m) which are Cartesian metres,
    so Euclidean distance via cKDTree gives accurate results without projection.
    """
-    arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry25cd") == "E92000001")
+    arcgis = (
+        pl.scan_parquet(arcgis_path)
+        .filter(pl.col("ctry25cd") == "E92000001")
+        .with_columns(pl.col("doterm").cast(pl.Utf8).alias("doterm"))
+    )

    active = (
        arcgis.filter(pl.col("doterm").is_null())
--- a/pipeline/validate_outputs.py
+++ b/pipeline/validate_outputs.py
@ -9,6 +9,8 @@ import zipfile
 from pathlib import Path

 import polars as pl
+from shapely.geometry import shape
+from shapely.validation import explain_validity


 def _failures_for_file(path: Path) -> list[str]:
@ -79,9 +81,7 @@ def _split_glob(spec: str) -> tuple[Path, str]:

 def _split_pair(spec: str, label: str) -> tuple[Path, Path]:
    if "::" not in spec:
-        raise argparse.ArgumentTypeError(
-            f"{spec!r} must use LEFT::RIGHT for {label}"
-        )
+        raise argparse.ArgumentTypeError(f"{spec!r} must use LEFT::RIGHT for {label}")
    left, right = spec.split("::", 1)
    if not left or not right:
        raise argparse.ArgumentTypeError(f"{spec!r} must include both paths")
@ -143,22 +143,140 @@ def _parquet_postcodes(path: Path) -> set[str]:
        .get_column(column)
        .to_list()
    )
-    return {_canonical_postcode(value) for value in values if _canonical_postcode(value)}
+    return {
+        _canonical_postcode(value) for value in values if _canonical_postcode(value)
+    }
+
+
+def _active_english_arcgis_postcodes(path: Path) -> set[str]:
+    schema = pl.scan_parquet(path).collect_schema()
+    required = {"pcds", "ctry25cd", "doterm"}
+    missing = sorted(required - set(schema.names()))
+    if missing:
+        raise ValueError(f"{path}: missing ArcGIS postcode columns: {missing}")
+    values = (
+        pl.read_parquet(path, columns=["pcds", "ctry25cd", "doterm"])
+        .lazy()
+        .filter(pl.col("ctry25cd") == "E92000001")
+        .filter(pl.col("doterm").cast(pl.Utf8).is_null())
+        .select(pl.col("pcds").drop_nulls().unique())
+        .collect()
+        .get_column("pcds")
+        .to_list()
+    )
+    return {
+        _canonical_postcode(value) for value in values if _canonical_postcode(value)
+    }
+
+
+def _format_samples(samples: list[str]) -> str:
+    return "; ".join(samples[:10])
+
+
+def _boundary_postcode_scan(path: Path) -> tuple[set[str], list[str]]:
+    units_dir = path / "units" if (path / "units").is_dir() else path
+    postcodes: set[str] = set()
+    seen: dict[str, str] = {}
+    failures: list[str] = []
+    missing_postcode_samples: list[str] = []
+    missing_geometry_samples: list[str] = []
+    non_polygon_samples: list[str] = []
+    invalid_geometry_samples: list[str] = []
+    duplicate_samples: list[str] = []
+    missing_postcode_count = 0
+    missing_geometry_count = 0
+    non_polygon_count = 0
+    invalid_geometry_count = 0
+    duplicate_count = 0
+
+    for geojson_path in sorted(units_dir.glob("*.geojson")):
+        try:
+            with geojson_path.open("r", encoding="utf-8") as handle:
+                data = json.load(handle)
+        except Exception as exc:
+            failures.append(f"{geojson_path}: unreadable GeoJSON: {exc}")
+            continue
+
+        for idx, feature in enumerate(data.get("features", [])):
+            label = f"{geojson_path.name} feature {idx}"
+            properties = feature.get("properties") or {}
+            value = properties.get("postcodes")
+            postcode = _canonical_postcode(value) if value is not None else ""
+            if not postcode:
+                missing_postcode_count += 1
+                if len(missing_postcode_samples) < 10:
+                    missing_postcode_samples.append(label)
+            else:
+                if postcode in seen:
+                    duplicate_count += 1
+                    if len(duplicate_samples) < 10:
+                        duplicate_samples.append(
+                            f"{postcode} in {seen[postcode]} and {label}"
+                        )
+                else:
+                    seen[postcode] = label
+                postcodes.add(postcode)
+
+            geometry_data = feature.get("geometry")
+            if geometry_data is None:
+                missing_geometry_count += 1
+                if len(missing_geometry_samples) < 10:
+                    missing_geometry_samples.append(f"{postcode or label}")
+                continue
+            try:
+                geom = shape(geometry_data)
+            except Exception as exc:
+                invalid_geometry_count += 1
+                if len(invalid_geometry_samples) < 10:
+                    invalid_geometry_samples.append(f"{postcode or label}: {exc}")
+                continue
+            if geom.is_empty:
+                missing_geometry_count += 1
+                if len(missing_geometry_samples) < 10:
+                    missing_geometry_samples.append(f"{postcode or label}: empty")
+            elif geom.geom_type not in {"Polygon", "MultiPolygon"}:
+                non_polygon_count += 1
+                if len(non_polygon_samples) < 10:
+                    non_polygon_samples.append(f"{postcode or label}: {geom.geom_type}")
+            elif not geom.is_valid:
+                invalid_geometry_count += 1
+                if len(invalid_geometry_samples) < 10:
+                    invalid_geometry_samples.append(
+                        f"{postcode or label}: {explain_validity(geom)}"
+                    )
+
+    if missing_postcode_count:
+        failures.append(
+            f"{path}: {missing_postcode_count:,} boundary features are missing "
+            f"properties.postcodes; sample: {_format_samples(missing_postcode_samples)}"
+        )
+    if duplicate_count:
+        failures.append(
+            f"{path}: {duplicate_count:,} duplicate boundary postcode features; "
+            f"sample: {_format_samples(duplicate_samples)}"
+        )
+    if missing_geometry_count:
+        failures.append(
+            f"{path}: {missing_geometry_count:,} boundary features are missing or empty "
+            f"geometry; sample: {_format_samples(missing_geometry_samples)}"
+        )
+    if non_polygon_count:
+        failures.append(
+            f"{path}: {non_polygon_count:,} boundary features are not polygonal; "
+            f"sample: {_format_samples(non_polygon_samples)}"
+        )
+    if invalid_geometry_count:
+        failures.append(
+            f"{path}: {invalid_geometry_count:,} invalid boundary geometries; "
+            f"sample: {_format_samples(invalid_geometry_samples)}"
+        )
+    return postcodes, failures


 def _boundary_postcodes(path: Path) -> set[str]:
-    units_dir = path / "units" if (path / "units").is_dir() else path
-    postcodes: set[str] = set()
-    for geojson_path in sorted(units_dir.glob("*.geojson")):
-        with geojson_path.open("r", encoding="utf-8") as handle:
-            data = json.load(handle)
-        for feature in data.get("features", []):
-            properties = feature.get("properties") or {}
-            value = properties.get("postcodes")
-            if value is not None:
-                postcode = _canonical_postcode(value)
-                if postcode:
-                    postcodes.add(postcode)
+    postcodes, failures = _boundary_postcode_scan(path)
+    if failures:
+        raise ValueError("; ".join(failures))
    return postcodes


@ -174,11 +292,13 @@ def _failures_for_postcode_boundary_match(spec: str) -> list[str]:

    try:
        parquet_postcodes = _parquet_postcodes(parquet_path)
-        boundary_postcodes = _boundary_postcodes(boundaries_path)
+        boundary_postcodes, boundary_failures = _boundary_postcode_scan(boundaries_path)
    except Exception as exc:
-        return [f"{parquet_path} / {boundaries_path}: postcode match check failed: {exc}"]
+        return [
+            f"{parquet_path} / {boundaries_path}: postcode match check failed: {exc}"
+        ]

-    failures = []
+    failures = list(boundary_failures)
    if not boundary_postcodes:
        failures.append(f"{boundaries_path}: no boundary postcodes found")

@ -197,6 +317,41 @@ def _failures_for_postcode_boundary_match(spec: str) -> list[str]:
    return failures


+def _failures_for_active_postcode_boundary_match(spec: str) -> list[str]:
+    arcgis_path, boundaries_path = _split_pair(
+        spec, "active postcode boundary matching"
+    )
+    failures = _failures_for_parquet(arcgis_path) + _failures_for_dir(boundaries_path)
+    if failures:
+        return failures
+
+    try:
+        active_postcodes = _active_english_arcgis_postcodes(arcgis_path)
+        boundary_postcodes, boundary_failures = _boundary_postcode_scan(boundaries_path)
+    except Exception as exc:
+        return [
+            f"{arcgis_path} / {boundaries_path}: active postcode boundary check failed: {exc}"
+        ]
+
+    failures = list(boundary_failures)
+    if not boundary_postcodes:
+        failures.append(f"{boundaries_path}: no boundary postcodes found")
+
+    missing_boundaries = active_postcodes - boundary_postcodes
+    orphan_boundaries = boundary_postcodes - active_postcodes
+    if missing_boundaries:
+        failures.append(
+            f"{boundaries_path}: {len(missing_boundaries):,} active English postcodes "
+            f"from {arcgis_path} are missing boundaries; sample: {_sample(missing_boundaries)}"
+        )
+    if orphan_boundaries:
+        failures.append(
+            f"{boundaries_path}: {len(orphan_boundaries):,} boundary postcodes are not "
+            f"active English postcodes in {arcgis_path}; sample: {_sample(orphan_boundaries)}"
+        )
+    return failures
+
+
 def main() -> int:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--file", action="append", default=[], type=Path)
@ -221,6 +376,15 @@ def main() -> int:
        default=[],
        help="Require postcode parquet keys to exactly match boundary GeoJSON postcodes: PARQUET::DIR",
    )
+    parser.add_argument(
+        "--active-postcode-boundary-match",
+        action="append",
+        default=[],
+        help=(
+            "Require active English ArcGIS postcodes to exactly match boundary "
+            "GeoJSON postcodes: ARCGIS_PARQUET::DIR"
+        ),
+    )
    args = parser.parse_args()

    failures: list[str] = []
@ -238,6 +402,8 @@ def main() -> int:
        failures.extend(_failures_for_zip_glob(spec))
    for spec in args.postcode_boundary_match:
        failures.extend(_failures_for_postcode_boundary_match(spec))
+    for spec in args.active_postcode_boundary_match:
+        failures.extend(_failures_for_active_postcode_boundary_match(spec))

    if failures:
        print("Output validation failed:", file=sys.stderr)
--- a/server-rs/src/routes/stats.rs
+++ b/server-rs/src/routes/stats.rs
@ -282,17 +282,23 @@ pub fn compute_crime_by_year(

    for &row in matching_rows {
        let postcode = data.postcode(row);
-        let Some(series_list) = crime_by_year.series_by_postcode.get(postcode) else {
-            continue;
-        };

-        // For every type the postcode reports, add its per-year counts.
-        // For types it doesn't report, treat the row as contributing 0 — so we
-        // bump the row count for *every* known type below.
-        for series in series_list {
-            let acc = &mut per_type_year_sums[series.type_idx as usize];
-            for point in &series.points {
-                *acc.entry(point.year).or_insert(0.0) += point.count as f64;
+        // A postcode absent from the by-year table has no recorded crime within
+        // 50m, so it contributes 0 to every type's per-year sum. It must still be
+        // counted in the denominator: the matching `(avg/yr)` stat counts those
+        // same zero-crime postcodes as 0.0 (crime_by_postcode.parquet has a dense
+        // row for every boundary postcode), so excluding them here would compute
+        // the chart over a smaller population and report a higher magnitude than
+        // the headline. Property postcodes are guaranteed to be boundary
+        // postcodes by the postcode-boundary-match validation, so "absent" means
+        // genuinely zero-crime, not missing data.
+        if let Some(series_list) = crime_by_year.series_by_postcode.get(postcode) {
+            // For every type the postcode reports, add its per-year counts.
+            for series in series_list {
+                let acc = &mut per_type_year_sums[series.type_idx as usize];
+                for point in &series.points {
+                    *acc.entry(point.year).or_insert(0.0) += point.count as f64;
+                }
            }
        }
        for c in per_type_row_counts.iter_mut() {