improve
This commit is contained in:
parent
8688b7475e
commit
e8345cbdc1
40 changed files with 1980 additions and 904 deletions
|
|
@ -64,8 +64,6 @@ PBF := $(DATA_DIR)/england-latest.osm.pbf
|
|||
FR_TOW := $(DATA_DIR)/FR_TOW_V1_ALL.zip
|
||||
NFI := $(DATA_DIR)/NFI_WOODLAND_ENGLAND.zip
|
||||
TREE_DENSITY_PC := $(DATA_DIR)/tree_density_by_postcode.parquet
|
||||
TREE_DENSITY_STREETS := $(DATA_DIR)/tree_density_by_street.parquet
|
||||
TREE_DENSITY_ADDR := $(DATA_DIR)/tree_density_by_address.parquet
|
||||
OFS_REGISTER := $(DATA_DIR)/ofs_register.xlsx
|
||||
PLACES := $(DATA_DIR)/places.parquet
|
||||
MEDIAN_AGE := $(DATA_DIR)/median_age.parquet
|
||||
|
|
@ -183,6 +181,7 @@ $(PC_BOUNDARIES_STAMP): $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP) $(ARCGI
|
|||
--oa-boundaries $(OA_BOUNDARIES) \
|
||||
--inspire $(INSPIRE_DIR) \
|
||||
--output $(PC_BOUNDARIES)
|
||||
$(VALIDATE_OUTPUTS) --active-postcode-boundary-match "$(ARCGIS)::$(PC_BOUNDARIES)"
|
||||
@touch $@
|
||||
generate-travel-times: $(ARCGIS) $(PLACES) $(PBF) download-transit-network
|
||||
@if [ -f "$(R5_NETWORK_CACHE)" ] && { [ "$(PBF)" -nt "$(R5_NETWORK_CACHE)" ] || [ "$(TRANSIT_STAMP)" -nt "$(R5_NETWORK_CACHE)" ]; }; then \
|
||||
|
|
@ -358,7 +357,7 @@ $(POIS_FILTERED): $(POIS_RAW) $(NAPTAN) $(GROCERY_RETAIL_POINTS) $(GIAS) $(OFSTE
|
|||
$(EPC_PP): $(PRICE_PAID) $(EPC) pipeline/transform/join_epc_pp.py pipeline/utils/fuzzy_join.py
|
||||
uv run python -m pipeline.transform.join_epc_pp --epc $(EPC) --price-paid $(PRICE_PAID) --output $@
|
||||
|
||||
$(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(PC_BOUNDARIES) pipeline/transform/crime_spatial.py pipeline/transform/postcode_boundaries/loader.py pipeline/transform/crime.py
|
||||
$(CRIME) $(CRIME_BY_YEAR) &: $(CRIME_STAMP) $(PC_BOUNDARIES_STAMP) pipeline/transform/crime_spatial.py pipeline/transform/postcode_boundaries/loader.py pipeline/transform/crime.py
|
||||
$(VALIDATE_OUTPUTS) --file $(CRIME_DIR)/archive_manifest.json --glob "$(CRIME_DIR)::**/*-street.csv"
|
||||
uv run python -m pipeline.transform.crime_spatial --input $(CRIME_DIR) --boundaries $(PC_BOUNDARIES)/units --output $(CRIME) --output-by-year $(CRIME_BY_YEAR)
|
||||
|
||||
|
|
@ -368,15 +367,12 @@ $(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE) $(POI_PROXIMITY_DE
|
|||
$(SCHOOL_PROX): $(OFSTED) $(ARCGIS) pipeline/transform/school_proximity.py pipeline/utils/poi_counts.py
|
||||
uv run python -m pipeline.transform.school_proximity --ofsted $(OFSTED) --arcgis $(ARCGIS) --output $@
|
||||
|
||||
$(TREE_DENSITY_PC): $(FR_TOW) $(NFI) $(ARCGIS) $(PRICE_PAID) $(TREE_DENSITY_DEPS)
|
||||
$(TREE_DENSITY_PC): $(FR_TOW) $(NFI) $(ARCGIS) $(TREE_DENSITY_DEPS)
|
||||
uv run python -m pipeline.transform.tree_density \
|
||||
--tow-zip $(FR_TOW) \
|
||||
--nfi-zip $(NFI) \
|
||||
--arcgis $(ARCGIS) \
|
||||
--price-paid $(PRICE_PAID) \
|
||||
--output-postcodes $(TREE_DENSITY_PC) \
|
||||
--output-streets $(TREE_DENSITY_STREETS) \
|
||||
--output-addresses $(TREE_DENSITY_ADDR)
|
||||
--output-postcodes $(TREE_DENSITY_PC)
|
||||
|
||||
# Postcode boundaries require manual generation — fail with instructions
|
||||
$(PC_BOUNDARIES):
|
||||
|
|
|
|||
|
|
@ -81,6 +81,15 @@ function isProtectedPage(page: Page): boolean {
|
|||
return page === 'account' || page === 'saved';
|
||||
}
|
||||
|
||||
/**
 * Reports whether the current URL carries a well-formed `share` query
 * parameter: 1 to 20 ASCII letters or digits, case-insensitive.
 *
 * @returns `true` when `?share=` is present, non-empty and matches that shape.
 */
function isSharedDashboardUrl(): boolean {
  const shareToken = new URLSearchParams(window.location.search).get('share');
  // Missing (null) or empty parameter: not a shared link.
  if (!shareToken) return false;
  return /^[a-z0-9]{1,20}$/i.test(shareToken);
}
|
||||
|
||||
/**
 * Decides whether a page needs a signed-in user.
 *
 * Protected pages always do. The dashboard does too, unless the current URL
 * is a shared-dashboard link.
 *
 * @param page The page being routed to.
 * @returns `true` when the route requires authentication.
 */
function isAuthRequiredRoute(page: Page): boolean {
  if (isProtectedPage(page)) return true;
  // The URL is only inspected for the dashboard page.
  return page === 'dashboard' && !isSharedDashboardUrl();
}
|
||||
|
||||
function buildPageUrl(page: Page, inviteCode?: string, search = '', hash = ''): string {
|
||||
const normalizedHash = normalizeHash(hash);
|
||||
return `${pageToPath(page, inviteCode)}${search}${normalizedHash ? `#${normalizedHash}` : ''}`;
|
||||
|
|
@ -235,6 +244,7 @@ export default function App() {
|
|||
const postAuthCheckoutReturnPathRef = useRef<string | null>(null);
|
||||
const authCompletedRef = useRef(false);
|
||||
const [licenseSuccessStatus, setLicenseSuccessStatus] = useState<LicenseSuccessStatus>('hidden');
|
||||
const [dashboardReady, setDashboardReady] = useState(false);
|
||||
|
||||
// Keep a ref to the latest refreshAuth so the mount-only startup effect always
|
||||
// calls the current implementation without re-running when the callback identity changes.
|
||||
|
|
@ -266,7 +276,7 @@ export default function App() {
|
|||
if (!completed) {
|
||||
setPostAuthIntent(null);
|
||||
postAuthCheckoutReturnPathRef.current = null;
|
||||
if (isProtectedPage(activePageRef.current)) {
|
||||
if (isAuthRequiredRoute(activePageRef.current)) {
|
||||
window.history.replaceState({ page: 'home', hash: '' }, '', '/');
|
||||
setRouteHash('');
|
||||
setActivePage('home');
|
||||
|
|
@ -517,7 +527,10 @@ export default function App() {
|
|||
}
|
||||
}, [activePage, fetchSearches]);
|
||||
|
||||
const isAuthRequiredPage = activePage === 'account' || activePage === 'saved';
|
||||
const isAuthRequiredPage =
|
||||
activePage === 'account' ||
|
||||
activePage === 'saved' ||
|
||||
(activePage === 'dashboard' && !mapUrlState.share);
|
||||
useEffect(() => {
|
||||
if (authLoading) return;
|
||||
if (isAuthRequiredPage && !user) {
|
||||
|
|
@ -530,6 +543,13 @@ export default function App() {
|
|||
|
||||
const [exportState, setExportState] = useState<ExportState | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (activePage !== 'dashboard' || !user) {
|
||||
setDashboardReady(false);
|
||||
setExportState(null);
|
||||
}
|
||||
}, [activePage, user]);
|
||||
|
||||
if ((isScreenshotMode || isOgMode) && inviteCode) {
|
||||
return (
|
||||
<Suspense fallback={<PageFallback />}>
|
||||
|
|
@ -584,8 +604,9 @@ export default function App() {
|
|||
onPageChange={navigateTo}
|
||||
theme={theme}
|
||||
onToggleTheme={toggleTheme}
|
||||
exportState={activePage === 'dashboard' ? exportState : null}
|
||||
exportState={activePage === 'dashboard' && user ? exportState : null}
|
||||
dashboardParams={activePage === 'dashboard' ? dashboardParams : ''}
|
||||
dashboardActionsDisabled={activePage === 'dashboard' && !dashboardReady}
|
||||
onSaveSearch={
|
||||
activePage === 'dashboard' && user
|
||||
? editingSearch
|
||||
|
|
@ -675,6 +696,7 @@ export default function App() {
|
|||
onNavigateTo={navigateTo}
|
||||
onExportStateChange={setExportState}
|
||||
onDashboardParamsChange={setDashboardParams}
|
||||
onDashboardReadyChange={setDashboardReady}
|
||||
isMobile={isMobile}
|
||||
initialTravelTime={mapUrlState.travelTime}
|
||||
initialPostcode={mapUrlState.postcode}
|
||||
|
|
|
|||
|
|
@ -461,6 +461,24 @@ interface ShareLinkListItem {
|
|||
created: string;
|
||||
}
|
||||
|
||||
/**
 * Picks, for each invite type, the URL of the most recently created invite
 * that is still pending (not marked used and has a URL).
 *
 * @param invites Invite records to scan.
 * @returns A plain object mapping invite type to the newest pending invite URL.
 *          Types with no pending invite are absent from the result.
 */
function latestPendingInviteUrls(invites: InviteListItem[]): Record<string, string> {
  // A Map (rather than a plain object) keeps type names such as "constructor"
  // or "toString" from colliding with inherited Object.prototype members,
  // which would otherwise look like an existing entry and drop the invite.
  const latestByType = new Map<string, { url: string; createdMs: number }>();

  for (const invite of invites) {
    if (invite.used || !invite.url) continue;

    // Unparseable timestamps produce NaN; fall back to 0 so they rank oldest.
    const createdMs = Date.parse(invite.created) || 0;
    const existing = latestByType.get(invite.invite_type);
    // Strictly newer wins, so on a tie the first invite seen is kept.
    if (!existing || createdMs > existing.createdMs) {
      latestByType.set(invite.invite_type, { url: invite.url, createdMs });
    }
  }

  return Object.fromEntries(
    Array.from(latestByType, ([type, invite]): [string, string] => [type, invite.url])
  );
}
|
||||
|
||||
function InviteTable({
|
||||
invites,
|
||||
loading,
|
||||
|
|
@ -673,7 +691,16 @@ function InviteSection({ user }: { user: AuthUser }) {
|
|||
const res = await fetch(apiUrl('invites'), authHeaders());
|
||||
assertOk(res, 'Fetch invites');
|
||||
const data = await res.json();
|
||||
setInviteHistory(data.invites);
|
||||
const invites: InviteListItem[] = Array.isArray(data.invites) ? data.invites : [];
|
||||
setInviteHistory(invites);
|
||||
const pendingInviteUrls = latestPendingInviteUrls(invites);
|
||||
setInviteUrl((prev) => {
|
||||
const next = { ...prev };
|
||||
for (const [type, url] of Object.entries(pendingInviteUrls)) {
|
||||
if (!next[type]) next[type] = url;
|
||||
}
|
||||
return next;
|
||||
});
|
||||
} catch {
|
||||
// Silent — non-critical
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -8,8 +8,11 @@ const RECENT_SEARCHES_STORAGE_KEY = 'perfect-postcode.locationSearch.recent';
|
|||
|
||||
vi.mock('react-i18next', () => ({
|
||||
useTranslation: () => ({
|
||||
t: (key: string) =>
|
||||
key === 'locationSearch.placeholder' ? 'Search places or postcodes...' : key,
|
||||
t: (key: string) => {
|
||||
if (key === 'locationSearch.placeholder') return 'Search places or postcodes...';
|
||||
if (key === 'locationSearch.noResults') return 'No matching places or postcodes';
|
||||
return key;
|
||||
},
|
||||
}),
|
||||
}));
|
||||
|
||||
|
|
@ -226,6 +229,91 @@ describe('LocationSearch', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('selects the first place suggestion with Enter when none is highlighted', async () => {
  // Fresh payloads are built per request so each response gets its own objects.
  const placesPayload = () => ({
    places: [
      {
        type: 'place',
        name: 'London',
        slug: 'london',
        place_type: 'city',
        lat: 51.5074,
        lon: -0.1278,
      },
    ],
    postcodes: [],
    addresses: [],
  });
  const nearestPostcodePayload = () => ({
    postcode: 'SW1A 1AA',
    latitude: 51.501,
    longitude: -0.141,
    geometry: postcodeGeometry,
  });

  // Route the stubbed fetch by pathname; anything unexpected answers 404.
  const fetchStub = vi.fn((input: string | URL | Request) => {
    const { pathname } = new URL(String(input), 'http://localhost');
    switch (pathname) {
      case '/api/places':
        return Promise.resolve(jsonResponse(placesPayload()));
      case '/api/nearest-postcode':
        return Promise.resolve(jsonResponse(nearestPostcodePayload()));
      default:
        return Promise.resolve(new Response(null, { status: 404 }));
    }
  });
  vi.stubGlobal('fetch', fetchStub);

  const onFlyTo = vi.fn();
  const onLocationSearched = vi.fn();
  render(<LocationSearch onFlyTo={onFlyTo} onLocationSearched={onLocationSearched} />);

  const searchBox = screen.getByRole('textbox');
  fireEvent.change(searchBox, { target: { value: 'London' } });

  // Wait for the suggestion to render, then press Enter without highlighting it.
  await screen.findByRole('button', { name: 'London' });
  fireEvent.keyDown(searchBox, { key: 'Enter' });

  await waitFor(() => {
    expect(onLocationSearched).toHaveBeenCalledTimes(1);
  });

  // The map flies to the place coordinates; the callback reports the nearest
  // postcode plus the place coordinates as the marker position.
  expect(onFlyTo).toHaveBeenCalledWith(51.5074, -0.1278, 10);
  expect(onLocationSearched).toHaveBeenCalledWith({
    postcode: 'SW1A 1AA',
    geometry: postcodeGeometry,
    latitude: 51.501,
    longitude: -0.141,
    zoom: 10,
    markerLatitude: 51.5074,
    markerLongitude: -0.1278,
  });
});
|
||||
|
||||
it('shows an empty state for invalid place queries', async () => {
  // Every request resolves with a payload that contains no matches of any kind.
  const emptyResults = () => jsonResponse({ places: [], postcodes: [], addresses: [] });
  const fetchStub = vi.fn(() => Promise.resolve(emptyResults()));
  vi.stubGlobal('fetch', fetchStub);

  render(<LocationSearch onFlyTo={vi.fn()} onLocationSearched={vi.fn()} />);

  const searchBox = screen.getByRole('textbox');
  fireEvent.change(searchBox, { target: { value: '!!!!zzzzzz!!!!' } });

  await waitFor(() => {
    expect(screen.getByText('No matching places or postcodes')).toBeTruthy();
  });
});
|
||||
|
||||
it('keeps only the three most recent local searches', async () => {
|
||||
vi.stubGlobal(
|
||||
'fetch',
|
||||
|
|
|
|||
|
|
@ -333,6 +333,8 @@ export default function LocationSearch({
|
|||
onSelect={selectResult}
|
||||
loading={loading}
|
||||
placeholder={t('locationSearch.placeholder')}
|
||||
ariaLabel={t('locationSearch.searchLabel')}
|
||||
name="location-search"
|
||||
size="sm"
|
||||
inputClassName={
|
||||
inputClassName ??
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ export default function MapPage({
|
|||
onNavigateTo,
|
||||
onExportStateChange,
|
||||
onDashboardParamsChange,
|
||||
onDashboardReadyChange,
|
||||
screenshotMode,
|
||||
ogMode,
|
||||
isMobile = false,
|
||||
|
|
@ -642,6 +643,23 @@ export default function MapPage({
|
|||
onDashboardParamsChange?.(dashboardParams);
|
||||
}, [dashboardParams, onDashboardParamsChange]);
|
||||
|
||||
const dashboardReady =
|
||||
!initialLoading &&
|
||||
!mapData.loading &&
|
||||
!mapData.licenseRequired &&
|
||||
mapData.bounds != null &&
|
||||
mapData.currentView != null;
|
||||
|
||||
useEffect(() => {
|
||||
onDashboardReadyChange?.(dashboardReady);
|
||||
}, [dashboardReady, onDashboardReadyChange]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
onDashboardReadyChange?.(false);
|
||||
};
|
||||
}, [onDashboardReadyChange]);
|
||||
|
||||
useEffect(() => {
|
||||
if (mapData.licenseRequired) trackEvent('Upgrade Modal Shown');
|
||||
}, [mapData.licenseRequired]);
|
||||
|
|
@ -830,8 +848,8 @@ export default function MapPage({
|
|||
</button>
|
||||
<button
|
||||
onClick={() => onUpdateEdit?.(dashboardParams)}
|
||||
disabled={savingSearch}
|
||||
className="shrink-0 cursor-pointer px-2.5 py-1 rounded text-xs font-medium bg-teal-600 text-white hover:bg-teal-700 disabled:opacity-50 disabled:cursor-wait flex items-center gap-1.5"
|
||||
disabled={savingSearch || !dashboardReady}
|
||||
className="shrink-0 cursor-pointer px-2.5 py-1 rounded text-xs font-medium bg-teal-600 text-white hover:bg-teal-700 disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5"
|
||||
>
|
||||
{savingSearch ? t('savedPage.updating') : t('common.update')}
|
||||
</button>
|
||||
|
|
|
|||
|
|
@ -186,7 +186,7 @@ export default function POIPane({
|
|||
</div>
|
||||
{!isCollapsed && (
|
||||
<div className="px-3 py-2">
|
||||
<PillGroup>
|
||||
<PillGroup wrap>
|
||||
{group.categories.map((category) => {
|
||||
const logo = getPoiCategoryLogoUrl(category);
|
||||
return (
|
||||
|
|
|
|||
|
|
@ -269,7 +269,10 @@ export function DesktopMapPage({
|
|||
</div>
|
||||
)}
|
||||
{poiPaneOpen && (
|
||||
<div className="absolute bottom-28 right-4 z-10 flex h-[60vh] min-h-0 w-80 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
|
||||
<div
|
||||
className="absolute bottom-28 right-4 z-10 flex min-h-0 w-80 max-w-[calc(100%_-_2rem)] flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900"
|
||||
style={{ height: 'min(30rem, calc(100vh - 10rem))' }}
|
||||
>
|
||||
{poiPane}
|
||||
</div>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -132,6 +132,10 @@ export function MobileMapPage({
|
|||
upgradeModal,
|
||||
editingBar,
|
||||
}: MobileMapPageProps) {
|
||||
const floatingPaneAvailableHeight = `max(12rem, calc(100dvh - ${Math.ceil(
|
||||
bottomScreenInset
|
||||
)}px - 7rem))`;
|
||||
|
||||
return (
|
||||
<div className="flex-1 overflow-hidden relative">
|
||||
<LoadingOverlay show={initialLoading} />
|
||||
|
|
@ -219,7 +223,13 @@ export function MobileMapPage({
|
|||
)}
|
||||
|
||||
{poiPaneOpen && (
|
||||
<div className="absolute top-24 right-3 left-3 z-20 flex h-[45dvh] min-h-0 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
|
||||
<div
|
||||
className="absolute top-24 right-3 left-3 z-20 flex min-h-0 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900"
|
||||
style={{
|
||||
height: `min(22rem, ${floatingPaneAvailableHeight})`,
|
||||
maxHeight: floatingPaneAvailableHeight,
|
||||
}}
|
||||
>
|
||||
{poiPane}
|
||||
</div>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ export interface MapPageProps {
|
|||
onNavigateTo: (page: Page, hash?: string, infoFeature?: string) => void;
|
||||
onExportStateChange?: (state: ExportState) => void;
|
||||
onDashboardParamsChange?: (params: string) => void;
|
||||
onDashboardReadyChange?: (ready: boolean) => void;
|
||||
screenshotMode?: boolean;
|
||||
ogMode?: boolean;
|
||||
isMobile?: boolean;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { useState, useCallback, useEffect } from 'react';
|
||||
import { useState, useCallback, useEffect, useId } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { CloseIcon } from './icons/CloseIcon';
|
||||
import { GoogleIcon } from './icons/GoogleIcon';
|
||||
|
|
@ -36,6 +36,9 @@ export default function AuthModal({
|
|||
const [password, setPassword] = useState('');
|
||||
const [resetSent, setResetSent] = useState(false);
|
||||
const dialogRef = useModalA11y();
|
||||
const fieldId = useId();
|
||||
const emailInputId = `${fieldId}-email`;
|
||||
const passwordInputId = `${fieldId}-password`;
|
||||
|
||||
useEffect(() => {
|
||||
trackEvent('Auth Modal Open', { tab: initialTab });
|
||||
|
|
@ -194,14 +197,20 @@ export default function AuthModal({
|
|||
{/* Email form */}
|
||||
<form onSubmit={handleSubmit} className="space-y-4">
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1">
|
||||
<label
|
||||
htmlFor={emailInputId}
|
||||
className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1"
|
||||
>
|
||||
{t('auth.email')}
|
||||
</label>
|
||||
<input
|
||||
id={emailInputId}
|
||||
name="email"
|
||||
type="email"
|
||||
value={email}
|
||||
onChange={(e) => setEmail(e.target.value)}
|
||||
required
|
||||
autoComplete="email"
|
||||
className="w-full px-3 py-2 text-sm rounded border border-warm-200 dark:border-warm-700 bg-white dark:bg-warm-800 text-navy-950 dark:text-white placeholder-warm-400 dark:placeholder-warm-500 outline-none focus:ring-2 ring-teal-400 dark:ring-teal-500"
|
||||
placeholder={t('auth.emailPlaceholder')}
|
||||
/>
|
||||
|
|
@ -209,15 +218,21 @@ export default function AuthModal({
|
|||
|
||||
{view !== 'forgot' && (
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1">
|
||||
<label
|
||||
htmlFor={passwordInputId}
|
||||
className="block text-sm font-medium text-warm-700 dark:text-warm-300 mb-1"
|
||||
>
|
||||
{t('auth.password')}
|
||||
</label>
|
||||
<input
|
||||
id={passwordInputId}
|
||||
name="password"
|
||||
type="password"
|
||||
value={password}
|
||||
onChange={(e) => setPassword(e.target.value)}
|
||||
required
|
||||
minLength={8}
|
||||
autoComplete={view === 'register' ? 'new-password' : 'current-password'}
|
||||
className="w-full px-3 py-2 text-sm rounded border border-warm-200 dark:border-warm-700 bg-white dark:bg-warm-800 text-navy-950 dark:text-white placeholder-warm-400 dark:placeholder-warm-500 outline-none focus:ring-2 ring-teal-400 dark:ring-teal-500"
|
||||
placeholder={
|
||||
view === 'register'
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ export default function Header({
|
|||
onToggleTheme,
|
||||
exportState,
|
||||
dashboardParams,
|
||||
dashboardActionsDisabled = false,
|
||||
onSaveSearch,
|
||||
savingSearch,
|
||||
editingSearch,
|
||||
|
|
@ -96,6 +97,7 @@ export default function Header({
|
|||
onToggleTheme: () => void;
|
||||
exportState: HeaderExportState | null;
|
||||
dashboardParams: string;
|
||||
dashboardActionsDisabled?: boolean;
|
||||
onSaveSearch: (() => void) | null;
|
||||
savingSearch: boolean;
|
||||
editingSearch: EditingSearchState | null;
|
||||
|
|
@ -116,6 +118,7 @@ export default function Header({
|
|||
() => window.matchMedia(DASHBOARD_TABLET_SIDEBAR_QUERY).matches
|
||||
);
|
||||
const useSidebarNav = isMobile || (activePage === 'dashboard' && isDashboardTabletSidebarWidth);
|
||||
const dashboardActionsBlocked = activePage === 'dashboard' && (!user || dashboardActionsDisabled);
|
||||
|
||||
useEffect(() => {
|
||||
const mql = window.matchMedia(DASHBOARD_TABLET_SIDEBAR_QUERY);
|
||||
|
|
@ -139,6 +142,10 @@ export default function Header({
|
|||
if (!useSidebarNav) setMenuOpen(false);
|
||||
}, [useSidebarNav]);
|
||||
|
||||
useEffect(() => {
|
||||
if (dashboardActionsBlocked) setExportMenuOpen(false);
|
||||
}, [dashboardActionsBlocked]);
|
||||
|
||||
const doCopy = useCallback((text: string) => {
|
||||
copyToClipboard(text, () => {
|
||||
setCopied(true);
|
||||
|
|
@ -147,6 +154,7 @@ export default function Header({
|
|||
}, []);
|
||||
|
||||
const handleShare = useCallback(async () => {
|
||||
if (dashboardActionsBlocked) return;
|
||||
const params =
|
||||
activePage === 'dashboard' ? dashboardParams : window.location.search.replace(/^\?/, '');
|
||||
if (!params) {
|
||||
|
|
@ -167,7 +175,7 @@ export default function Header({
|
|||
} finally {
|
||||
setSharing(false);
|
||||
}
|
||||
}, [activePage, dashboardParams, doCopy, i18n.language]);
|
||||
}, [activePage, dashboardActionsBlocked, dashboardParams, doCopy, i18n.language]);
|
||||
|
||||
const navLink = (page: Page, e: React.MouseEvent, hash?: string) => {
|
||||
if (e.metaKey || e.ctrlKey || e.shiftKey || e.button !== 0) return;
|
||||
|
|
@ -206,8 +214,8 @@ export default function Header({
|
|||
</button>
|
||||
<button
|
||||
onClick={onUpdateEdit}
|
||||
disabled={savingSearch}
|
||||
className="cursor-pointer px-3 py-1.5 rounded bg-teal-600 hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-wait flex items-center gap-1.5"
|
||||
disabled={savingSearch || dashboardActionsBlocked}
|
||||
className="cursor-pointer px-3 py-1.5 rounded bg-teal-600 hover:bg-teal-700 transition-colors text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-1.5"
|
||||
>
|
||||
{savingSearch && <SpinnerIcon className="w-4 h-4 animate-spin" />}
|
||||
{savingSearch ? t('savedPage.updating') : t('common.update')}
|
||||
|
|
@ -216,14 +224,16 @@ export default function Header({
|
|||
</div>
|
||||
)}
|
||||
{/* Left: Logo + nav */}
|
||||
<div className="flex items-center gap-4">
|
||||
<div className="flex min-w-0 items-center gap-4">
|
||||
<a
|
||||
href="/"
|
||||
className="flex cursor-pointer items-center gap-2 hover:opacity-80 transition-opacity"
|
||||
className="flex min-w-0 cursor-pointer items-center gap-2 hover:opacity-80 transition-opacity"
|
||||
onClick={(e) => navLink('home', e)}
|
||||
>
|
||||
<LogoIcon className="w-5 h-5 text-teal-400" />
|
||||
<span className="text-lg font-semibold text-teal-300">{t('header.appName')}</span>
|
||||
<LogoIcon className="w-5 h-5 shrink-0 text-teal-400" />
|
||||
<span className="max-w-[9rem] truncate whitespace-nowrap text-lg font-semibold text-teal-300 sm:max-w-none">
|
||||
{t('header.appName')}
|
||||
</span>
|
||||
</a>
|
||||
|
||||
{/* Desktop nav */}
|
||||
|
|
@ -266,14 +276,14 @@ export default function Header({
|
|||
</div>
|
||||
|
||||
{/* Right side */}
|
||||
<div className="flex items-center gap-2 ml-auto">
|
||||
<div className="ml-auto flex shrink-0 items-center gap-2">
|
||||
{/* Desktop-only dashboard actions */}
|
||||
{!useSidebarNav && activePage === 'dashboard' && (
|
||||
{!useSidebarNav && activePage === 'dashboard' && user && (
|
||||
<>
|
||||
<button
|
||||
onClick={handleShare}
|
||||
disabled={sharing}
|
||||
className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50"
|
||||
disabled={sharing || dashboardActionsBlocked}
|
||||
className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
{sharing ? (
|
||||
<>
|
||||
|
|
@ -295,8 +305,8 @@ export default function Header({
|
|||
{exportState && (
|
||||
<button
|
||||
onClick={() => setExportMenuOpen(true)}
|
||||
disabled={exportState.exporting}
|
||||
className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50"
|
||||
disabled={exportState.exporting || dashboardActionsBlocked}
|
||||
className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:cursor-not-allowed disabled:opacity-50"
|
||||
title={t('header.exportToExcel')}
|
||||
>
|
||||
<DownloadIcon className="w-4 h-4" />
|
||||
|
|
@ -306,8 +316,8 @@ export default function Header({
|
|||
{onSaveSearch && !editingSearch && (
|
||||
<button
|
||||
onClick={onSaveSearch}
|
||||
disabled={savingSearch}
|
||||
className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50"
|
||||
disabled={savingSearch || dashboardActionsBlocked}
|
||||
className="flex cursor-pointer items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
{savingSearch ? (
|
||||
<SpinnerIcon className="w-4 h-4 animate-spin" />
|
||||
|
|
@ -363,7 +373,7 @@ export default function Header({
|
|||
{useSidebarNav && !user && (
|
||||
<button
|
||||
onClick={onRegisterClick}
|
||||
className="cursor-pointer px-4 py-1.5 rounded bg-teal-600 hover:bg-teal-700 transition-colors text-sm font-semibold"
|
||||
className="flex h-8 max-w-[8.5rem] shrink-0 cursor-pointer items-center justify-center truncate whitespace-nowrap rounded bg-teal-600 px-2.5 text-xs font-semibold leading-none transition-colors hover:bg-teal-700 sm:max-w-none sm:px-3 sm:text-sm"
|
||||
>
|
||||
{t('header.createAccount')}
|
||||
</button>
|
||||
|
|
@ -410,6 +420,7 @@ export default function Header({
|
|||
onToggleTheme={onToggleTheme}
|
||||
exportState={exportState}
|
||||
onOpenExportMenu={() => setExportMenuOpen(true)}
|
||||
dashboardActionsDisabled={dashboardActionsBlocked}
|
||||
onSaveSearch={onSaveSearch}
|
||||
savingSearch={savingSearch}
|
||||
isEditingSearch={!!editingSearch}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ interface MobileMenuProps {
|
|||
onToggleTheme: () => void;
|
||||
exportState: HeaderExportState | null;
|
||||
onOpenExportMenu: () => void;
|
||||
dashboardActionsDisabled?: boolean;
|
||||
onSaveSearch: (() => void) | null;
|
||||
savingSearch: boolean;
|
||||
isEditingSearch: boolean;
|
||||
|
|
@ -41,6 +42,7 @@ export default function MobileMenu({
|
|||
onToggleTheme,
|
||||
exportState,
|
||||
onOpenExportMenu,
|
||||
dashboardActionsDisabled = false,
|
||||
onSaveSearch,
|
||||
savingSearch,
|
||||
isEditingSearch,
|
||||
|
|
@ -101,7 +103,7 @@ export default function MobileMenu({
|
|||
</a>
|
||||
);
|
||||
|
||||
const dashboardActions = activePage === 'dashboard' && (
|
||||
const dashboardActions = activePage === 'dashboard' && user && (
|
||||
<div className="px-2 py-2 border-b border-navy-700">
|
||||
<div className="grid grid-cols-2 gap-2">
|
||||
<button
|
||||
|
|
@ -109,7 +111,7 @@ export default function MobileMenu({
|
|||
onShare();
|
||||
onClose();
|
||||
}}
|
||||
disabled={sharing}
|
||||
disabled={sharing || dashboardActionsDisabled}
|
||||
className={dashboardActionClass}
|
||||
>
|
||||
{sharing ? (
|
||||
|
|
@ -127,8 +129,8 @@ export default function MobileMenu({
|
|||
onClose();
|
||||
onOpenExportMenu();
|
||||
}}
|
||||
disabled={exportState.exporting}
|
||||
className={dashboardActionClass}
|
||||
disabled={exportState.exporting || dashboardActionsDisabled}
|
||||
>
|
||||
<DownloadIcon className="w-4 h-4" />
|
||||
{exportState.exporting ? t('header.exporting') : t('header.exportLabel')}
|
||||
|
|
@ -140,7 +142,7 @@ export default function MobileMenu({
|
|||
onSaveSearch();
|
||||
onClose();
|
||||
}}
|
||||
disabled={savingSearch}
|
||||
disabled={savingSearch || dashboardActionsDisabled}
|
||||
className={dashboardActionClass}
|
||||
>
|
||||
{savingSearch ? (
|
||||
|
|
@ -199,7 +201,7 @@ export default function MobileMenu({
|
|||
</button>
|
||||
|
||||
{/* Language selector */}
|
||||
<div className="flex max-w-full gap-1 overflow-x-auto overflow-y-hidden px-3 pb-1 scrollbar-hide">
|
||||
<div className="grid max-w-full grid-cols-3 gap-1 px-3 pb-1">
|
||||
{SUPPORTED_LANGUAGES.map((lang) => (
|
||||
<button
|
||||
key={lang.code}
|
||||
|
|
@ -208,7 +210,7 @@ export default function MobileMenu({
|
|||
localStorage.setItem('language', lang.code);
|
||||
void changeAppLanguage(lang.code);
|
||||
}}
|
||||
className={`flex-none min-w-[2.5rem] flex cursor-pointer items-center justify-center gap-1.5 px-2 py-1.5 rounded text-sm ${
|
||||
className={`flex min-w-0 cursor-pointer items-center justify-center gap-1.5 rounded px-2 py-1.5 text-sm ${
|
||||
i18n.language === lang.code
|
||||
? 'bg-navy-700 text-white font-medium'
|
||||
: 'text-warm-400 hover:bg-navy-800 hover:text-white'
|
||||
|
|
|
|||
|
|
@ -3,12 +3,17 @@ import type { ReactNode } from 'react';
|
|||
interface PillGroupProps {
|
||||
children: ReactNode;
|
||||
className?: string;
|
||||
wrap?: boolean;
|
||||
}
|
||||
|
||||
export function PillGroup({ children, className = '' }: PillGroupProps) {
|
||||
export function PillGroup({ children, className = '', wrap = false }: PillGroupProps) {
|
||||
return (
|
||||
<div
|
||||
className={`flex min-w-0 max-w-full flex-nowrap gap-1.5 overflow-x-auto overscroll-x-contain touch-pan-x touch-pan-y scrollbar-hide md:flex-wrap md:overflow-x-visible ${className}`}
|
||||
className={`flex min-w-0 max-w-full gap-1.5 ${
|
||||
wrap
|
||||
? 'flex-wrap overflow-x-visible'
|
||||
: 'flex-nowrap overflow-x-auto overscroll-x-contain touch-pan-x touch-pan-y scrollbar-hide md:flex-wrap md:overflow-x-visible'
|
||||
} ${className}`}
|
||||
>
|
||||
{children}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { useRef } from 'react';
|
||||
import { createPortal } from 'react-dom';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import type React from 'react';
|
||||
import type { SearchResult } from '../../hooks/useLocationSearch';
|
||||
import { useDropdownPosition } from '../../hooks/useDropdownPosition';
|
||||
|
|
@ -13,6 +14,7 @@ interface SearchHook {
|
|||
activeIndex: number;
|
||||
setActiveIndex: (idx: number) => void;
|
||||
open: boolean;
|
||||
searching?: boolean;
|
||||
handleInputChange: (value: string) => void;
|
||||
handleKeyDown: (e: React.KeyboardEvent, onSelect: (result: SearchResult) => void) => void;
|
||||
showEmptySearches: () => void;
|
||||
|
|
@ -23,6 +25,8 @@ interface PlaceSearchInputProps {
|
|||
onSelect: (result: SearchResult) => void;
|
||||
loading?: boolean;
|
||||
placeholder?: string;
|
||||
ariaLabel?: string;
|
||||
name?: string;
|
||||
size?: 'sm' | 'xs';
|
||||
inputClassName?: string;
|
||||
inputRef?: React.Ref<HTMLInputElement>;
|
||||
|
|
@ -35,19 +39,28 @@ export function PlaceSearchInput({
|
|||
onSelect,
|
||||
loading,
|
||||
placeholder,
|
||||
ariaLabel,
|
||||
name,
|
||||
size = 'sm',
|
||||
inputClassName,
|
||||
inputRef,
|
||||
onInputChange,
|
||||
portal,
|
||||
}: PlaceSearchInputProps) {
|
||||
const { t } = useTranslation();
|
||||
const sm = size === 'sm';
|
||||
const iconSize = sm ? 'w-4 h-4' : 'w-3 h-3';
|
||||
const spinnerSize = sm ? 'w-4 h-4' : 'w-3 h-3';
|
||||
const wrapperRef = useRef<HTMLDivElement>(null);
|
||||
const dropdownPos = useDropdownPosition(wrapperRef, portal ? search.open : false);
|
||||
|
||||
const showDropdown = search.open && search.results.length > 0;
|
||||
const showEmptyResults =
|
||||
search.open &&
|
||||
!search.searching &&
|
||||
search.query.trim().length >= 2 &&
|
||||
search.results.length === 0;
|
||||
const showDropdown = search.open && (search.results.length > 0 || showEmptyResults);
|
||||
const showSpinner = loading || search.searching;
|
||||
|
||||
const dropdown = showDropdown && (
|
||||
<div
|
||||
|
|
@ -64,57 +77,66 @@ export function PlaceSearchInput({
|
|||
: undefined
|
||||
}
|
||||
>
|
||||
{search.results.map((result, idx) => (
|
||||
<button
|
||||
key={
|
||||
result.type === 'postcode'
|
||||
? `pc-${result.label}`
|
||||
: result.type === 'address'
|
||||
? `addr-${result.postcode}-${result.address}-${result.lat}`
|
||||
: `pl-${result.name}-${result.lat}`
|
||||
}
|
||||
type="button"
|
||||
className={`w-full text-left flex items-center cursor-pointer ${
|
||||
sm ? 'px-3 py-2 gap-2 text-sm' : 'px-2 py-1.5 gap-1.5 text-xs'
|
||||
} ${
|
||||
idx === search.activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30'
|
||||
: 'hover:bg-warm-50 dark:hover:bg-warm-700'
|
||||
}`}
|
||||
onMouseEnter={() => search.setActiveIndex(idx)}
|
||||
onMouseDown={(e) => {
|
||||
e.preventDefault();
|
||||
onSelect(result);
|
||||
}}
|
||||
{showEmptyResults ? (
|
||||
<div
|
||||
className={`text-warm-500 dark:text-warm-400 ${sm ? 'px-3 py-2 text-sm' : 'px-2 py-1.5 text-xs'}`}
|
||||
role="status"
|
||||
>
|
||||
{result.type === 'postcode' ? (
|
||||
<>
|
||||
<SearchIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
|
||||
<span className="text-warm-700 dark:text-warm-200">{result.label}</span>
|
||||
</>
|
||||
) : result.type === 'address' ? (
|
||||
<>
|
||||
<HouseIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
|
||||
<span className="min-w-0 text-warm-700 dark:text-warm-200">
|
||||
<span className="block truncate">{result.address}</span>
|
||||
<span className="block truncate text-warm-400 dark:text-warm-500">
|
||||
{result.postcode}
|
||||
{t('locationSearch.noResults')}
|
||||
</div>
|
||||
) : (
|
||||
search.results.map((result, idx) => (
|
||||
<button
|
||||
key={
|
||||
result.type === 'postcode'
|
||||
? `pc-${result.label}`
|
||||
: result.type === 'address'
|
||||
? `addr-${result.postcode}-${result.address}-${result.lat}`
|
||||
: `pl-${result.name}-${result.lat}`
|
||||
}
|
||||
type="button"
|
||||
className={`w-full text-left flex items-center cursor-pointer ${
|
||||
sm ? 'px-3 py-2 gap-2 text-sm' : 'px-2 py-1.5 gap-1.5 text-xs'
|
||||
} ${
|
||||
idx === search.activeIndex
|
||||
? 'bg-teal-50 dark:bg-teal-900/30'
|
||||
: 'hover:bg-warm-50 dark:hover:bg-warm-700'
|
||||
}`}
|
||||
onMouseEnter={() => search.setActiveIndex(idx)}
|
||||
onMouseDown={(e) => {
|
||||
e.preventDefault();
|
||||
onSelect(result);
|
||||
}}
|
||||
>
|
||||
{result.type === 'postcode' ? (
|
||||
<>
|
||||
<SearchIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
|
||||
<span className="text-warm-700 dark:text-warm-200">{result.label}</span>
|
||||
</>
|
||||
) : result.type === 'address' ? (
|
||||
<>
|
||||
<HouseIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
|
||||
<span className="min-w-0 text-warm-700 dark:text-warm-200">
|
||||
<span className="block truncate">{result.address}</span>
|
||||
<span className="block truncate text-warm-400 dark:text-warm-500">
|
||||
{result.postcode}
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<MapPinIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
|
||||
<span className="text-warm-700 dark:text-warm-200">
|
||||
{result.name}
|
||||
{result.city && (
|
||||
<span className="text-warm-400 dark:text-warm-500"> ({result.city})</span>
|
||||
)}
|
||||
</span>
|
||||
</>
|
||||
)}
|
||||
</button>
|
||||
))}
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<MapPinIcon className={`${iconSize} text-warm-400 dark:text-warm-500 shrink-0`} />
|
||||
<span className="text-warm-700 dark:text-warm-200">
|
||||
{result.name}
|
||||
{result.city && (
|
||||
<span className="text-warm-400 dark:text-warm-500"> ({result.city})</span>
|
||||
)}
|
||||
</span>
|
||||
</>
|
||||
)}
|
||||
</button>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
|
||||
|
|
@ -123,6 +145,7 @@ export function PlaceSearchInput({
|
|||
<input
|
||||
ref={inputRef}
|
||||
type="text"
|
||||
name={name}
|
||||
value={search.query}
|
||||
onChange={(e) => {
|
||||
search.handleInputChange(e.target.value);
|
||||
|
|
@ -132,11 +155,12 @@ export function PlaceSearchInput({
|
|||
search.showEmptySearches();
|
||||
}}
|
||||
onKeyDown={(e) => search.handleKeyDown(e, onSelect)}
|
||||
aria-label={ariaLabel ?? placeholder}
|
||||
placeholder={placeholder}
|
||||
className={inputClassName}
|
||||
/>
|
||||
|
||||
{loading && (
|
||||
{showSpinner && (
|
||||
<div
|
||||
className={`absolute right-2 top-1/2 -translate-y-1/2 ${spinnerSize} border-2 border-warm-300 dark:border-warm-600 border-t-teal-500 rounded-full animate-spin`}
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -4,18 +4,27 @@ interface SearchInputProps {
|
|||
value: string;
|
||||
onChange: (value: string) => void;
|
||||
placeholder?: string;
|
||||
ariaLabel?: string;
|
||||
className?: string;
|
||||
}
|
||||
|
||||
export function SearchInput({ value, onChange, placeholder, className = '' }: SearchInputProps) {
|
||||
export function SearchInput({
|
||||
value,
|
||||
onChange,
|
||||
placeholder,
|
||||
ariaLabel,
|
||||
className = '',
|
||||
}: SearchInputProps) {
|
||||
const { t } = useTranslation();
|
||||
const inputPlaceholder = placeholder ?? t('common.search');
|
||||
|
||||
return (
|
||||
<input
|
||||
type="text"
|
||||
value={value}
|
||||
onChange={(e) => onChange(e.target.value)}
|
||||
placeholder={placeholder ?? t('common.search')}
|
||||
placeholder={inputPlaceholder}
|
||||
aria-label={ariaLabel ?? inputPlaceholder}
|
||||
className={`w-full px-2 py-1 text-sm border rounded bg-white dark:bg-navy-800 dark:text-warm-200 border-warm-200 dark:border-navy-700 placeholder-warm-400 dark:placeholder-warm-500 focus:outline-none focus:ring-1 focus:ring-teal-400 ${className}`}
|
||||
/>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ const LISTING_CLUSTER_MAX_ZOOM = 24;
|
|||
const LISTING_CLUSTER_POPUP_LIMIT = 30;
|
||||
const LISTING_SPIDERFY_LIMIT = 12;
|
||||
const TILE_SIZE = 512;
|
||||
const PRICE_LABEL_CHARACTER_SET = '£0123456789.kM';
|
||||
|
||||
interface SingleListingPopupInfo {
|
||||
mode: 'single';
|
||||
|
|
@ -472,6 +473,7 @@ export function useListingLayers({ listings, zoom, isDark }: UseListingLayersPro
|
|||
outlineWidth: 3,
|
||||
outlineColor: isDark ? [10, 10, 10, 220] : [255, 255, 255, 230],
|
||||
fontSettings: { sdf: true },
|
||||
characterSet: PRICE_LABEL_CHARACTER_SET,
|
||||
sizeUnits: 'pixels',
|
||||
sizeMinPixels: 10,
|
||||
sizeMaxPixels: 14,
|
||||
|
|
|
|||
|
|
@ -160,6 +160,7 @@ export function useLocationSearch(mode?: string) {
|
|||
const [recentSearches, setRecentSearches] = useState<SearchResult[]>(readRecentSearches);
|
||||
const [activeIndex, setActiveIndex] = useState(-1);
|
||||
const [open, setOpen] = useState(false);
|
||||
const [searching, setSearching] = useState(false);
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
const latestQueryRef = useRef('');
|
||||
|
|
@ -176,6 +177,7 @@ export function useLocationSearch(mode?: string) {
|
|||
|
||||
const trimmed = value.trim();
|
||||
if (!trimmed) {
|
||||
setSearching(false);
|
||||
setResults(recentSearches);
|
||||
lastResultsRef.current = [];
|
||||
setOpen(recentSearches.length > 0);
|
||||
|
|
@ -183,6 +185,7 @@ export function useLocationSearch(mode?: string) {
|
|||
}
|
||||
|
||||
if (!mode && looksLikePostcode(trimmed)) {
|
||||
setSearching(false);
|
||||
const postcodeResults: SearchResult[] = [
|
||||
{ type: 'postcode', label: normalizePostcode(trimmed) },
|
||||
];
|
||||
|
|
@ -192,6 +195,7 @@ export function useLocationSearch(mode?: string) {
|
|||
}
|
||||
|
||||
if (trimmed.length < 2) {
|
||||
setSearching(false);
|
||||
setResults([]);
|
||||
setOpen(false);
|
||||
return;
|
||||
|
|
@ -200,6 +204,7 @@ export function useLocationSearch(mode?: string) {
|
|||
const locallyFilteredResults = filterResultsForQuery(lastResultsRef.current, trimmed);
|
||||
setResults(locallyFilteredResults);
|
||||
setOpen(locallyFilteredResults.length > 0);
|
||||
setSearching(true);
|
||||
|
||||
debounceRef.current = setTimeout(async () => {
|
||||
const controller = new AbortController();
|
||||
|
|
@ -211,7 +216,13 @@ export function useLocationSearch(mode?: string) {
|
|||
`/api/places?${params}`,
|
||||
authHeaders({ signal: controller.signal })
|
||||
);
|
||||
if (!res.ok) return;
|
||||
if (!res.ok) {
|
||||
if (!controller.signal.aborted && latestQueryRef.current.trim() === trimmed) {
|
||||
setResults([]);
|
||||
setOpen(true);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const json: {
|
||||
places: PlaceResult[];
|
||||
postcodes?: string[];
|
||||
|
|
@ -253,9 +264,17 @@ export function useLocationSearch(mode?: string) {
|
|||
lastResultsRef.current = combinedResults;
|
||||
const matchingResults = filterResultsForQuery(combinedResults, trimmed);
|
||||
setResults(matchingResults);
|
||||
setOpen(matchingResults.length > 0);
|
||||
setOpen(true);
|
||||
} catch (err) {
|
||||
logNonAbortError('places search', err);
|
||||
if (!controller.signal.aborted && latestQueryRef.current.trim() === trimmed) {
|
||||
setResults([]);
|
||||
setOpen(true);
|
||||
}
|
||||
} finally {
|
||||
if (!controller.signal.aborted && latestQueryRef.current.trim() === trimmed) {
|
||||
setSearching(false);
|
||||
}
|
||||
}
|
||||
}, 200);
|
||||
},
|
||||
|
|
@ -264,7 +283,7 @@ export function useLocationSearch(mode?: string) {
|
|||
|
||||
const showEmptySearches = useCallback(() => {
|
||||
if (latestQueryRef.current.trim()) {
|
||||
setOpen(results.length > 0);
|
||||
setOpen(results.length > 0 || latestQueryRef.current.trim().length >= 2);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -278,6 +297,7 @@ export function useLocationSearch(mode?: string) {
|
|||
const clear = useCallback(() => {
|
||||
setQuery('');
|
||||
latestQueryRef.current = '';
|
||||
setSearching(false);
|
||||
setResults([]);
|
||||
lastResultsRef.current = [];
|
||||
setOpen(false);
|
||||
|
|
@ -308,6 +328,8 @@ export function useLocationSearch(mode?: string) {
|
|||
e.preventDefault();
|
||||
if (activeIndex >= 0 && activeIndex < results.length) {
|
||||
onSelect(results[activeIndex]);
|
||||
} else if (results.length > 0) {
|
||||
onSelect(results[0]);
|
||||
} else if (looksLikePostcode(query)) {
|
||||
onSelect({ type: 'postcode', label: normalizePostcode(query) });
|
||||
}
|
||||
|
|
@ -332,6 +354,7 @@ export function useLocationSearch(mode?: string) {
|
|||
activeIndex,
|
||||
setActiveIndex,
|
||||
open,
|
||||
searching,
|
||||
setOpen,
|
||||
handleInputChange,
|
||||
handleKeyDown,
|
||||
|
|
|
|||
|
|
@ -916,6 +916,7 @@ const de: Translations = {
|
|||
// ── Location Search ────────────────────────────────
|
||||
locationSearch: {
|
||||
placeholder: 'Orte oder Postcodes suchen...',
|
||||
noResults: 'Keine passenden Orte oder Postcodes',
|
||||
postcodeNotFound: 'Postcode nicht gefunden',
|
||||
lookupFailed: 'Suche fehlgeschlagen',
|
||||
searchLabel: 'Orte oder Postcodes suchen',
|
||||
|
|
|
|||
|
|
@ -892,6 +892,7 @@ const en = {
|
|||
// ── Location Search ────────────────────────────────
|
||||
locationSearch: {
|
||||
placeholder: 'Search places or postcodes...',
|
||||
noResults: 'No matching places or postcodes',
|
||||
postcodeNotFound: 'Postcode not found',
|
||||
lookupFailed: 'Lookup failed',
|
||||
searchLabel: 'Search places or postcodes',
|
||||
|
|
|
|||
|
|
@ -924,6 +924,7 @@ const fr: Translations = {
|
|||
// ── Location Search ────────────────────────────────
|
||||
locationSearch: {
|
||||
placeholder: 'Rechercher des lieux ou codes postaux...',
|
||||
noResults: 'Aucun lieu ni code postal correspondant',
|
||||
postcodeNotFound: 'Code postal introuvable',
|
||||
lookupFailed: 'Échec de la recherche',
|
||||
searchLabel: 'Rechercher des lieux ou codes postaux',
|
||||
|
|
|
|||
|
|
@ -876,6 +876,7 @@ const hi: Translations = {
|
|||
|
||||
locationSearch: {
|
||||
placeholder: 'स्थान या पोस्टकोड खोजें...',
|
||||
noResults: 'कोई मिलती-जुलती जगह या पोस्टकोड नहीं मिला',
|
||||
postcodeNotFound: 'पोस्टकोड नहीं मिला',
|
||||
lookupFailed: 'खोज विफल रही',
|
||||
searchLabel: 'स्थान या पोस्टकोड खोजें',
|
||||
|
|
|
|||
|
|
@ -910,6 +910,7 @@ const hu: Translations = {
|
|||
// ── Location Search ────────────────────────────────
|
||||
locationSearch: {
|
||||
placeholder: 'Helyek vagy irányítószámok keresése...',
|
||||
noResults: 'Nincs egyező hely vagy irányítószám',
|
||||
postcodeNotFound: 'Irányítószám nem található',
|
||||
lookupFailed: 'A keresés sikertelen',
|
||||
searchLabel: 'Helyek vagy irányítószámok keresése',
|
||||
|
|
|
|||
|
|
@ -850,6 +850,7 @@ const zh: Translations = {
|
|||
// ── Location Search ────────────────────────────────
|
||||
locationSearch: {
|
||||
placeholder: '搜索地点或邮编...',
|
||||
noResults: '未找到匹配的地点或邮编',
|
||||
postcodeNotFound: '未找到该邮编',
|
||||
lookupFailed: '查询失败',
|
||||
searchLabel: '搜索地点或邮编',
|
||||
|
|
|
|||
|
|
@ -1,19 +1,33 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import zipfile
|
||||
import json
|
||||
import zipfile
|
||||
|
||||
import polars as pl
|
||||
|
||||
from pipeline.validate_outputs import main
|
||||
|
||||
|
||||
def write_boundary(path, postcodes):
|
||||
def polygon(offset=0):
    """Return a small, valid GeoJSON ``Polygon`` geometry for test fixtures.

    The shape is a closed 0.001 x 0.001 square whose lower-left corner sits at
    ``(offset, 0.0)``, so distinct integer offsets produce disjoint squares.

    Args:
        offset: X coordinate of the lower-left corner; coerced with ``float()``.

    Returns:
        A dict with ``"type": "Polygon"`` and one exterior ring of ``(x, y)``
        tuples whose first and last vertices are identical (a closed ring).
    """
    x = float(offset)
    return {
        "type": "Polygon",
        "coordinates": [
            [(x, 0.0), (x + 0.001, 0.0), (x + 0.001, 0.001), (x, 0.001), (x, 0.0)]
        ],
    }
|
||||
|
||||
|
||||
def write_boundary(path, postcodes, geometries=None):
|
||||
units = path / "units"
|
||||
units.mkdir(parents=True)
|
||||
features = [
|
||||
{"type": "Feature", "properties": {"postcodes": postcode}, "geometry": None}
|
||||
for postcode in postcodes
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": {"postcodes": postcode},
|
||||
"geometry": (geometries[index] if geometries else polygon(index)),
|
||||
}
|
||||
for index, postcode in enumerate(postcodes)
|
||||
]
|
||||
(units / "AA1.geojson").write_text(
|
||||
json.dumps({"type": "FeatureCollection", "features": features})
|
||||
|
|
@ -111,3 +125,100 @@ def test_rejects_postcode_boundary_mismatch(tmp_path, monkeypatch, capsys):
|
|||
stderr = capsys.readouterr().err
|
||||
assert "missing boundaries" in stderr
|
||||
assert "boundary postcodes are absent" in stderr
|
||||
|
||||
|
||||
def test_rejects_invalid_postcode_boundary_features(tmp_path, monkeypatch, capsys):
    """``--postcode-boundary-match`` must fail on malformed boundary features.

    Writes a one-postcode parquet and a single ``units/AA1.geojson`` file whose
    features each carry a different defect, then runs ``main()`` and checks
    that it exits with 1 and that stderr mentions every expected problem.
    """
    postcodes_path = tmp_path / "postcodes.parquet"
    boundaries_path = tmp_path / "postcode_boundaries"
    units = boundaries_path / "units"
    units.mkdir(parents=True)
    pl.DataFrame({"postcode": ["AA1 1AA"]}).write_parquet(postcodes_path)

    # Self-intersecting ring (edges cross at (0.5, 0.5)).
    bowtie = {
        "type": "Polygon",
        "coordinates": [[(0, 0), (1, 1), (1, 0), (0, 1), (0, 0)]],
    }
    features = [
        # The first two features share a postcode -> duplicate feature.
        {
            "type": "Feature",
            "properties": {"postcodes": "AA1 1AA"},
            "geometry": polygon(),
        },
        {
            "type": "Feature",
            "properties": {"postcodes": "AA1 1AA"},
            "geometry": polygon(1),
        },
        # No ``postcodes`` property at all.
        {"type": "Feature", "properties": {}, "geometry": polygon(2)},
        # Null geometry.
        {"type": "Feature", "properties": {"postcodes": "AA1 1AB"}, "geometry": None},
        # Geometry present but self-intersecting.
        {"type": "Feature", "properties": {"postcodes": "AA1 1AC"}, "geometry": bowtie},
    ]
    (units / "AA1.geojson").write_text(
        json.dumps({"type": "FeatureCollection", "features": features})
    )

    monkeypatch.setattr(
        "sys.argv",
        [
            "validate_outputs",
            "--postcode-boundary-match",
            f"{postcodes_path}::{boundaries_path}",
        ],
    )

    assert main() == 1
    stderr = capsys.readouterr().err
    assert "duplicate boundary postcode features" in stderr
    assert "missing properties.postcodes" in stderr
    assert "missing or empty geometry" in stderr
    assert "invalid boundary geometries" in stderr
|
||||
|
||||
|
||||
def test_validates_active_english_arcgis_boundary_matches(tmp_path, monkeypatch):
    """``--active-postcode-boundary-match`` passes when boundaries match.

    The ArcGIS fixture has three rows: one English postcode with no ``doterm``,
    one English postcode with a ``doterm`` value, and one postcode with a
    non-English country code (``W92000004``). Only ``AA1 1AA`` gets a boundary,
    and ``main()`` is expected to return 0 -- i.e. the other two rows are not
    required to have boundaries (presumably because they are terminated /
    not English; confirm against ``validate_outputs``).
    """
    arcgis_path = tmp_path / "arcgis.parquet"
    boundaries_path = tmp_path / "postcode_boundaries"
    pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "AA1 1AB", "CF1 1AA"],
            "ctry25cd": ["E92000001", "E92000001", "W92000004"],
            "doterm": [None, "2020-01-01", None],
        }
    ).write_parquet(arcgis_path)
    write_boundary(boundaries_path, ["AA1 1AA"])

    monkeypatch.setattr(
        "sys.argv",
        [
            "validate_outputs",
            "--active-postcode-boundary-match",
            f"{arcgis_path}::{boundaries_path}",
        ],
    )

    assert main() == 0
|
||||
|
||||
|
||||
def test_rejects_active_english_arcgis_boundary_mismatch(tmp_path, monkeypatch, capsys):
    """``--active-postcode-boundary-match`` fails when the two sets differ.

    Every fixture row has a null ``doterm``. Boundaries are written for
    ``AA1 1AA`` and ``CF1 1AA`` only, so the English postcode ``AA1 1AB`` has
    no boundary and the boundary set contains a postcode with a non-English
    country code. ``main()`` must return 1 and report the mismatch on stderr.
    """
    arcgis_path = tmp_path / "arcgis.parquet"
    boundaries_path = tmp_path / "postcode_boundaries"
    pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "AA1 1AB", "CF1 1AA"],
            "ctry25cd": ["E92000001", "E92000001", "W92000004"],
            "doterm": [None, None, None],
        }
    ).write_parquet(arcgis_path)
    write_boundary(boundaries_path, ["AA1 1AA", "CF1 1AA"])

    monkeypatch.setattr(
        "sys.argv",
        [
            "validate_outputs",
            "--active-postcode-boundary-match",
            f"{arcgis_path}::{boundaries_path}",
        ],
    )

    assert main() == 1
    stderr = capsys.readouterr().err
    # NOTE(review): the first needle is a substring of the second, so the first
    # assertion cannot fail independently. If the intent is to check both
    # directions of the mismatch (missing boundary vs. extra boundary), assert
    # on text unique to the "missing" message -- confirm the exact wording in
    # pipeline/validate_outputs.py.
    assert "active English postcodes" in stderr
    assert "not active English postcodes" in stderr
|
||||
|
|
|
|||
|
|
@ -46,8 +46,21 @@ def _require_tippecanoe() -> str:
|
|||
return executable
|
||||
|
||||
|
||||
def _write_geojsonseq(csvs: list[Path], output_path: Path) -> int:
|
||||
df = (
|
||||
def _write_geojsonseq(csvs: list[Path], output_path: Path) -> tuple[int, int]:
|
||||
"""Write one weighted GeoJSON point per distinct (anchor, month, type).
|
||||
|
||||
Returns ``(feature_count, incident_count)``. police.uk snaps every incident
|
||||
to a shared "map point" anchor, so many incidents land on the exact same
|
||||
coordinate. Collapsing them into one feature carrying ``count`` (the number
|
||||
of incidents) keeps the per-crime-type and per-month filters intact while
|
||||
turning each hotspot into a single high-weight point. That matters because
|
||||
tippecanoe's ``--drop-densest-as-needed`` thins *feature density*, not
|
||||
weight: with one feature per row the busiest streets were silently deleted;
|
||||
with one weighted feature per anchor those hotspots survive and the dropped
|
||||
detail is only redundant duplicate points. The heatmap reads ``count`` as
|
||||
its weight.
|
||||
"""
|
||||
grouped = (
|
||||
pl.scan_csv(
|
||||
csvs,
|
||||
schema_overrides={
|
||||
|
|
@ -67,11 +80,15 @@ def _write_geojsonseq(csvs: list[Path], output_path: Path) -> int:
|
|||
.drop_nulls(["lon", "lat"])
|
||||
.filter(pl.col("lon").is_between(-9.5, 5.0))
|
||||
.filter(pl.col("lat").is_between(49.0, 57.0))
|
||||
.group_by("lon", "lat", "month", "crime_type")
|
||||
.len()
|
||||
.rename({"len": "count"})
|
||||
.collect(engine="streaming")
|
||||
)
|
||||
|
||||
incident_count = int(grouped["count"].sum())
|
||||
with output_path.open("w") as file:
|
||||
for row in df.iter_rows(named=True):
|
||||
for row in grouped.iter_rows(named=True):
|
||||
feature = {
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
|
|
@ -79,15 +96,15 @@ def _write_geojsonseq(csvs: list[Path], output_path: Path) -> int:
|
|||
"coordinates": [row["lon"], row["lat"]],
|
||||
},
|
||||
"properties": {
|
||||
"count": 1,
|
||||
"weight": 1,
|
||||
"count": row["count"],
|
||||
"weight": row["count"],
|
||||
"month": row["month"],
|
||||
"crime_type": row["crime_type"],
|
||||
},
|
||||
}
|
||||
file.write(json.dumps(feature, separators=(",", ":")) + "\n")
|
||||
|
||||
return df.height
|
||||
return grouped.height, incident_count
|
||||
|
||||
|
||||
def build_crime_hotspot_tiles(
|
||||
|
|
@ -104,9 +121,10 @@ def build_crime_hotspot_tiles(
|
|||
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as tmp:
|
||||
ndjson_path = Path(tmp) / "crime_hotspots.geojsonseq"
|
||||
feature_count = _write_geojsonseq(csvs, ndjson_path)
|
||||
feature_count, incident_count = _write_geojsonseq(csvs, ndjson_path)
|
||||
print(
|
||||
f"Writing {feature_count:,} approximate crime heatmap points "
|
||||
f"Writing {feature_count:,} weighted crime heatmap points "
|
||||
f"({incident_count:,} incidents) "
|
||||
f"from {min(selected_months)} to {max(selected_months)}"
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,25 @@
|
|||
"""Aggregate police.uk street crime to postcodes by 50m spatial proximity.
|
||||
"""Aggregate police.uk street crime to postcodes by spatial proximity.
|
||||
|
||||
Instead of attributing each incident to its published LSOA code, this transform
|
||||
counts the anonymised incident *points* that fall within 50m of each postcode's
|
||||
boundary polygon (the polygon buffered outward by 50m). A point inside several
|
||||
overlapping buffers counts for each postcode -- the same multiplicity the
|
||||
tree-density filter uses for features near more than one postcode.
|
||||
counts the anonymised incident *points* that fall within ``buffer_m`` (default
|
||||
100m) of each postcode's boundary polygon (the polygon buffered outward). A point
|
||||
inside several overlapping buffers counts for each postcode -- the same
|
||||
multiplicity the tree-density filter uses for features near more than one
|
||||
postcode. The wide 100m buffer deliberately smooths police.uk's snap-to-grid
|
||||
coordinates, which would otherwise make the count hypersensitive to which side of
|
||||
a narrow line a shared "map point" anchor happened to land on.
|
||||
|
||||
The metric is a raw annualised count ("incidents/year within 50m"); there is no
|
||||
per-capita denominator. Outputs mirror the old LSOA transform's shape but are
|
||||
keyed on ``postcode`` instead of ``LSOA code``:
|
||||
Counts are **area-normalised**: each postcode's count is divided by its buffered
|
||||
catchment area and rescaled by the median catchment area, so the metric reflects
|
||||
crime *density* rather than how much ground the buffer sweeps (a median-sized
|
||||
catchment is left unchanged; a large rural postcode is no longer inflated simply
|
||||
for covering more of the map). Normalising by the buffered area -- the region
|
||||
that actually collects points -- rather than the raw polygon keeps tiny unit
|
||||
postcodes from being over-inflated by the fixed buffer-ring floor. The headline
|
||||
``"{type} (avg/yr)"`` is the simple mean of the per-year annualised counts, so it
|
||||
equals the average of the by-year chart bars.
|
||||
|
||||
Outputs mirror the old LSOA transform's shape but are keyed on ``postcode``:
|
||||
|
||||
* ``crime_by_postcode.parquet`` -- ``postcode`` + ``"{type} (avg/yr)"`` columns.
|
||||
* ``crime_by_postcode_by_year.parquet`` -- ``postcode`` + ``"{type} (by year)"``
|
||||
|
|
@ -16,14 +27,15 @@ keyed on ``postcode`` instead of ``LSOA code``:
|
|||
|
||||
Caveat: police.uk coordinates are snapped to a fixed set of anonymous "map
|
||||
points", not true locations, and a share of rows have no coordinate at all
|
||||
(dropped here). Spatial totals are therefore lower than, and fuzzier than, the
|
||||
old LSOA-tagged counts -- by design, not a regression.
|
||||
(dropped here). Spatial totals are therefore fuzzier than the old LSOA-tagged
|
||||
counts -- by design, not a regression.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
|
@ -41,7 +53,7 @@ from pipeline.transform.postcode_boundaries.loader import load_postcode_polygons
|
|||
# Serious types first so column order is stable and self-documenting.
|
||||
ALL_CRIME_TYPES: tuple[str, ...] = SERIOUS_CRIME_TYPES + MINOR_CRIME_TYPES
|
||||
|
||||
DEFAULT_BUFFER_M = 50.0
|
||||
DEFAULT_BUFFER_M = 100.0
|
||||
MONTH_DIR_RE = re.compile(r"^\d{4}-\d{2}$")
|
||||
|
||||
# Generous GB bounds; points outside fall in no English postcode anyway, but
|
||||
|
|
@ -108,10 +120,11 @@ def _accumulate_counts(
|
|||
"Month": pl.Utf8,
|
||||
"Crime type": pl.Utf8,
|
||||
}
|
||||
known_types = list(type_to_idx)
|
||||
years = list(year_to_idx)
|
||||
total_points = 0
|
||||
total_matches = 0
|
||||
total_dropped = 0
|
||||
unknown_type_counts: dict[str, int] = {}
|
||||
|
||||
for start in range(0, len(csvs), _CSV_BATCH):
|
||||
batch = csvs[start : start + _CSV_BATCH]
|
||||
|
|
@ -122,31 +135,47 @@ def _accumulate_counts(
|
|||
ignore_errors=True,
|
||||
)
|
||||
.select("Longitude", "Latitude", "Month", "Crime type")
|
||||
.with_columns(pl.col("Month").str.slice(0, 4).cast(pl.Int32).alias("year"))
|
||||
# strict=False: a single malformed Month drops only that row instead
|
||||
# of aborting the whole build (a non-numeric year becomes null and is
|
||||
# filtered out by the year membership check below).
|
||||
.with_columns(
|
||||
pl.col("Month").str.slice(0, 4).cast(pl.Int32, strict=False).alias("year")
|
||||
)
|
||||
.filter(
|
||||
pl.col("Longitude").is_not_null()
|
||||
& pl.col("Latitude").is_not_null()
|
||||
& pl.col("Longitude").is_between(*LON_BOUNDS)
|
||||
& pl.col("Latitude").is_between(*LAT_BOUNDS)
|
||||
& pl.col("Crime type").is_in(known_types)
|
||||
& pl.col("year").is_in(list(year_to_idx))
|
||||
& pl.col("Crime type").is_not_null()
|
||||
& (pl.col("Crime type") != "")
|
||||
& pl.col("year").is_in(years)
|
||||
)
|
||||
# Map crime types to indices with default=None so an unrecognised
|
||||
# type yields a null index we can *report* rather than silently drop
|
||||
# (the legacy LSOA path surfaced unknown types via its dynamic pivot).
|
||||
.with_columns(
|
||||
pl.col("Crime type")
|
||||
.replace_strict(type_to_idx, return_dtype=pl.Int32)
|
||||
.replace_strict(type_to_idx, default=None, return_dtype=pl.Int32)
|
||||
.alias("tidx"),
|
||||
pl.col("year")
|
||||
.replace_strict(year_to_idx, return_dtype=pl.Int32)
|
||||
.alias("yidx"),
|
||||
)
|
||||
.select("Longitude", "Latitude", "tidx", "yidx")
|
||||
.select("Longitude", "Latitude", "Crime type", "tidx", "yidx")
|
||||
.collect(engine="streaming")
|
||||
)
|
||||
|
||||
rows_in = frame.height
|
||||
if rows_in == 0:
|
||||
if frame.height == 0:
|
||||
continue
|
||||
|
||||
unknown = frame.filter(pl.col("tidx").is_null())
|
||||
if unknown.height:
|
||||
for name, cnt in unknown.group_by("Crime type").len().iter_rows():
|
||||
unknown_type_counts[name] = unknown_type_counts.get(name, 0) + cnt
|
||||
frame = frame.filter(pl.col("tidx").is_not_null())
|
||||
if frame.height == 0:
|
||||
continue
|
||||
|
||||
lon = frame["Longitude"].to_numpy()
|
||||
lat = frame["Latitude"].to_numpy()
|
||||
tidx = frame["tidx"].to_numpy()
|
||||
|
|
@ -177,6 +206,20 @@ def _accumulate_counts(
|
|||
|
||||
if total_dropped:
|
||||
print(f"Dropped {total_dropped:,} points outside the BNG transform domain")
|
||||
if unknown_type_counts:
|
||||
total_unknown = sum(unknown_type_counts.values())
|
||||
listed = ", ".join(
|
||||
f"{name!r} ({cnt:,})"
|
||||
for name, cnt in sorted(
|
||||
unknown_type_counts.items(), key=lambda kv: kv[1], reverse=True
|
||||
)
|
||||
)
|
||||
print(
|
||||
f"WARNING: dropped {total_unknown:,} incidents with crime types not in "
|
||||
f"ALL_CRIME_TYPES (taxonomy is stale -- update SERIOUS/MINOR_CRIME_TYPES): "
|
||||
f"{listed}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
|
||||
def _rollup_long(
|
||||
|
|
@ -195,12 +238,27 @@ def _rollup_long(
|
|||
def _write_avg_yr(
|
||||
postcodes: np.ndarray,
|
||||
counts: np.ndarray,
|
||||
valid_month_count: int,
|
||||
years: list[int],
|
||||
months_in_year: dict[int, int],
|
||||
norm: np.ndarray,
|
||||
output_path: Path,
|
||||
) -> None:
|
||||
"""Write ``postcode`` + ``"{type} (avg/yr)"`` annualised totals."""
|
||||
totals = counts.sum(axis=2) # (n_postcodes, n_types)
|
||||
avg = np.round(totals / valid_month_count * 12.0, 1).astype(np.float32)
|
||||
"""Write ``postcode`` + ``"{type} (avg/yr)"`` density-normalised averages.
|
||||
|
||||
The headline figure is the **simple mean of the per-year annualised counts**
|
||||
(each year scaled to a 12-month equivalent), so it equals the average of the
|
||||
by-year chart bars instead of a month-weighted pooled rate. Each postcode's
|
||||
value is then multiplied by ``norm`` (median_area / buffered catchment area)
|
||||
so the metric is a density rather than a footprint-inflated raw count.
|
||||
"""
|
||||
months = np.array([months_in_year[year] for year in years], dtype=np.float64)
|
||||
per_year = counts.astype(np.float64) * 12.0 / months[None, None, :]
|
||||
# Average over the years each type is actually observed anywhere -- the same
|
||||
# per-type x-span the by-year chart plots (server-rs/.../crime_by_year.rs).
|
||||
type_year_present = counts.sum(axis=0) > 0 # (n_types, n_years)
|
||||
years_per_type = np.clip(type_year_present.sum(axis=1), 1, None).astype(np.float64)
|
||||
avg = per_year.sum(axis=2) / years_per_type[None, :] # (n_postcodes, n_types)
|
||||
avg = np.round(avg * norm[:, None], 1).astype(np.float32)
|
||||
|
||||
data: dict[str, np.ndarray] = {"postcode": postcodes}
|
||||
for type_idx, name in enumerate(ALL_CRIME_TYPES):
|
||||
|
|
@ -216,11 +274,20 @@ def _write_by_year(
|
|||
counts: np.ndarray,
|
||||
years: list[int],
|
||||
months_in_year: dict[int, int],
|
||||
norm: np.ndarray,
|
||||
output_path: Path,
|
||||
) -> None:
|
||||
"""Write nested ``"{type} (by year)"`` series plus Serious/Minor rollups."""
|
||||
"""Write nested ``"{type} (by year)"`` series plus Serious/Minor rollups.
|
||||
|
||||
Per-year counts are area-normalised by the same ``norm`` (median_area /
|
||||
buffered catchment area) factor applied to the avg/yr headline, so the chart
|
||||
bars and the headline figure remain mutually consistent.
|
||||
"""
|
||||
months = np.array([months_in_year[year] for year in years], dtype=np.float64)
|
||||
annual = np.round(counts.astype(np.float64) * 12.0 / months[None, None, :], 1)
|
||||
annual = np.round(
|
||||
counts.astype(np.float64) * 12.0 / months[None, None, :] * norm[:, None, None],
|
||||
1,
|
||||
)
|
||||
|
||||
pc_i, ty_i, yr_i = np.nonzero(counts)
|
||||
if pc_i.size == 0:
|
||||
|
|
@ -278,8 +345,27 @@ def transform_crime_spatial(
|
|||
)
|
||||
|
||||
postcodes, polygons = load_postcode_polygons(boundaries_dir, max_postcodes)
|
||||
|
||||
print(f"Buffering {len(postcodes):,} postcode polygons by {buffer_m:g}m...")
|
||||
_buffers, tree = _build_tree(polygons, buffer_m)
|
||||
buffers, tree = _build_tree(polygons, buffer_m)
|
||||
|
||||
# Area-normalisation factor (median_area / catchment_area): divides out the
|
||||
# size of each postcode's catchment so the count measures crime density, not
|
||||
# how much ground the buffer sweeps. We normalise by the *buffered* area --
|
||||
# the region that actually collects points -- rather than the raw polygon, so
|
||||
# a tiny unit postcode isn't over-inflated by the fixed buffer-ring floor.
|
||||
# Buffers are in EPSG:27700, so shapely.area is in m^2.
|
||||
areas = shapely.area(buffers).astype(np.float64)
|
||||
usable_area = np.isfinite(areas) & (areas > 0)
|
||||
if not usable_area.any():
|
||||
raise ValueError("No postcode buffers have a positive area to normalise by")
|
||||
median_area = float(np.median(areas[usable_area]))
|
||||
norm = np.zeros(len(postcodes), dtype=np.float64)
|
||||
norm[usable_area] = median_area / areas[usable_area]
|
||||
print(
|
||||
f"Area-normalising to median catchment area {median_area:,.0f} m^2 "
|
||||
f"({int(usable_area.sum()):,}/{len(areas):,} postcodes have usable area)"
|
||||
)
|
||||
|
||||
type_to_idx = {name: idx for idx, name in enumerate(ALL_CRIME_TYPES)}
|
||||
year_to_idx = {year: idx for idx, year in enumerate(years)}
|
||||
|
|
@ -288,8 +374,8 @@ def transform_crime_spatial(
|
|||
transformer = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
|
||||
_accumulate_counts(csvs, tree, type_to_idx, year_to_idx, transformer, counts)
|
||||
|
||||
_write_avg_yr(postcodes, counts, valid_month_count, output_path)
|
||||
_write_by_year(postcodes, counts, years, months_in_year, by_year_output_path)
|
||||
_write_avg_yr(postcodes, counts, years, months_in_year, norm, output_path)
|
||||
_write_by_year(postcodes, counts, years, months_in_year, norm, by_year_output_path)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
|
|
|||
|
|
@ -116,6 +116,66 @@ TREE_DENSITY_FEATURE = "Street tree density percentile"
|
|||
_POSTCODE_TREE_DENSITY_PERCENTILE_RE = re.compile(
|
||||
r"^Tree canopy density percentile within \d+m$"
|
||||
)
|
||||
_FINAL_DROP_COLUMNS = [
|
||||
"inspection_date",
|
||||
"_bedrooms",
|
||||
"LSOA name (2021)",
|
||||
"Local Authority District code (2024)",
|
||||
"Local Authority District name (2024)",
|
||||
"Wider Barriers Sub-domain Score",
|
||||
"Geographical Barriers Sub-domain Score",
|
||||
"Adult Skills Sub-domain Score",
|
||||
"Children and Young People Sub-domain Score",
|
||||
"Crime Score",
|
||||
"Living Environment Score",
|
||||
"Index of Multiple Deprivation (IMD) Score",
|
||||
"Income Deprivation Affecting Older People (IDAOPI) Score (rate)",
|
||||
"Income Deprivation Affecting Children Index (IDACI) Score (rate)",
|
||||
"Barriers to Housing and Services Score",
|
||||
"oa21",
|
||||
"pcon",
|
||||
"epc_property_type",
|
||||
"pp_property_type",
|
||||
"built_form",
|
||||
]
|
||||
_FINAL_RENAME_COLUMNS = {
|
||||
"date_of_transfer": "Date of last transaction",
|
||||
"construction_age_band": "Construction year",
|
||||
"is_construction_date_approximate": "Is construction date approximate",
|
||||
"Income Score (rate)": "Income Score",
|
||||
"Employment Score (rate)": "Employment Score",
|
||||
"Indoors Sub-domain Score": "Housing Conditions Score",
|
||||
"Outdoors Sub-domain Score": "Air Quality and Road Safety Score",
|
||||
"pp_address": "Address per Property Register",
|
||||
"epc_address": "Address per EPC",
|
||||
"postcode": "Postcode",
|
||||
"duration": "Leasehold/Freehold",
|
||||
"current_energy_rating": "Current energy rating",
|
||||
"potential_energy_rating": "Potential energy rating",
|
||||
"total_floor_area": "Total floor area (sqm)",
|
||||
"property_type": "Property type",
|
||||
"restaurants_2km": "Number of restaurants within 2km",
|
||||
"groceries_2km": "Number of grocery shops and supermarkets within 2km",
|
||||
"latest_price": "Last known price",
|
||||
"number_habitable_rooms": "Number of bedrooms & living rooms",
|
||||
"noise_lden_db": "Noise (dB)",
|
||||
"good_primary_5km": "Good+ primary schools within 5km",
|
||||
"good_secondary_5km": "Good+ secondary schools within 5km",
|
||||
"good_primary_2km": "Good+ primary schools within 2km",
|
||||
"good_secondary_2km": "Good+ secondary schools within 2km",
|
||||
"outstanding_primary_5km": "Outstanding primary schools within 5km",
|
||||
"outstanding_secondary_5km": "Outstanding secondary schools within 5km",
|
||||
"outstanding_primary_2km": "Outstanding primary schools within 2km",
|
||||
"outstanding_secondary_2km": "Outstanding secondary schools within 2km",
|
||||
"max_download_speed": "Max available download speed (Mbps)",
|
||||
"serious_crime_avg_yr": "Serious crime (avg/yr)",
|
||||
"minor_crime_avg_yr": "Minor crime (avg/yr)",
|
||||
"mean_monthly_rent": "Estimated monthly rent",
|
||||
"floor_height": "Interior height (m)",
|
||||
"was_council_house": "Former council house",
|
||||
"median_age": "Median age",
|
||||
"turnout_pct": "Voter turnout (%)",
|
||||
}
|
||||
_RENT_SOURCE_UNAVAILABLE_LADS = {
|
||||
# ONS PIPR does not publish LAD-level private-rent estimates for these
|
||||
# small authorities. Keep rent null there, but fail on any other LAD miss.
|
||||
|
|
@ -707,6 +767,181 @@ def _validate_property_postcodes(df: pl.DataFrame) -> None:
|
|||
)
|
||||
|
||||
|
||||
def _active_english_postcode_area(arcgis_raw: pl.LazyFrame) -> pl.LazyFrame:
    """Build the active-England postcode frame, one row per postcode.

    Rows are kept when ``ctry25cd`` equals ``E92000001`` and ``doterm`` is
    null. The result carries ``postcode`` (from ``pcds``), ``lat``, ``lon``
    (from ``long``), ``ctry25cd`` and the geography join keys ``lsoa21``,
    ``oa21`` and ``pcon``. Rows with a null postcode are removed and the
    frame is de-duplicated on ``postcode``.
    """
    in_england = pl.col("ctry25cd") == "E92000001"
    not_terminated = pl.col("doterm").is_null()

    # Versioned source column names are aliased to the short names that the
    # downstream joins use.
    projection = [
        pl.col("pcds").alias("postcode"),
        pl.col("lat"),
        pl.col("long").alias("lon"),
        pl.col("ctry25cd"),
        pl.col("lsoa21cd").alias("lsoa21"),
        pl.col("oa21cd").alias("oa21"),
        pl.col("pcon24cd").alias("pcon"),
    ]

    active = arcgis_raw.filter(in_england).filter(not_terminated)
    keyed = active.select(projection).drop_nulls(["postcode"])
    return keyed.unique(["postcode"])
|
||||
|
||||
|
||||
def _remap_terminated_postcodes(
    wide: pl.LazyFrame, postcode_mapping: pl.LazyFrame
) -> pl.LazyFrame:
    """Replace each mapped postcode in ``wide`` with its ``new_postcode``.

    ``postcode_mapping`` is left-joined on ``postcode`` == ``old_postcode``.
    Where a ``new_postcode`` is found it overwrites ``postcode``; rows without
    a match keep their original value. The helper column is dropped again.
    """
    with_successor = wide.join(
        postcode_mapping,
        left_on="postcode",
        right_on="old_postcode",
        how="left",
    )
    resolved_postcode = pl.coalesce("new_postcode", "postcode").alias("postcode")
    return with_successor.with_columns(resolved_postcode).drop("new_postcode")
|
||||
|
||||
|
||||
def _filter_to_active_english_postcodes(
    wide: pl.LazyFrame, active_postcodes: pl.LazyFrame
) -> pl.LazyFrame:
    """Keep only the rows of ``wide`` whose ``postcode`` is in ``active_postcodes``.

    Implemented as a semi join, so no columns from ``active_postcodes`` are
    added to the result.
    """
    retained = wide.join(active_postcodes, on="postcode", how="semi")
    return retained
|
||||
|
||||
|
||||
def _join_area_side_tables(
    base: pl.LazyFrame,
    *,
    iod: pl.LazyFrame,
    ethnicity: pl.LazyFrame,
    crime: pl.LazyFrame,
    median_age: pl.LazyFrame,
    election: pl.LazyFrame,
    poi_counts: pl.LazyFrame,
    noise: pl.LazyFrame,
    school_proximity: pl.LazyFrame,
    conservation_areas: pl.LazyFrame,
    tree_density: pl.LazyFrame | None,
    broadband: pl.LazyFrame,
) -> pl.LazyFrame:
    """Left-join every area-level side table onto ``base``.

    Join keys, in order: ``iod`` on ``lsoa21``; ``ethnicity`` on the local
    authority district code; ``crime`` on ``postcode``; ``median_age`` on
    ``lsoa21``; ``election`` on ``pcon``; then ``poi_counts``, ``noise``,
    ``school_proximity``, ``conservation_areas`` and (when given)
    ``tree_density`` on ``postcode``; finally ``broadband`` on
    ``postcode`` == ``bb_postcode``.

    After the crime join, ``serious_crime_avg_yr`` and ``minor_crime_avg_yr``
    are added as horizontal sums of the per-type ``(avg/yr)`` columns. Nulls in
    the conservation-area feature are filled with ``"No"``.
    """
    serious_crime_columns = (
        "Violence and sexual offences (avg/yr)",
        "Robbery (avg/yr)",
        "Burglary (avg/yr)",
        "Possession of weapons (avg/yr)",
    )
    minor_crime_columns = (
        "Anti-social behaviour (avg/yr)",
        "Criminal damage and arson (avg/yr)",
        "Shoplifting (avg/yr)",
        "Bicycle theft (avg/yr)",
        "Theft from the person (avg/yr)",
        "Other theft (avg/yr)",
        "Vehicle crime (avg/yr)",
        "Public order (avg/yr)",
        "Drugs (avg/yr)",
        "Other crime (avg/yr)",
    )

    enriched = base.join(
        iod, left_on="lsoa21", right_on="LSOA code (2021)", how="left"
    )
    enriched = enriched.join(
        ethnicity,
        left_on="Local Authority District code (2024)",
        right_on="Geography_code",
        how="left",
    )

    # Crime is counted spatially per postcode (incidents within 50m of the
    # postcode boundary), so it joins on postcode rather than LSOA.
    enriched = enriched.join(crime, on="postcode", how="left")
    enriched = enriched.with_columns(
        pl.sum_horizontal(*serious_crime_columns).alias("serious_crime_avg_yr"),
        pl.sum_horizontal(*minor_crime_columns).alias("minor_crime_avg_yr"),
    )

    enriched = enriched.join(median_age, on="lsoa21", how="left")
    enriched = enriched.join(election, on="pcon", how="left")

    # These side tables are all keyed by postcode and need no post-processing.
    for side_table in (poi_counts, noise, school_proximity):
        enriched = enriched.join(side_table, on="postcode", how="left")

    enriched = enriched.join(conservation_areas, on="postcode", how="left")
    enriched = enriched.with_columns(
        pl.col(CONSERVATION_AREA_FEATURE).fill_null("No")
    )

    # Tree density is an optional input; skip the join when it was not supplied.
    if tree_density is not None:
        enriched = enriched.join(tree_density, on="postcode", how="left")

    return enriched.join(
        broadband, left_on="postcode", right_on="bb_postcode", how="left"
    )
|
||||
|
||||
|
||||
def _finalize_merged_columns(frame: pl.LazyFrame) -> pl.LazyFrame:
    """Drop ``_FINAL_DROP_COLUMNS`` and apply ``_FINAL_RENAME_COLUMNS``.

    Both steps are non-strict, so columns absent from ``frame`` are ignored.
    """
    trimmed = frame.drop(_FINAL_DROP_COLUMNS, strict=False)
    return trimmed.rename(_FINAL_RENAME_COLUMNS, strict=False)
|
||||
|
||||
|
||||
def _area_columns_from(columns: list[str]) -> list[str]:
    """Return, in input order, the names that are area-level columns.

    A name qualifies when it is in ``_AREA_COLUMNS`` or when
    ``_is_dynamic_poi_metric_column`` accepts it.
    """
    selected: list[str] = []
    for name in columns:
        if name in _AREA_COLUMNS or _is_dynamic_poi_metric_column(name):
            selected.append(name)
    return selected
|
||||
|
||||
|
||||
def _property_columns_from(columns: list[str]) -> list[str]:
    """Return, in input order, the names that are property-level columns.

    A name qualifies when it is neither in ``_AREA_COLUMNS`` nor accepted by
    ``_is_dynamic_poi_metric_column``. ``"Postcode"`` is always kept.
    """
    selected: list[str] = []
    for name in columns:
        is_area_level = name in _AREA_COLUMNS or _is_dynamic_poi_metric_column(name)
        if not is_area_level or name == "Postcode":
            selected.append(name)
    return selected
|
||||
|
||||
|
||||
def _validate_postcode_feature_output(
    postcode_df: pl.DataFrame, expected_postcode_count: int
) -> None:
    """Raise ``ValueError`` unless ``postcode_df`` passes three checks.

    Checked in order:

    1. The columns ``Postcode``, ``lat``, ``lon`` and ``ctry25cd`` exist.
    2. Both the row count and the number of distinct ``Postcode`` values
       equal ``expected_postcode_count``.
    3. No row has a null or blank ``Postcode``, a null ``lat``/``lon``, or a
       ``ctry25cd`` that is null or not ``E92000001``.
    """
    absent = sorted({"Postcode", "lat", "lon", "ctry25cd"} - set(postcode_df.columns))
    if absent:
        raise ValueError(f"Postcode feature output missing columns: {absent}")

    row_count = postcode_df.height
    distinct_count = postcode_df["Postcode"].n_unique()
    counts_agree = (
        row_count == expected_postcode_count
        and distinct_count == expected_postcode_count
    )
    if not counts_agree:
        raise ValueError(
            "Postcode feature output no longer matches the active England "
            "postcode universe: "
            f"rows={row_count:,}, unique={distinct_count:,}, "
            f"expected={expected_postcode_count:,}"
        )

    postcode = pl.col("Postcode")
    country = pl.col("ctry25cd")
    is_bad_row = (
        postcode.is_null()
        | (postcode.cast(pl.Utf8).str.strip_chars() == "")
        | pl.col("lat").is_null()
        | pl.col("lon").is_null()
        | country.is_null()
        | (country != "E92000001")
    )
    bad_rows = postcode_df.filter(is_bad_row)
    if bad_rows.height > 0:
        # Include a few offending rows so the failure is actionable.
        preview = bad_rows.select("Postcode", "ctry25cd", "lat", "lon")
        sample = preview.head(10).to_dicts()
        raise ValueError(
            "Postcode feature output contains unsupported or ungeocoded rows: "
            f"{bad_rows.height} rows. Sample: {sample}"
        )
|
||||
|
||||
|
||||
def _split_normal_outputs(
    df: pl.DataFrame,
    postcode_features: pl.DataFrame,
    *,
    expected_postcode_count: int,
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """Produce the ``(postcode_df, properties_df)`` output pair.

    ``postcode_df`` is the area-level column subset of ``postcode_features``
    and is validated against ``expected_postcode_count`` before anything is
    returned. ``properties_df`` is the property-level column subset of ``df``.
    """
    area_names = _area_columns_from(postcode_features.columns)
    postcode_df = postcode_features.select(area_names)
    _validate_postcode_feature_output(postcode_df, expected_postcode_count)

    property_names = _property_columns_from(df.columns)
    properties_df = df.select(property_names)
    return postcode_df, properties_df
|
||||
|
||||
|
||||
# Map listings-parquet source columns to the `_actual_*` overlay columns
|
||||
# carried alongside the wide frame through the postcode-keyed joins. After the
|
||||
# rest of the pipeline finalises, listing rows pick their canonical dashboard
|
||||
|
|
@ -927,9 +1162,7 @@ def _best_listing_match(
|
|||
return best, float(best_score), "address", best_field
|
||||
|
||||
|
||||
def _load_listings_for_merge(
|
||||
listings_path: Path, arcgis_path: Path
|
||||
) -> pl.DataFrame:
|
||||
def _load_listings_for_merge(listings_path: Path, arcgis_path: Path) -> pl.DataFrame:
|
||||
"""Read the listings parquet and prepare it for the wide-frame merge.
|
||||
|
||||
Output is keyed by `_listing_idx` and carries:
|
||||
|
|
@ -1032,7 +1265,11 @@ def _load_direct_epc_candidates(
|
|||
"_direct_epc_outcode": pl.Utf8,
|
||||
"_direct_epc_canonical_property_type": pl.Utf8,
|
||||
"_direct_epc_uprn": pl.Utf8,
|
||||
**{column: dtype for column, dtype in _DIRECT_EPC_COLUMNS if column.startswith("_direct_")},
|
||||
**{
|
||||
column: dtype
|
||||
for column, dtype in _DIRECT_EPC_COLUMNS
|
||||
if column.startswith("_direct_")
|
||||
},
|
||||
}
|
||||
if not listing_outcodes:
|
||||
return pl.DataFrame(schema=schema)
|
||||
|
|
@ -1089,9 +1326,7 @@ def _load_direct_epc_candidates(
|
|||
pl.col("epc_address").alias("_direct_epc_address"),
|
||||
pl.col("uprn").alias("_direct_epc_uprn"),
|
||||
pl.col("total_floor_area").alias("_direct_total_floor_area"),
|
||||
pl.col("number_habitable_rooms").alias(
|
||||
"_direct_number_habitable_rooms"
|
||||
),
|
||||
pl.col("number_habitable_rooms").alias("_direct_number_habitable_rooms"),
|
||||
pl.col("floor_height").alias("_direct_floor_height"),
|
||||
pl.col("_direct_was_council_house").fill_null("No"),
|
||||
)
|
||||
|
|
@ -1141,9 +1376,7 @@ def _listing_match_frame(listings: pl.DataFrame) -> pl.DataFrame:
|
|||
)
|
||||
|
||||
|
||||
def _optional_lazy_col(
|
||||
schema: pl.Schema, column: str, dtype: pl.DataType
|
||||
) -> pl.Expr:
|
||||
def _optional_lazy_col(schema: pl.Schema, column: str, dtype: pl.DataType) -> pl.Expr:
|
||||
if column in schema:
|
||||
return pl.col(column).cast(dtype, strict=False).alias(column)
|
||||
return pl.lit(None, dtype=dtype).alias(column)
|
||||
|
|
@ -1640,27 +1873,18 @@ def _build(
|
|||
| (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
|
||||
)
|
||||
|
||||
# Remap terminated postcodes to nearest active successor
|
||||
# Remap terminated postcodes to nearest active successor before filtering to
|
||||
# the supported active-English postcode universe. Historical properties from
|
||||
# terminated English postcodes are retained under their successor postcode.
|
||||
postcode_mapping = build_postcode_mapping(arcgis_path)
|
||||
wide = (
|
||||
wide.join(
|
||||
postcode_mapping.lazy(),
|
||||
left_on="postcode",
|
||||
right_on="old_postcode",
|
||||
how="left",
|
||||
)
|
||||
.with_columns(
|
||||
pl.coalesce("new_postcode", "postcode").alias("postcode"),
|
||||
)
|
||||
.drop("new_postcode")
|
||||
)
|
||||
|
||||
wide = _remap_terminated_postcodes(wide, postcode_mapping.lazy())
|
||||
arcgis_raw = pl.scan_parquet(arcgis_path)
|
||||
postcode_country = arcgis_raw.select(
|
||||
pl.col("pcds").alias("postcode"),
|
||||
pl.col("ctry25cd"),
|
||||
).unique(["postcode"])
|
||||
wide = wide.join(postcode_country, on="postcode", how="left")
|
||||
arcgis = _active_english_postcode_area(arcgis_raw)
|
||||
active_postcodes = arcgis.select("postcode").unique()
|
||||
active_postcode_count = (
|
||||
active_postcodes.select(pl.len()).collect(engine="streaming").item()
|
||||
)
|
||||
wide = _filter_to_active_english_postcodes(wide, active_postcodes)
|
||||
|
||||
if listed_buildings_path is not None:
|
||||
active_postcodes_for_listed = (
|
||||
|
|
@ -1691,92 +1915,25 @@ def _build(
|
|||
arcgis_path,
|
||||
epc_path=actual_listings_epc_path,
|
||||
)
|
||||
wide = _filter_to_active_english_postcodes(wide, active_postcodes)
|
||||
|
||||
wide = wide.with_columns(pl.col(LISTED_BUILDING_FEATURE).fill_null("No"))
|
||||
|
||||
arcgis = (
|
||||
arcgis_raw.filter(pl.col("ctry25cd") == "E92000001") # England only
|
||||
.filter(pl.col("doterm").is_null()) # Active postcodes only
|
||||
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
|
||||
# Alias them back to the short canonical names used across the
|
||||
# pipeline so downstream joins don't need to know about NSPL's
|
||||
# versioning scheme.
|
||||
.select(
|
||||
pl.col("pcds").alias("postcode"),
|
||||
"lat",
|
||||
pl.col("long").alias("lon"),
|
||||
pl.col("lsoa21cd").alias("lsoa21"),
|
||||
pl.col("oa21cd").alias("oa21"),
|
||||
pl.col("pcon24cd").alias("pcon"),
|
||||
)
|
||||
)
|
||||
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
|
||||
# `_active_english_postcode_area` aliases them back to the short canonical
|
||||
# names used across the pipeline so downstream joins don't need to know
|
||||
# about NSPL's versioning scheme.
|
||||
wide = wide.join(arcgis, on="postcode", how="left")
|
||||
postcode_area = arcgis
|
||||
|
||||
iod = pl.scan_parquet(iod_path).with_columns(
|
||||
*(_less_deprived_percentile_expr(c) for c in _IOD_PERCENTILE_COLUMNS)
|
||||
)
|
||||
wide = wide.join(iod, left_on="lsoa21", right_on="LSOA code (2021)", how="left")
|
||||
|
||||
ethnicity = pl.scan_parquet(ethnicity_path)
|
||||
wide = wide.join(
|
||||
ethnicity,
|
||||
left_on="Local Authority District code (2024)",
|
||||
right_on="Geography_code",
|
||||
how="left",
|
||||
)
|
||||
|
||||
# Derive bedroom count: habitable rooms - 1 (assuming 1 reception room), clipped to 0..4
|
||||
wide = wide.with_columns(
|
||||
(pl.col("number_habitable_rooms") - 1)
|
||||
.clip(0, 4)
|
||||
.cast(pl.UInt8)
|
||||
.alias("_bedrooms"),
|
||||
)
|
||||
rental = pl.scan_parquet(rental_prices_path).select(
|
||||
"area_code", "bedrooms", "mean_monthly_rent"
|
||||
)
|
||||
wide = wide.join(
|
||||
rental,
|
||||
left_on=["Local Authority District code (2024)", "_bedrooms"],
|
||||
right_on=["area_code", "bedrooms"],
|
||||
how="left",
|
||||
)
|
||||
|
||||
# Crime is counted spatially per postcode (incidents within 50m of the
|
||||
# postcode boundary), so it joins on postcode rather than LSOA.
|
||||
crime = pl.scan_parquet(crime_path)
|
||||
wide = wide.join(crime, on="postcode", how="left")
|
||||
|
||||
wide = wide.with_columns(
|
||||
pl.sum_horizontal(
|
||||
"Violence and sexual offences (avg/yr)",
|
||||
"Robbery (avg/yr)",
|
||||
"Burglary (avg/yr)",
|
||||
"Possession of weapons (avg/yr)",
|
||||
).alias("serious_crime_avg_yr"),
|
||||
pl.sum_horizontal(
|
||||
"Anti-social behaviour (avg/yr)",
|
||||
"Criminal damage and arson (avg/yr)",
|
||||
"Shoplifting (avg/yr)",
|
||||
"Bicycle theft (avg/yr)",
|
||||
"Theft from the person (avg/yr)",
|
||||
"Other theft (avg/yr)",
|
||||
"Vehicle crime (avg/yr)",
|
||||
"Public order (avg/yr)",
|
||||
"Drugs (avg/yr)",
|
||||
"Other crime (avg/yr)",
|
||||
).alias("minor_crime_avg_yr"),
|
||||
)
|
||||
|
||||
median_age = pl.scan_parquet(median_age_path)
|
||||
wide = wide.join(median_age, on="lsoa21", how="left")
|
||||
|
||||
election = pl.scan_parquet(election_results_path)
|
||||
wide = wide.join(election, on="pcon", how="left")
|
||||
|
||||
poi_counts = pl.scan_parquet(poi_proximity_path)
|
||||
wide = wide.join(poi_counts, on="postcode", how="left")
|
||||
|
||||
noise_cols = ["road_noise_lden_db", "rail_noise_lden_db", "airport_noise_lden_db"]
|
||||
noise = (
|
||||
pl.scan_parquet(noise_path)
|
||||
|
|
@ -1789,21 +1946,13 @@ def _build(
|
|||
)
|
||||
.select("postcode", "noise_lden_db")
|
||||
)
|
||||
wide = wide.join(noise, on="postcode", how="left")
|
||||
|
||||
school_proximity = pl.scan_parquet(school_proximity_path)
|
||||
wide = wide.join(school_proximity, on="postcode", how="left")
|
||||
|
||||
conservation_areas = _conservation_area_by_postcode(
|
||||
arcgis.select("postcode", "lat", "lon"), conservation_areas_path
|
||||
)
|
||||
wide = wide.join(conservation_areas, on="postcode", how="left").with_columns(
|
||||
pl.col(CONSERVATION_AREA_FEATURE).fill_null("No")
|
||||
)
|
||||
|
||||
tree_density = None
|
||||
if tree_density_postcodes_path is not None:
|
||||
tree_density = _tree_density_by_postcode(tree_density_postcodes_path)
|
||||
wide = wide.join(tree_density, on="postcode", how="left")
|
||||
|
||||
# Broadband: derive max available download speed tier per postcode from
|
||||
# Ofcom availability percentages. Tiers: Gigabit ≥1000, UFBB ≥300,
|
||||
|
|
@ -1828,7 +1977,38 @@ def _build(
|
|||
.agg(pl.col("max_download_speed").max())
|
||||
.with_columns(pl.col("max_download_speed").cast(pl.Utf8))
|
||||
)
|
||||
wide = wide.join(broadband, left_on="postcode", right_on="bb_postcode", how="left")
|
||||
area_side_tables = {
|
||||
"iod": iod,
|
||||
"ethnicity": ethnicity,
|
||||
"crime": crime,
|
||||
"median_age": median_age,
|
||||
"election": election,
|
||||
"poi_counts": poi_counts,
|
||||
"noise": noise,
|
||||
"school_proximity": school_proximity,
|
||||
"conservation_areas": conservation_areas,
|
||||
"tree_density": tree_density,
|
||||
"broadband": broadband,
|
||||
}
|
||||
wide = _join_area_side_tables(wide, **area_side_tables)
|
||||
postcode_area = _join_area_side_tables(postcode_area, **area_side_tables)
|
||||
|
||||
# Derive bedroom count: habitable rooms - 1 (assuming 1 reception room), clipped to 0..4
|
||||
wide = wide.with_columns(
|
||||
(pl.col("number_habitable_rooms") - 1)
|
||||
.clip(0, 4)
|
||||
.cast(pl.UInt8)
|
||||
.alias("_bedrooms"),
|
||||
)
|
||||
rental = pl.scan_parquet(rental_prices_path).select(
|
||||
"area_code", "bedrooms", "mean_monthly_rent"
|
||||
)
|
||||
wide = wide.join(
|
||||
rental,
|
||||
left_on=["Local Authority District code (2024)", "_bedrooms"],
|
||||
right_on=["area_code", "bedrooms"],
|
||||
how="left",
|
||||
)
|
||||
|
||||
# Derive property_type: prefer EPC data, fall back to price-paid.
|
||||
# For Houses, use built_form (e.g. Semi-Detached, Mid-Terrace) for finer detail.
|
||||
|
|
@ -1862,112 +2042,40 @@ def _build(
|
|||
.alias("property_type")
|
||||
)
|
||||
|
||||
wide = (
|
||||
wide.with_columns(
|
||||
pl.when(pl.col("duration") == "U")
|
||||
.then(None)
|
||||
.otherwise(pl.col("duration"))
|
||||
.alias("duration"),
|
||||
pl.when(pl.col("current_energy_rating") == "INVALID!")
|
||||
.then(None)
|
||||
.otherwise(pl.col("current_energy_rating"))
|
||||
.alias("current_energy_rating"),
|
||||
)
|
||||
.with_columns(
|
||||
(pl.col("latest_price") / pl.col("total_floor_area"))
|
||||
.round(0)
|
||||
.cast(pl.Int32)
|
||||
.alias("Price per sqm"),
|
||||
)
|
||||
.drop(
|
||||
"inspection_date",
|
||||
"_bedrooms",
|
||||
"LSOA name (2021)",
|
||||
"Local Authority District code (2024)",
|
||||
"Local Authority District name (2024)",
|
||||
"Wider Barriers Sub-domain Score",
|
||||
"Geographical Barriers Sub-domain Score",
|
||||
"Adult Skills Sub-domain Score",
|
||||
"Children and Young People Sub-domain Score",
|
||||
"Crime Score",
|
||||
"Living Environment Score",
|
||||
"Index of Multiple Deprivation (IMD) Score",
|
||||
"Income Deprivation Affecting Older People (IDAOPI) Score (rate)",
|
||||
"Income Deprivation Affecting Children Index (IDACI) Score (rate)",
|
||||
"Barriers to Housing and Services Score",
|
||||
"oa21",
|
||||
"pcon",
|
||||
"epc_property_type",
|
||||
"pp_property_type",
|
||||
"built_form",
|
||||
)
|
||||
.rename(
|
||||
{
|
||||
"date_of_transfer": "Date of last transaction",
|
||||
"construction_age_band": "Construction year",
|
||||
"is_construction_date_approximate": "Is construction date approximate",
|
||||
"Income Score (rate)": "Income Score",
|
||||
"Employment Score (rate)": "Employment Score",
|
||||
"Indoors Sub-domain Score": "Housing Conditions Score",
|
||||
"Outdoors Sub-domain Score": "Air Quality and Road Safety Score",
|
||||
"pp_address": "Address per Property Register",
|
||||
"epc_address": "Address per EPC",
|
||||
"postcode": "Postcode",
|
||||
"duration": "Leasehold/Freehold",
|
||||
"current_energy_rating": "Current energy rating",
|
||||
"potential_energy_rating": "Potential energy rating",
|
||||
"total_floor_area": "Total floor area (sqm)",
|
||||
"property_type": "Property type",
|
||||
"restaurants_2km": "Number of restaurants within 2km",
|
||||
"groceries_2km": "Number of grocery shops and supermarkets within 2km",
|
||||
"latest_price": "Last known price",
|
||||
"number_habitable_rooms": "Number of bedrooms & living rooms",
|
||||
"noise_lden_db": "Noise (dB)",
|
||||
"good_primary_5km": "Good+ primary schools within 5km",
|
||||
"good_secondary_5km": "Good+ secondary schools within 5km",
|
||||
"good_primary_2km": "Good+ primary schools within 2km",
|
||||
"good_secondary_2km": "Good+ secondary schools within 2km",
|
||||
"outstanding_primary_5km": "Outstanding primary schools within 5km",
|
||||
"outstanding_secondary_5km": "Outstanding secondary schools within 5km",
|
||||
"outstanding_primary_2km": "Outstanding primary schools within 2km",
|
||||
"outstanding_secondary_2km": "Outstanding secondary schools within 2km",
|
||||
"max_download_speed": "Max available download speed (Mbps)",
|
||||
"serious_crime_avg_yr": "Serious crime (avg/yr)",
|
||||
"minor_crime_avg_yr": "Minor crime (avg/yr)",
|
||||
"mean_monthly_rent": "Estimated monthly rent",
|
||||
"floor_height": "Interior height (m)",
|
||||
"was_council_house": "Former council house",
|
||||
"median_age": "Median age",
|
||||
"turnout_pct": "Voter turnout (%)",
|
||||
}
|
||||
)
|
||||
wide = wide.with_columns(
|
||||
pl.when(pl.col("duration") == "U")
|
||||
.then(None)
|
||||
.otherwise(pl.col("duration"))
|
||||
.alias("duration"),
|
||||
pl.when(pl.col("current_energy_rating") == "INVALID!")
|
||||
.then(None)
|
||||
.otherwise(pl.col("current_energy_rating"))
|
||||
.alias("current_energy_rating"),
|
||||
).with_columns(
|
||||
(pl.col("latest_price") / pl.col("total_floor_area"))
|
||||
.round(0)
|
||||
.cast(pl.Int32)
|
||||
.alias("Price per sqm"),
|
||||
)
|
||||
wide = _finalize_merged_columns(wide)
|
||||
postcode_area = _finalize_merged_columns(postcode_area)
|
||||
|
||||
print("Collecting with streaming engine...")
|
||||
df = wide.collect(engine="streaming")
|
||||
|
||||
if mode == "listings":
|
||||
df = wide.collect(engine="streaming")
|
||||
enriched_listings = _finalize_listings(df)
|
||||
_validate_property_postcodes(enriched_listings)
|
||||
print(f"Enriched listings rows: {enriched_listings.height}")
|
||||
return _BuildResult(listings=enriched_listings)
|
||||
|
||||
df, postcode_features = pl.collect_all([wide, postcode_area], engine="streaming")
|
||||
_validate_property_postcodes(df)
|
||||
|
||||
# Split into postcode-level and property-level dataframes
|
||||
area_cols = [
|
||||
c for c in df.columns if c in _AREA_COLUMNS or _is_dynamic_poi_metric_column(c)
|
||||
]
|
||||
postcode_df = df.select(area_cols).group_by("Postcode").first()
|
||||
postcode_df, properties_df = _split_normal_outputs(
|
||||
df, postcode_features, expected_postcode_count=active_postcode_count
|
||||
)
|
||||
print(f"Postcode rows: {postcode_df.height} (unique postcodes)")
|
||||
|
||||
property_cols = [
|
||||
c
|
||||
for c in df.columns
|
||||
if (c not in _AREA_COLUMNS and not _is_dynamic_poi_metric_column(c))
|
||||
or c == "Postcode"
|
||||
]
|
||||
properties_df = df.select(property_cols)
|
||||
print(f"Property rows: {properties_df.height}")
|
||||
|
||||
return _BuildResult(postcode=postcode_df, properties=properties_df)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
import json
|
||||
import shutil
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
from pyproj import Transformer
|
||||
from shapely import make_valid
|
||||
from shapely.geometry import MultiPolygon, Polygon
|
||||
from shapely import make_valid, set_precision
|
||||
from shapely.geometry import MultiPolygon, Polygon, mapping, shape
|
||||
from shapely.ops import transform as transform_geometry
|
||||
from shapely.ops import unary_union
|
||||
from tqdm import tqdm
|
||||
|
||||
|
|
@ -18,49 +20,47 @@ def _get_to_wgs84():
|
|||
return _to_wgs84
|
||||
|
||||
|
||||
def _largest_polygonal(geom) -> Polygon | None:
    """Reduce ``geom`` to a single polygon, or ``None`` if it has none.

    ``None`` and empty geometries yield ``None``. Invalid input is first passed
    through ``make_valid``. A ``Polygon`` is returned as-is; a
    ``MultiPolygon`` yields its largest-area member; a ``GeometryCollection``
    is searched recursively and the largest polygon found is returned. Any
    other geometry type yields ``None``.
    """
    if geom is None or geom.is_empty:
        return None

    repaired = geom if geom.is_valid else make_valid(geom)
    kind = repaired.geom_type

    if kind == "Polygon":
        return repaired
    if kind == "MultiPolygon":
        return max(repaired.geoms, key=lambda member: member.area)
    if kind != "GeometryCollection":
        return None

    # Each member of the collection contributes at most one candidate polygon.
    candidates = []
    for member in repaired.geoms:
        candidate = _largest_polygonal(member)
        if candidate is not None:
            candidates.append(candidate)
    if not candidates:
        return None
    return max(candidates, key=lambda candidate: candidate.area)
|
||||
|
||||
|
||||
def to_wgs84_geojson(
|
||||
geom: Polygon | MultiPolygon, tolerance: float = 1.0
|
||||
) -> dict | None:
|
||||
"""Simplify geometry in BNG, convert to WGS84, return GeoJSON dict."""
|
||||
if geom.is_empty:
|
||||
geom = _largest_polygonal(geom)
|
||||
if geom is None:
|
||||
return None
|
||||
|
||||
simplified = geom.simplify(tolerance, preserve_topology=True)
|
||||
if simplified.is_empty:
|
||||
simplified = _largest_polygonal(simplified)
|
||||
if simplified is None:
|
||||
return None
|
||||
|
||||
transformer = _get_to_wgs84()
|
||||
|
||||
def transform_ring(coords):
|
||||
xs, ys = zip(*coords)
|
||||
lons, lats = transformer.transform(list(xs), list(ys))
|
||||
return [(round(lon, 6), round(lat, 6)) for lon, lat in zip(lons, lats)]
|
||||
|
||||
def transform_polygon(poly):
|
||||
exterior = transform_ring(poly.exterior.coords)
|
||||
holes = [transform_ring(h.coords) for h in poly.interiors]
|
||||
return [exterior] + holes
|
||||
|
||||
# Force single Polygon — postcodes are contiguous delivery routes
|
||||
if simplified.geom_type == "MultiPolygon":
|
||||
simplified = max(simplified.geoms, key=lambda g: g.area)
|
||||
elif simplified.geom_type == "GeometryCollection":
|
||||
polys = [
|
||||
g for g in simplified.geoms if g.geom_type in ("Polygon", "MultiPolygon")
|
||||
]
|
||||
if not polys:
|
||||
return None
|
||||
simplified = max(polys, key=lambda g: g.area)
|
||||
if simplified.geom_type == "MultiPolygon":
|
||||
simplified = max(simplified.geoms, key=lambda g: g.area)
|
||||
|
||||
if simplified.geom_type != "Polygon" or simplified.is_empty:
|
||||
wgs84 = transform_geometry(transformer.transform, simplified)
|
||||
wgs84 = set_precision(wgs84, 0.000001, mode="valid_output")
|
||||
wgs84 = _largest_polygonal(wgs84)
|
||||
if wgs84 is None:
|
||||
return None
|
||||
|
||||
return {
|
||||
"type": "Polygon",
|
||||
"coordinates": transform_polygon(simplified),
|
||||
}
|
||||
return mapping(wgs84)
|
||||
|
||||
|
||||
def _fill_holes(geom):
|
||||
|
|
@ -132,7 +132,11 @@ def write_district_geojson(
|
|||
) -> int:
|
||||
"""Group postcodes by district, write GeoJSON files. Returns file count."""
|
||||
units_dir = output_dir / "units"
|
||||
units_dir.mkdir(parents=True, exist_ok=True)
|
||||
tmp_units_dir = output_dir / "units.tmp"
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
if tmp_units_dir.exists():
|
||||
shutil.rmtree(tmp_units_dir)
|
||||
tmp_units_dir.mkdir(parents=True)
|
||||
|
||||
by_district: dict[str, list[tuple[str, Polygon | MultiPolygon]]] = defaultdict(list)
|
||||
for pc, geom in postcodes.items():
|
||||
|
|
@ -141,14 +145,23 @@ def write_district_geojson(
|
|||
by_district[district].append((pc, geom))
|
||||
|
||||
file_count = 0
|
||||
seen_postcodes: set[str] = set()
|
||||
for district, entries in tqdm(
|
||||
sorted(by_district.items()), desc="Writing GeoJSON", unit="file"
|
||||
):
|
||||
features = []
|
||||
for pc, geom in sorted(entries, key=lambda x: x[0]):
|
||||
if pc in seen_postcodes:
|
||||
raise ValueError(f"Duplicate postcode boundary feature: {pc}")
|
||||
seen_postcodes.add(pc)
|
||||
geojson_geom = to_wgs84_geojson(geom)
|
||||
if geojson_geom is None:
|
||||
continue
|
||||
raise ValueError(f"Postcode boundary collapsed to empty geometry: {pc}")
|
||||
written_geom = shape(geojson_geom)
|
||||
if written_geom.is_empty or not written_geom.is_valid:
|
||||
raise ValueError(
|
||||
f"Invalid postcode boundary geometry after output: {pc}"
|
||||
)
|
||||
mapit_code = pc.replace(" ", "")
|
||||
features.append(
|
||||
{
|
||||
|
|
@ -165,9 +178,12 @@ def write_district_geojson(
|
|||
continue
|
||||
|
||||
collection = {"type": "FeatureCollection", "features": features}
|
||||
out_path = units_dir / f"{district}.geojson"
|
||||
out_path = tmp_units_dir / f"{district}.geojson"
|
||||
with open(out_path, "w") as f:
|
||||
json.dump(collection, f, separators=(",", ":"))
|
||||
file_count += 1
|
||||
|
||||
if units_dir.exists():
|
||||
shutil.rmtree(units_dir)
|
||||
tmp_units_dir.replace(units_dir)
|
||||
return file_count
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
Each test targets a specific bug or edge case identified during code review.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
import pytest
|
||||
|
|
@ -11,7 +13,12 @@ from shapely.ops import unary_union
|
|||
|
||||
from .oa_boundaries import parse_gpkg_geometry
|
||||
from .greenspace import subtract_greenspace
|
||||
from .output import _fill_holes, merge_fragments, to_wgs84_geojson
|
||||
from .output import (
|
||||
_fill_holes,
|
||||
merge_fragments,
|
||||
to_wgs84_geojson,
|
||||
write_district_geojson,
|
||||
)
|
||||
from .process_oa import _extract_polygonal, process_oa
|
||||
from .uprn import get_oa_uprns, load_uprns
|
||||
from .voronoi import _equal_split_fallback, compute_voronoi_regions
|
||||
|
|
@ -154,6 +161,7 @@ class TestWhitespacePostcodes:
|
|||
"pcds": ["AA1 1AA", "AA1 1AB"],
|
||||
"east1m": [500010, 500030],
|
||||
"north1m": [180010, 180020],
|
||||
"oa21cd": ["E00000001", "E00000001"],
|
||||
"doterm": ["2020-01-01", None],
|
||||
"ctry25cd": ["E92000001", "E92000001"],
|
||||
}
|
||||
|
|
@ -165,6 +173,65 @@ class TestWhitespacePostcodes:
|
|||
|
||||
assert loaded_df["PCDS"].to_list() == ["AA1 1AB"]
|
||||
|
||||
def test_arcgis_filters_to_active_english_postcodes(self, tmp_path):
    """Only the English postcode survives when the lookup also lists a Welsh one."""
    uprn_file = tmp_path / "uprn.parquet"
    arcgis_file = tmp_path / "arcgis.parquet"

    # UPRN lookup: one point on an English postcode, one on a Welsh postcode.
    pl.DataFrame(
        {
            "GRIDGB1E": [500010, 500020],
            "GRIDGB1N": [180010, 180020],
            "PCDS": ["AA1 1AA", "CF1 1AA"],
            "OA21CD": ["E00000001", "E00000001"],
        }
    ).write_parquet(uprn_file)

    # ArcGIS lookup: neither postcode is terminated, but only the first one
    # carries the England country code.
    pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "CF1 1AA"],
            "east1m": [500010, 300010],
            "north1m": [180010, 220010],
            "oa21cd": ["E00000001", "W00000001"],
            "doterm": [None, None],
            "ctry25cd": ["E92000001", "W92000004"],
        }
    ).write_parquet(arcgis_file)

    result, _ = load_uprns(uprn_file, arcgis_file)

    surviving = result["PCDS"].to_list()
    assert surviving == ["AA1 1AA"]
|
||||
|
||||
def test_arcgis_adds_centroid_seed_for_active_postcode_without_uprn(self, tmp_path):
    """An active postcode with no UPRN rows is seeded from its ArcGIS coordinates."""
    uprn_file = tmp_path / "uprn.parquet"
    arcgis_file = tmp_path / "arcgis.parquet"

    # The UPRN lookup only knows about AA1 1AA ...
    pl.DataFrame(
        {
            "GRIDGB1E": [500010],
            "GRIDGB1N": [180010],
            "PCDS": ["AA1 1AA"],
            "OA21CD": ["E00000001"],
        }
    ).write_parquet(uprn_file)

    # ... while the ArcGIS lookup also lists BB1 1BB in a different output area.
    pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "BB1 1BB"],
            "east1m": [500010, 510000],
            "north1m": [180010, 190000],
            "oa21cd": ["E00000001", "E00000002"],
            "doterm": [None, None],
            "ctry25cd": ["E92000001", "E92000001"],
        }
    ).write_parquet(arcgis_file)

    result, oa_offsets = load_uprns(uprn_file, arcgis_file)

    loaded_postcodes = set(result["PCDS"].to_list())
    assert loaded_postcodes == {"AA1 1AA", "BB1 1BB"}

    # The seeded row must be retrievable through the per-OA accessor and sit
    # exactly on the ArcGIS easting/northing.
    seed_points, seed_postcodes = get_oa_uprns(result, oa_offsets, "E00000002")
    assert seed_postcodes == ["BB1 1BB"]
    assert seed_points.tolist() == [[510000.0, 190000.0]]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 3: Voronoi deduplication is first-seen-wins
|
||||
|
|
@ -450,7 +517,9 @@ class TestProcessOAInspireParcelAssignment:
|
|||
)
|
||||
postcodes = ["A", "B"]
|
||||
|
||||
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
|
||||
fragments = process_oa(
|
||||
oa_geom, points, postcodes, inspire_candidates=[left, right]
|
||||
)
|
||||
frag_dict = dict(fragments)
|
||||
|
||||
assert "A" in frag_dict and "B" in frag_dict
|
||||
|
|
@ -494,7 +563,9 @@ class TestProcessOAInspireParcelAssignment:
|
|||
)
|
||||
postcodes = ["A", "B"]
|
||||
|
||||
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
|
||||
fragments = process_oa(
|
||||
oa_geom, points, postcodes, inspire_candidates=[left, right]
|
||||
)
|
||||
frag_dict = dict(fragments)
|
||||
|
||||
assert "A" in frag_dict and "B" in frag_dict
|
||||
|
|
@ -539,7 +610,9 @@ class TestProcessOAInspireParcelAssignment:
|
|||
)
|
||||
postcodes = ["A", "B"]
|
||||
|
||||
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[straddling])
|
||||
fragments = process_oa(
|
||||
oa_geom, points, postcodes, inspire_candidates=[straddling]
|
||||
)
|
||||
|
||||
for _, geom in fragments:
|
||||
assert geom.difference(oa_geom).area < 0.01
|
||||
|
|
@ -651,6 +724,22 @@ class TestToWgs84Geojson:
|
|||
assert lon_dp <= 6, f"Longitude {lon_s} has {lon_dp} decimal places"
|
||||
assert lat_dp <= 6, f"Latitude {lat_s} has {lat_dp} decimal places"
|
||||
|
||||
def test_write_district_geojson_replaces_stale_units(self, tmp_path):
    """A district file left over from an earlier run is gone after a fresh write."""
    units_dir = tmp_path / "units"
    units_dir.mkdir()
    leftover = units_dir / "ZZ1.geojson"
    empty_collection = {"type": "FeatureCollection", "features": []}
    leftover.write_text(json.dumps(empty_collection))

    boundaries = {"AA1 1AA": box(530000, 180000, 530100, 180100)}
    file_count = write_district_geojson(boundaries, tmp_path)

    assert file_count == 1
    assert not leftover.exists()

    fresh = units_dir / "AA1.geojson"
    written = json.loads(fresh.read_text())
    first_feature = written["features"][0]
    assert first_feature["properties"]["postcodes"] == "AA1 1AA"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Edge case: parse_gpkg_geometry rejects unknown envelope types
|
||||
|
|
|
|||
|
|
@ -13,6 +13,33 @@ def _canonical_postcode_expr(name: str) -> pl.Expr:
|
|||
return pl.col(name).str.strip_chars().str.to_uppercase()
|
||||
|
||||
|
||||
def _active_english_arcgis_postcodes(arcgis_path: Path) -> pl.LazyFrame:
    """Return one centroid row per live English postcode from the ArcGIS lookup.

    Rows are kept only when ``ctry25cd`` is the England code and ``doterm`` is
    null (the postcode has no termination date). The result uses the UPRN
    column names ``PCDS``, ``GRIDGB1E``, ``GRIDGB1N`` and ``OA21CD`` so it can
    be joined against / concatenated with the UPRN frame.

    Args:
        arcgis_path: Parquet file holding at least the columns ``pcds``,
            ``east1m``, ``north1m``, ``oa21cd``, ``ctry25cd`` and ``doterm``.

    Returns:
        A LazyFrame with exactly one row per canonical postcode. When the
        canonicalised postcode occurs more than once, the first row in file
        order is kept so repeated runs pick the same coordinates.
    """
    return (
        pl.read_parquet(
            arcgis_path,
            columns=["pcds", "east1m", "north1m", "oa21cd", "ctry25cd", "doterm"],
        )
        .lazy()
        .filter(pl.col("ctry25cd") == "E92000001")
        # Cast first so the null test works whatever dtype doterm was stored as.
        .filter(pl.col("doterm").cast(pl.Utf8).is_null())
        .select(
            _canonical_postcode_expr("pcds").alias("PCDS"),
            pl.col("east1m").cast(pl.Float64).alias("GRIDGB1E"),
            pl.col("north1m").cast(pl.Float64).alias("GRIDGB1N"),
            pl.col("oa21cd").alias("OA21CD"),
        )
        .filter(
            pl.col("PCDS").is_not_null()
            & (pl.col("PCDS") != "")
            & pl.col("GRIDGB1E").is_not_null()
            & pl.col("GRIDGB1N").is_not_null()
            & pl.col("OA21CD").is_not_null()
            & pl.col("OA21CD").str.starts_with("E")
        )
        # The default keep="any" leaves the surviving duplicate unspecified;
        # pin it to the first row so the seed coordinates are reproducible.
        .unique("PCDS", keep="first", maintain_order=True)
    )
|
||||
|
||||
|
||||
def load_uprns(
|
||||
uprn_path: Path, arcgis_path: Path | None = None
|
||||
) -> tuple[pl.DataFrame, dict[str, tuple[int, int]]]:
|
||||
|
|
@ -25,6 +52,7 @@ def load_uprns(
|
|||
|
||||
print("Loading UPRN lookup...")
|
||||
mapping = None
|
||||
active_postcode_points = None
|
||||
if arcgis_path is not None:
|
||||
mapping = (
|
||||
build_postcode_mapping(arcgis_path)
|
||||
|
|
@ -34,6 +62,7 @@ def load_uprns(
|
|||
)
|
||||
.unique("old_postcode")
|
||||
)
|
||||
active_postcode_points = _active_english_arcgis_postcodes(arcgis_path)
|
||||
|
||||
# Sort via streaming sink to avoid polars doubling memory during in-memory sort
|
||||
with tempfile.NamedTemporaryFile(
|
||||
|
|
@ -51,11 +80,21 @@ def load_uprns(
|
|||
|
||||
if mapping is not None and mapping.height > 0:
|
||||
uprns = (
|
||||
uprns.join(mapping.lazy(), left_on="PCDS", right_on="old_postcode", how="left")
|
||||
uprns.join(
|
||||
mapping.lazy(), left_on="PCDS", right_on="old_postcode", how="left"
|
||||
)
|
||||
.with_columns(pl.coalesce("new_postcode", "PCDS").alias("PCDS"))
|
||||
.select("GRIDGB1E", "GRIDGB1N", "PCDS", "OA21CD")
|
||||
)
|
||||
|
||||
if active_postcode_points is not None:
|
||||
active_postcodes = active_postcode_points.select("PCDS").unique()
|
||||
uprns = uprns.join(active_postcodes, on="PCDS", how="semi")
|
||||
missing_active = active_postcode_points.join(
|
||||
uprns.select("PCDS").unique(), on="PCDS", how="anti"
|
||||
).select("GRIDGB1E", "GRIDGB1N", "PCDS", "OA21CD")
|
||||
uprns = pl.concat([uprns, missing_active], how="vertical_relaxed")
|
||||
|
||||
uprns.sort("OA21CD").sink_parquet(tmp_path)
|
||||
release_memory()
|
||||
|
||||
|
|
|
|||
52
pipeline/transform/test_crime_hotspot_tiles.py
Normal file
52
pipeline/transform/test_crime_hotspot_tiles.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import json
|
||||
|
||||
from pipeline.transform.crime_hotspot_tiles import _write_geojsonseq
|
||||
|
||||
_HEADER = (
|
||||
"Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,"
|
||||
"LSOA code,LSOA name,Crime type,Last outcome category,Context"
|
||||
)
|
||||
|
||||
|
||||
def _row(lon, lat, month, crime_type):
|
||||
return f",{month},F,F,{lon},{lat},On or near X,E01000001,L,{crime_type},U,"
|
||||
|
||||
|
||||
def _write_csv(path, rows):
|
||||
path.write_text("\n".join([_HEADER, *rows]) + "\n")
|
||||
|
||||
|
||||
def test_write_geojsonseq_collapses_shared_anchors_into_weighted_features(tmp_path):
    """Incidents sharing coordinate, month and type become one counted feature.

    Rows that are out of bounds or have no coordinate must not appear in the
    output nor in the returned incident total.
    """
    csv = tmp_path / "2024-01-test-street.csv"
    _write_csv(
        csv,
        [
            # Two incidents snapped to the exact same anchor/month/type -> one
            # feature with count=2.
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            _row(-0.1, 51.5, "2024-01", "Burglary"),
            # Same coord, different crime type -> kept separate (per-type filter).
            _row(-0.1, 51.5, "2024-01", "Robbery"),
            # Out of bounds -> dropped entirely.
            _row(-0.1, 80.0, "2024-01", "Burglary"),
            # Missing coordinate -> dropped entirely.
            _row("", "", "2024-01", "Burglary"),
        ],
    )

    out = tmp_path / "hotspots.geojsonseq"
    feature_count, incident_count = _write_geojsonseq([csv], out)

    features = [json.loads(line) for line in out.read_text().splitlines()]
    assert feature_count == 2
    assert incident_count == 3  # 2 burglaries + 1 robbery, in-bounds only

    by_type = {f["properties"]["crime_type"]: f["properties"] for f in features}
    # The busy anchor is a single feature carrying its full incident weight,
    # so tippecanoe's density thinning can no longer silently erase it.
    assert by_type["Burglary"]["count"] == 2
    assert by_type["Burglary"]["weight"] == 2
    assert by_type["Robbery"]["count"] == 1
    # Geometry preserved as [lon, lat].
    assert all(f["geometry"]["coordinates"] == [-0.1, 51.5] for f in features)
|
||||
|
|
@ -1,9 +1,13 @@
|
|||
import json
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
import pytest
|
||||
import shapely
|
||||
from pyproj import Transformer
|
||||
|
||||
from pipeline.transform.crime_spatial import transform_crime_spatial
|
||||
from pipeline.transform.postcode_boundaries.loader import load_postcode_polygons
|
||||
|
||||
_TO_WGS84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)
|
||||
|
||||
|
|
@ -82,7 +86,10 @@ def test_buffer_overlap_counts_for_each_postcode(tmp_path):
|
|||
|
||||
output = tmp_path / "crime_by_postcode.parquet"
|
||||
by_year = tmp_path / "crime_by_postcode_by_year.parquet"
|
||||
transform_crime_spatial(crime, units, output, by_year)
|
||||
# Pin the 50m buffer the geometry above was designed around (the production
|
||||
# default is now 100m). The three squares are equal-area, so area
|
||||
# normalisation leaves the counts unchanged.
|
||||
transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)
|
||||
|
||||
rows = {
|
||||
r["postcode"]: r
|
||||
|
|
@ -127,7 +134,7 @@ def test_by_year_annualises_and_rolls_up(tmp_path):
|
|||
|
||||
output = tmp_path / "crime_by_postcode.parquet"
|
||||
by_year = tmp_path / "crime_by_postcode_by_year.parquet"
|
||||
transform_crime_spatial(crime, units, output, by_year)
|
||||
transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)
|
||||
|
||||
by_year_df = pl.read_parquet(by_year)
|
||||
assert by_year_df.height == 1
|
||||
|
|
@ -145,3 +152,130 @@ def test_by_year_annualises_and_rolls_up(tmp_path):
|
|||
# 2023 serious = Burglary(12) + Robbery(12) = 24; 2024 = Burglary(12).
|
||||
assert serious[2023] == 24.0
|
||||
assert serious[2024] == 12.0
|
||||
|
||||
|
||||
def test_area_normalisation_divides_out_buffered_catchment(tmp_path):
    """Counts are rescaled by median buffered catchment / own buffered catchment.

    Three postcodes of growing footprint each receive exactly one incident.
    After normalisation the smallest one scores highest and the median-sized
    one is unchanged, i.e. the metric behaves like a density. Because the
    divisor is the *buffered* area rather than the raw polygon, the constant
    buffer ring dominates and the spread between smallest and largest stays
    small.
    """
    buffer_m = 50.0
    metric = "Burglary (avg/yr)"

    units = tmp_path / "units"
    features = [
        _square_feature("AB1 1AA", 1000, 1000, 1010, 1010),  # 10x10
        _square_feature("AB1 1AB", 3000, 3000, 3010, 3020),  # 10x20 (median)
        _square_feature("AB1 1AC", 5000, 5000, 5020, 5020),  # 20x20
    ]
    _write_boundaries(units, {"AB1": features})

    crime = tmp_path / "crime"
    incidents = [
        _crime_row("2024-01", 1005, 1005, "Burglary"),
        _crime_row("2024-01", 3005, 3010, "Burglary"),
        _crime_row("2024-01", 5010, 5010, "Burglary"),
    ]
    _write_month(crime, "2024-01", incidents)

    output = tmp_path / "crime_by_postcode.parquet"
    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
    transform_crime_spatial(crime, units, output, by_year, buffer_m=buffer_m)

    # Recompute the expectation from the same buffered catchments: every
    # postcode is 12/yr before normalisation, then x (median_buf / buffered_area).
    postcodes, polygons = load_postcode_polygons(units)
    buf_area = {}
    for pc, poly in zip(postcodes, polygons):
        buffered = shapely.buffer(poly, buffer_m, quad_segs=8)
        buf_area[pc] = float(shapely.area(buffered))
    median_buf = float(np.median(list(buf_area.values())))
    expected = {pc: 12.0 * median_buf / area for pc, area in buf_area.items()}

    rows = {}
    for record in pl.read_parquet(output).to_dicts():
        rows[record["postcode"]] = record
    for pc, exp in expected.items():
        assert rows[pc][metric] == pytest.approx(exp, abs=0.1)

    # The median catchment is untouched; ordering follows inverse buffered
    # area, but the buffer-ring floor keeps the spread far below the ~4x
    # raw-area ratio.
    assert rows["AB1 1AB"][metric] == pytest.approx(12.0, abs=0.05)
    small = rows["AB1 1AA"][metric]
    big = rows["AB1 1AC"][metric]
    assert small > 12.0 > big
    assert small / big < 1.5

    # The by-year series carries the same normalisation.
    yearly = pl.read_parquet(by_year)
    small_row = yearly.filter(pl.col("postcode") == "AB1 1AA").row(0, named=True)
    expected_series = [
        {"year": 2024, "count": pytest.approx(expected["AB1 1AA"], abs=0.1)}
    ]
    assert small_row["Burglary (by year)"] == expected_series
|
||||
|
||||
|
||||
def test_avg_yr_is_simple_mean_of_year_bars(tmp_path):
    """The headline rate is the unweighted mean of the annualised year bars.

    Month coverage is deliberately uneven: 2023 has one month with two
    incidents (24/yr) and 2024 has two months with one incident each (12/yr).
    The headline must therefore be (24 + 12) / 2 = 18, not the month-weighted
    pooled rate of 4 incidents / 3 months * 12 = 16.
    """
    units = tmp_path / "units"
    boundaries = {"AB1": [_square_feature("AB1 1AA", 1000, 1000, 1010, 1010)]}
    _write_boundaries(units, boundaries)

    crime = tmp_path / "crime"
    incidents_by_month = {
        "2023-01": [
            _crime_row("2023-01", 1005, 1005, "Burglary"),
            _crime_row("2023-01", 1005, 1005, "Burglary"),
        ],
        "2024-01": [_crime_row("2024-01", 1005, 1005, "Burglary")],
        "2024-02": [_crime_row("2024-02", 1005, 1005, "Burglary")],
    }
    for month, incidents in incidents_by_month.items():
        _write_month(crime, month, incidents)

    output = tmp_path / "crime_by_postcode.parquet"
    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
    transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)

    headline = pl.read_parquet(output).row(0, named=True)
    assert headline["Burglary (avg/yr)"] == pytest.approx(18.0, abs=0.05)

    series = pl.read_parquet(by_year).row(0, named=True)["Burglary (by year)"]
    bars = {}
    for point in series:
        bars[point["year"]] = point["count"]
    expected_bars = {
        2023: pytest.approx(24.0, abs=0.05),
        2024: pytest.approx(12.0, abs=0.05),
    }
    assert bars == expected_bars
|
||||
|
||||
|
||||
def test_unknown_crime_type_is_dropped_with_warning(tmp_path, capsys):
    """An unrecognised crime type yields no output column but is reported on stderr."""
    units = tmp_path / "units"
    boundaries = {"AB1": [_square_feature("AB1 1AA", 1000, 1000, 1010, 1010)]}
    _write_boundaries(units, boundaries)

    crime = tmp_path / "crime"
    incidents = [
        _crime_row("2024-01", 1005, 1005, "Burglary"),
        _crime_row("2024-01", 1005, 1005, "Cyber fraud"),
    ]
    _write_month(crime, "2024-01", incidents)

    output = tmp_path / "crime_by_postcode.parquet"
    by_year = tmp_path / "crime_by_postcode_by_year.parquet"
    transform_crime_spatial(crime, units, output, by_year, buffer_m=50.0)

    # The known type keeps its column; the unknown one gets none.
    written_columns = pl.read_parquet(output).columns
    assert "Cyber fraud (avg/yr)" not in written_columns
    assert "Burglary (avg/yr)" in written_columns

    # The drop is not silent: stderr names the type and flags it as a warning.
    captured = capsys.readouterr()
    assert "Cyber fraud" in captured.err
    assert "WARNING" in captured.err
|
||||
|
|
|
|||
|
|
@ -10,8 +10,10 @@ from pipeline.transform.merge import (
|
|||
LISTED_BUILDING_FEATURE,
|
||||
TREE_DENSITY_FEATURE,
|
||||
_LISTING_OVERLAY_SOURCES,
|
||||
_active_english_postcode_area,
|
||||
_build_unmatched_listing_seed_rows,
|
||||
_canonical_postcode_expr,
|
||||
_filter_to_active_english_postcodes,
|
||||
_finalize_listings,
|
||||
_integrate_listings,
|
||||
_match_direct_epc,
|
||||
|
|
@ -24,8 +26,11 @@ from pipeline.transform.merge import (
|
|||
_matched_listed_building_flags,
|
||||
_postcode_conservation_area_flags,
|
||||
_postcode_listed_building_candidates,
|
||||
_remap_terminated_postcodes,
|
||||
_split_normal_outputs,
|
||||
_tree_density_by_postcode,
|
||||
_validate_lad_source_coverage,
|
||||
_validate_postcode_feature_output,
|
||||
_validate_property_postcodes,
|
||||
)
|
||||
|
||||
|
|
@ -79,6 +84,113 @@ def test_crime_columns_are_spatial_counts_not_per_capita() -> None:
|
|||
assert "Minor crime per 1k residents (avg/yr)" not in _AREA_COLUMNS
|
||||
|
||||
|
||||
def test_active_english_postcode_area_filters_to_active_england() -> None:
    """Terminated and non-English rows are dropped; survivors get the short column names."""
    # Row 1: live English. Row 2: English but terminated. Row 3: live Welsh.
    source = pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "AA1 1AB", "CF1 1AA"],
            "ctry25cd": ["E92000001", "E92000001", "W92000004"],
            "doterm": [None, "2020-01-01", None],
            "lat": [51.0, 51.1, 52.0],
            "long": [-0.1, -0.2, -3.0],
            "lsoa21cd": ["L1", "L2", "L3"],
            "oa21cd": ["O1", "O2", "O3"],
            "pcon24cd": ["P1", "P2", "P3"],
        }
    )
    only_survivor = {
        "postcode": "AA1 1AA",
        "lat": 51.0,
        "lon": -0.1,
        "ctry25cd": "E92000001",
        "lsoa21": "L1",
        "oa21": "O1",
        "pcon": "P1",
    }

    filtered = _active_english_postcode_area(source.lazy()).collect()

    assert filtered.to_dicts() == [only_survivor]
|
||||
|
||||
|
||||
def test_remap_then_active_filter_keeps_terminated_english_properties() -> None:
    """Remapping before filtering keeps rows whose old postcode maps to an active one."""
    wide = pl.DataFrame(
        {
            "postcode": ["OLD 1AA", "NEW 1AA", "CF1 1AA"],
            "row_id": [1, 2, 3],
        }
    ).lazy()
    mapping = pl.DataFrame(
        {"old_postcode": ["OLD 1AA"], "new_postcode": ["NEW 1AA"]}
    ).lazy()
    active_postcodes = pl.DataFrame({"postcode": ["NEW 1AA"]}).lazy()

    remapped = _remap_terminated_postcodes(wide, mapping)
    kept = _filter_to_active_english_postcodes(remapped, active_postcodes)
    result = kept.collect().sort("row_id")

    # Row 1 is carried over to the successor postcode, row 2 was already on
    # it, and row 3 is not in the active set at all.
    expected_rows = [
        {"postcode": "NEW 1AA", "row_id": 1},
        {"postcode": "NEW 1AA", "row_id": 2},
    ]
    assert result.to_dicts() == expected_rows
|
||||
|
||||
|
||||
def test_split_normal_outputs_uses_postcode_feature_universe() -> None:
    """The postcode output follows the feature table, not just postcodes with properties."""
    # Only AA1 1AA has a property row ...
    property_rows = pl.DataFrame(
        {
            "Postcode": ["AA1 1AA"],
            "Address per Property Register": ["1 Example Road"],
            "Last known price": [250_000],
            "lat": [51.0],
            "lon": [-0.1],
            "ctry25cd": ["E92000001"],
            "lsoa21": ["L1"],
        }
    )
    # ... but the feature table also covers BB1 1BB.
    feature_rows = pl.DataFrame(
        {
            "Postcode": ["AA1 1AA", "BB1 1BB"],
            "lat": [51.0, 52.0],
            "lon": [-0.1, -0.2],
            "ctry25cd": ["E92000001", "E92000001"],
            "lsoa21": ["L1", "L2"],
            "Distance to nearest amenity (Park) (km)": [0.3, 0.8],
        }
    )

    postcode_df, properties_df = _split_normal_outputs(
        property_rows, feature_rows, expected_postcode_count=2
    )

    assert postcode_df["Postcode"].to_list() == ["AA1 1AA", "BB1 1BB"]
    assert "Distance to nearest amenity (Park) (km)" in postcode_df.columns

    expected_property = {
        "Postcode": "AA1 1AA",
        "Address per Property Register": "1 Example Road",
        "Last known price": 250_000,
    }
    assert properties_df.to_dicts() == [expected_property]
|
||||
|
||||
|
||||
def test_postcode_feature_validation_rejects_unsupported_or_ungeocoded_rows() -> None:
    """Validation raises when a row is non-English or has no coordinates."""
    # The second row is Welsh and has neither latitude nor longitude.
    bad_output = pl.DataFrame(
        {
            "Postcode": ["AA1 1AA", "CF1 1AA"],
            "lat": [51.0, None],
            "lon": [-0.1, None],
            "ctry25cd": ["E92000001", "W92000004"],
        }
    )

    expect_failure = pytest.raises(ValueError, match="unsupported or ungeocoded")
    with expect_failure:
        _validate_postcode_feature_output(bad_output, expected_postcode_count=2)
|
||||
|
||||
|
||||
def test_listed_building_feature_is_property_level() -> None:
|
||||
assert LISTED_BUILDING_FEATURE not in _AREA_COLUMNS
|
||||
|
||||
|
|
@ -418,9 +530,7 @@ def test_build_unmatched_listing_seed_rows_fills_property_shape_fields(
|
|||
)
|
||||
unmatched_idxs = listings.select("_listing_idx")
|
||||
|
||||
seed = _build_unmatched_listing_seed_rows(
|
||||
unmatched_idxs, listings, template_schema
|
||||
)
|
||||
seed = _build_unmatched_listing_seed_rows(unmatched_idxs, listings, template_schema)
|
||||
|
||||
assert seed.height == 1
|
||||
assert seed["postcode"].to_list() == ["SW1A 1AA"]
|
||||
|
|
@ -550,7 +660,12 @@ def test_match_direct_epc_matches_by_uprn_across_postcodes() -> None:
|
|||
[{"_listing_uprn": "100000000001", "_listing_match_postcode": "ZZ99ZZ"}]
|
||||
),
|
||||
_direct_epc_candidates(
|
||||
[{"_direct_epc_uprn": "100000000001", "_direct_epc_match_postcode": "AA11AA"}]
|
||||
[
|
||||
{
|
||||
"_direct_epc_uprn": "100000000001",
|
||||
"_direct_epc_match_postcode": "AA11AA",
|
||||
}
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,81 +1,105 @@
|
|||
import math
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
import pyogrio
|
||||
import pytest
|
||||
import shapely
|
||||
|
||||
from pipeline.transform.tree_density import (
|
||||
STREET_TREE_COVERAGE_COL,
|
||||
STREET_TREE_DENSITY_COL,
|
||||
_add_nfi_batch,
|
||||
_accumulate_clipped_area,
|
||||
_coverage_percentile_expr,
|
||||
_finalize_metrics,
|
||||
_geometry_column,
|
||||
_layers,
|
||||
_metric_columns,
|
||||
_nfi_dataset_path,
|
||||
_postcode_buffers,
|
||||
_postcode_density_percentile_col,
|
||||
_safe_extract_zip_dir,
|
||||
_with_postcode_density_percentiles,
|
||||
_write_street_rollups,
|
||||
)
|
||||
|
||||
|
||||
def test_nfi_accumulation_adds_only_clipped_overlap_area() -> None:
|
||||
def test_accumulate_clipped_area_adds_only_in_buffer_overlap() -> None:
|
||||
radius_m = 50
|
||||
points = pl.DataFrame({"postcode": ["A", "B"], "x": [0.0, 1000.0], "y": [0.0, 0.0]})
|
||||
circles, tree = _postcode_buffers(points, radius_m)
|
||||
buffer_area = math.pi * radius_m * radius_m
|
||||
|
||||
# A large woodland square centred on postcode A fully covers A's circle.
|
||||
# A large square centred on postcode A fully covers A's buffer circle.
|
||||
canopy_area = np.zeros(2)
|
||||
feature_count = np.zeros(2, dtype=np.uint32)
|
||||
big = shapely.box(-500, -500, 500, 500) # 1,000,000 sqm parcel
|
||||
_add_nfi_batch(
|
||||
np.array([big], dtype=object),
|
||||
np.array(["Woodland"], dtype=object),
|
||||
circles,
|
||||
tree,
|
||||
canopy_area,
|
||||
feature_count,
|
||||
radius_m,
|
||||
)
|
||||
_accumulate_clipped_area(np.array([big], dtype=object), circles, tree, canopy_area)
|
||||
# Only the clipped circle area is added (the 32-gon buffer approximates the
|
||||
# circle to ~1%), NOT the full 1,000,000 sqm polygon.
|
||||
assert canopy_area[0] == pytest.approx(buffer_area, rel=1e-2)
|
||||
assert canopy_area[0] <= buffer_area # never exceeds the buffer area
|
||||
assert canopy_area[0] <= buffer_area # never exceeds the true buffer area
|
||||
assert canopy_area[1] == 0.0 # postcode B is 1km away, no overlap
|
||||
assert feature_count.tolist() == [1, 0]
|
||||
|
||||
# A large parcel that only slivers into B's circle must add only the sliver,
|
||||
# not its full area -- the failure mode the old centroid path could not avoid.
|
||||
# not its full area -- the failure mode a centroid/full-area path could not avoid.
|
||||
canopy_area = np.zeros(2)
|
||||
feature_count = np.zeros(2, dtype=np.uint32)
|
||||
sliver = shapely.box(1040, -500, 2000, 500) # left edge 10m inside B's circle
|
||||
_add_nfi_batch(
|
||||
np.array([sliver], dtype=object),
|
||||
np.array(["Woodland"], dtype=object),
|
||||
circles,
|
||||
tree,
|
||||
canopy_area,
|
||||
feature_count,
|
||||
radius_m,
|
||||
_accumulate_clipped_area(
|
||||
np.array([sliver], dtype=object), circles, tree, canopy_area
|
||||
)
|
||||
assert canopy_area[0] == 0.0
|
||||
assert 0.0 < canopy_area[1] < buffer_area # tiny segment, far below 1M sqm
|
||||
|
||||
# Non-woodland categories contribute nothing.
|
||||
canopy_area = np.zeros(2)
|
||||
feature_count = np.zeros(2, dtype=np.uint32)
|
||||
_add_nfi_batch(
|
||||
np.array([big], dtype=object),
|
||||
np.array(["Non woodland"], dtype=object),
|
||||
|
||||
def test_accumulate_clipped_area_drops_missing_and_empty_geometry() -> None:
|
||||
radius_m = 50
|
||||
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
|
||||
circles, tree = _postcode_buffers(points, radius_m)
|
||||
|
||||
canopy_area = np.zeros(1)
|
||||
geoms = np.array(
|
||||
[None, shapely.from_wkt("POLYGON EMPTY"), shapely.box(-10, -10, 10, 10)],
|
||||
dtype=object,
|
||||
)
|
||||
# A None and an empty geometry must be skipped, not crash, and only the real
|
||||
# 400 sqm box is accumulated (it is fully inside the buffer).
|
||||
_accumulate_clipped_area(geoms, circles, tree, canopy_area)
|
||||
assert canopy_area[0] == pytest.approx(400.0)
|
||||
|
||||
|
||||
def test_accumulate_clipped_area_height_weighted_by_overlap() -> None:
|
||||
radius_m = 50
|
||||
points = pl.DataFrame({"postcode": ["A"], "x": [0.0], "y": [0.0]})
|
||||
circles, tree = _postcode_buffers(points, radius_m)
|
||||
|
||||
canopy_area = np.zeros(1)
|
||||
height_weighted_sum = np.zeros(1)
|
||||
height_weight = np.zeros(1)
|
||||
geoms = np.array(
|
||||
[
|
||||
shapely.box(-10, -10, 0, 0), # 100 sqm, fully inside
|
||||
shapely.box(0, 0, 20, 20), # 400 sqm, fully inside
|
||||
shapely.box(-5, 0, 0, 5), # 25 sqm, NaN height -> ignored for height
|
||||
],
|
||||
dtype=object,
|
||||
)
|
||||
height = np.array([5.0, 10.0, np.nan])
|
||||
|
||||
_accumulate_clipped_area(
|
||||
geoms,
|
||||
circles,
|
||||
tree,
|
||||
canopy_area,
|
||||
feature_count,
|
||||
radius_m,
|
||||
height=height,
|
||||
height_weighted_sum=height_weighted_sum,
|
||||
height_weight=height_weight,
|
||||
)
|
||||
assert canopy_area.tolist() == [0.0, 0.0]
|
||||
assert feature_count.tolist() == [0, 0]
|
||||
|
||||
# All three clipped areas count toward canopy; only the finite-height ones
|
||||
# contribute to the area-weighted mean height.
|
||||
assert canopy_area[0] == pytest.approx(525.0)
|
||||
assert height_weight[0] == pytest.approx(500.0)
|
||||
mean_height = height_weighted_sum[0] / height_weight[0]
|
||||
assert mean_height == pytest.approx((5.0 * 100 + 10.0 * 400) / 500) # 9.0
|
||||
|
||||
|
||||
def test_coverage_percentile_expr_ranks_higher_coverage_higher() -> None:
|
||||
|
|
@ -88,76 +112,142 @@ def test_coverage_percentile_expr_ranks_higher_coverage_higher() -> None:
|
|||
assert result["percentile"].to_list() == [0.0, 50.0, 100.0, None]
|
||||
|
||||
|
||||
def test_coverage_percentile_expr_uses_exact_scale_endpoints() -> None:
|
||||
def test_coverage_percentile_expr_uses_tie_consistent_average_rank() -> None:
|
||||
# Tied extremes share their mean rank instead of being pinned to exact 0/100,
|
||||
# so the whole scale runs on one consistent average-rank formula.
|
||||
df = pl.DataFrame({"coverage": [0.0, 0.0, 5.0, 10.0, 10.0]})
|
||||
|
||||
result = df.lazy().with_columns(
|
||||
_coverage_percentile_expr("coverage", "percentile")
|
||||
).collect()
|
||||
|
||||
assert result["percentile"].to_list() == [0.0, 0.0, 50.0, 100.0, 100.0]
|
||||
assert result["percentile"].to_list() == [12.5, 12.5, 50.0, 87.5, 87.5]
|
||||
|
||||
|
||||
def test_street_rollup_percentiles_are_ranked_over_raw_street_coverage(
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
def test_coverage_percentile_expr_all_equal_is_neutral_midpoint() -> None:
    """With no spread in coverage every non-null row lands on the 50th percentile."""
    # (coverage column, expected percentile column)
    cases = [
        ([5.0, 5.0, 5.0], [50.0, 50.0, 50.0]),  # all equal
        ([7.0], [50.0]),  # single value
        ([None, 5.0, 5.0, 5.0], [None, 50.0, 50.0, 50.0]),  # null stays null
    ]

    for coverage, expected in cases:
        frame = pl.DataFrame({"coverage": coverage})
        ranked = (
            frame.lazy()
            .with_columns(_coverage_percentile_expr("coverage", "percentile"))
            .collect()
        )
        assert ranked["percentile"].to_list() == expected
|
||||
|
||||
|
||||
def test_finalize_metrics_caps_density_keeps_raw_area_and_weights_height() -> None:
|
||||
radius_m = 50
|
||||
density_col, area_col, count_col, height_col = _metric_columns(radius_m)
|
||||
buffer_area = math.pi * radius_m * radius_m
|
||||
density_col, area_col, height_col = _metric_columns(radius_m)
|
||||
|
||||
points = pl.DataFrame({"postcode": ["AA1 1AA", "AA1 1AB", "AA1 1AC"]})
|
||||
canopy_area = np.array([0.0, buffer_area * 0.5, buffer_area * 2.0])
|
||||
# Postcode 0: no height samples -> null. Postcode 1: area-weighted mean = 5.
|
||||
height_weighted_sum = np.array([0.0, 500.0, 0.0])
|
||||
height_weight = np.array([0.0, 100.0, 0.0])
|
||||
|
||||
metrics = _finalize_metrics(
|
||||
points, canopy_area, height_weighted_sum, height_weight, radius_m
|
||||
)
|
||||
|
||||
assert metrics[density_col].to_list() == [0.0, 50.0, 100.0] # capped at 100
|
||||
# area_col is the raw clipped accumulation, intentionally uncapped.
|
||||
assert metrics[area_col].to_list() == pytest.approx(
|
||||
[0.0, round(buffer_area * 0.5, 1), round(buffer_area * 2.0, 1)]
|
||||
)
|
||||
assert metrics[height_col].to_list() == [None, 5.0, None]
|
||||
# The mixed-unit feature-count column has been removed entirely.
|
||||
assert "Tree features within 50m" not in metrics.columns
|
||||
assert set(metrics.columns) == {"postcode", density_col, area_col, height_col}
|
||||
|
||||
|
||||
def test_postcode_density_percentiles_rank_over_density() -> None:
|
||||
radius_m = 50
|
||||
density_col, area_col, height_col = _metric_columns(radius_m)
|
||||
percentile_col = _postcode_density_percentile_col(radius_m)
|
||||
|
||||
postcode_metrics = _with_postcode_density_percentiles(
|
||||
metrics = _with_postcode_density_percentiles(
|
||||
pl.DataFrame(
|
||||
{
|
||||
"postcode": ["AA1 1AA", "AA1 1AB", "AA1 1AC"],
|
||||
density_col: [10.0, 30.0, 50.0],
|
||||
area_col: [100.0, 300.0, 500.0],
|
||||
count_col: [1, 3, 5],
|
||||
height_col: [4.0, 6.0, 8.0],
|
||||
}
|
||||
),
|
||||
radius_m,
|
||||
)
|
||||
|
||||
price_paid = pl.DataFrame(
|
||||
{
|
||||
"postcode": ["AA1 1AA", "AA1 1AA", "AA1 1AB", "AA1 1AC"],
|
||||
"paon": ["1", "2", "3", "4"],
|
||||
"saon": ["", "", "", ""],
|
||||
"street": ["Oak Road", "Oak Road", "Oak Road", "Elm Street"],
|
||||
"locality": ["", "", "", ""],
|
||||
"town_city": ["Test Town", "Test Town", "Test Town", "Test Town"],
|
||||
"district": ["Test District"] * 4,
|
||||
"county": ["Test County"] * 4,
|
||||
"date_of_transfer": [
|
||||
"2024-01-01",
|
||||
"2024-01-02",
|
||||
"2024-01-03",
|
||||
"2024-01-04",
|
||||
],
|
||||
}
|
||||
assert percentile_col in metrics.columns
|
||||
assert metrics[percentile_col].to_list() == [0.0, 50.0, 100.0]
|
||||
|
||||
|
||||
def test_safe_extract_zip_dir_rejects_path_traversal(tmp_path: Path) -> None:
|
||||
malicious = tmp_path / "evil.zip"
|
||||
with zipfile.ZipFile(malicious, "w") as archive:
|
||||
archive.writestr("../escape.txt", "pwned")
|
||||
|
||||
with pytest.raises(ValueError, match="Unsafe path"):
|
||||
_safe_extract_zip_dir(malicious, tmp_path / "extract", force=True)
|
||||
|
||||
|
||||
def test_safe_extract_zip_dir_extracts_benign_archive(tmp_path: Path) -> None:
|
||||
benign = tmp_path / "ok.zip"
|
||||
with zipfile.ZipFile(benign, "w") as archive:
|
||||
archive.writestr("data/x.txt", "hello")
|
||||
|
||||
extract_dir = tmp_path / "extract"
|
||||
result = _safe_extract_zip_dir(benign, extract_dir, force=True)
|
||||
assert result == extract_dir
|
||||
assert (extract_dir / "data" / "x.txt").read_text() == "hello"
|
||||
|
||||
|
||||
def test_geometry_column_resolution() -> None:
|
||||
assert _geometry_column({"geometry_name": "SHAPE"}, ["MEANHT", "SHAPE"]) == "SHAPE"
|
||||
assert _geometry_column({}, ["a", "wkb_geometry", "b"]) == "wkb_geometry"
|
||||
assert _geometry_column({"geometry_name": None}, ["x", "geom"]) == "geom"
|
||||
assert _geometry_column({}, ["a", "b", "c"]) == "c" # last-column fallback
|
||||
|
||||
|
||||
def _zip_with_shapefiles(zip_path: Path, names: list[str]) -> None:
|
||||
with zipfile.ZipFile(zip_path, "w") as archive:
|
||||
for name in names:
|
||||
archive.writestr(name, "")
|
||||
|
||||
|
||||
def test_nfi_dataset_path_requires_exactly_one_shapefile(tmp_path: Path) -> None:
|
||||
multi = tmp_path / "multi.zip"
|
||||
_zip_with_shapefiles(multi, ["a.shp", "b.shp"])
|
||||
with pytest.raises(ValueError, match="exactly one shapefile"):
|
||||
_nfi_dataset_path(multi, tmp_path / "multi_x", force_extract=True, use_vsizip=False)
|
||||
|
||||
none = tmp_path / "none.zip"
|
||||
_zip_with_shapefiles(none, ["readme.txt"])
|
||||
with pytest.raises(FileNotFoundError):
|
||||
_nfi_dataset_path(none, tmp_path / "none_x", force_extract=True, use_vsizip=False)
|
||||
|
||||
one = tmp_path / "one.zip"
|
||||
_zip_with_shapefiles(one, ["woodland.shp", "woodland.dbf"])
|
||||
resolved = _nfi_dataset_path(
|
||||
one, tmp_path / "one_x", force_extract=True, use_vsizip=False
|
||||
)
|
||||
price_paid_path = tmp_path / "price-paid.parquet"
|
||||
output_streets = tmp_path / "streets.parquet"
|
||||
output_addresses = tmp_path / "addresses.parquet"
|
||||
price_paid.write_parquet(price_paid_path)
|
||||
assert resolved.endswith("woodland.shp")
|
||||
|
||||
_write_street_rollups(
|
||||
postcode_metrics=postcode_metrics,
|
||||
price_paid_path=price_paid_path,
|
||||
output_streets=output_streets,
|
||||
output_addresses=output_addresses,
|
||||
radius_m=radius_m,
|
||||
|
||||
def test_layers_selection_and_unknown(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
pyogrio,
|
||||
"list_layers",
|
||||
lambda _path: [("L1", "Polygon"), ("L2", "Polygon")],
|
||||
)
|
||||
|
||||
streets = pl.read_parquet(output_streets).sort("street")
|
||||
addresses = pl.read_parquet(output_addresses)
|
||||
|
||||
assert streets["street"].to_list() == ["Elm Street", "Oak Road"]
|
||||
assert streets[STREET_TREE_COVERAGE_COL].to_list() == pytest.approx([50.0, 16.7])
|
||||
assert streets.select("street", STREET_TREE_DENSITY_COL).rows() == [
|
||||
("Elm Street", 100.0),
|
||||
("Oak Road", 0.0),
|
||||
]
|
||||
assert percentile_col in addresses.columns
|
||||
assert STREET_TREE_COVERAGE_COL in addresses.columns
|
||||
assert STREET_TREE_DENSITY_COL in addresses.columns
|
||||
assert _layers("ignored", None) == ["L1", "L2"]
|
||||
assert _layers("ignored", ("L2",)) == ["L2"]
|
||||
with pytest.raises(ValueError, match="Unknown TOW layer"):
|
||||
_layers("ignored", ("L3",))
|
||||
|
|
|
|||
|
|
@ -1,16 +1,28 @@
|
|||
"""Derive street-scale tree density metrics from Forest Research TOW + NFI data.
|
||||
"""Derive postcode-scale tree density metrics from Forest Research TOW + NFI data.
|
||||
|
||||
The Forest Research Trees Outside Woodland release is an Esri File Geodatabase
|
||||
inside property-data/FR_TOW_V1_ALL.zip. This transformer computes a compact
|
||||
postcode-level metric from the tree polygons, then optionally rolls that up to
|
||||
Price Paid street names so the dashboard can answer "what is this address's
|
||||
street like?" without loading the full geodatabase at runtime.
|
||||
postcode-level metric from the tree polygons so the dashboard can answer "how
|
||||
green is this postcode?" without loading the full geodatabase at runtime.
|
||||
|
||||
TOW only covers trees *outside* woodland, so the National Forest Inventory (NFI)
|
||||
woodland layer is optionally unioned in. TOW canopy is accumulated by centroid
|
||||
proximity (tiny crowns), while large NFI woodland parcels are accumulated by
|
||||
true buffer-clipped intersection area so they cannot saturate a postcode from
|
||||
mere centroid proximity.
|
||||
Every postcode centroid is expanded into a radius-r buffer ("extended area").
|
||||
Both TOW tree crowns and National Forest Inventory (NFI) woodland parcels are
|
||||
accumulated by *true buffer-clipped intersection area*: only the part of each
|
||||
polygon that falls inside a postcode's buffer is counted, never the area that
|
||||
spills outside it. A crown straddling the buffer edge therefore contributes only
|
||||
its inside portion, and a parcel reaching into the buffer from outside is still
|
||||
counted -- no polygon can saturate a postcode from mere proximity.
|
||||
|
||||
TOW only covers trees *outside* woodland, so the NFI woodland layer is the
|
||||
geometric complement of TOW and is optionally unioned in. The two products are
|
||||
*assumed disjoint*: clipped TOW crown area and clipped NFI woodland area are
|
||||
summed into the same per-postcode accumulator, so any spatial overlap between a
|
||||
TOW crown and an NFI parcel (boundary slop where "groups of trees" meet
|
||||
"woodland") would be double-counted. The final density is capped at 100% and
|
||||
_finalize_metrics logs how many postcodes exceed 100% raw coverage, which is a
|
||||
direct symptom of such overlap (or of overlapping crowns within one buffer); if
|
||||
that count is material the products are not disjoint and the NFI clip should be
|
||||
taken against the complement of TOW.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -25,16 +37,12 @@ import numpy as np
|
|||
import polars as pl
|
||||
import pyogrio
|
||||
import shapely
|
||||
from scipy.spatial import cKDTree
|
||||
|
||||
|
||||
TOW_GDB_NAME = "FR_TOW_V1_ALL.gdb"
|
||||
STREET_TREE_DENSITY_COL = "Street tree density percentile"
|
||||
STREET_TREE_COVERAGE_COL = "Street tree coverage (%)"
|
||||
POSTCODE_DENSITY_COL = "Tree canopy density within {radius}m (%)"
|
||||
POSTCODE_DENSITY_PERCENTILE_COL = "Tree canopy density percentile within {radius}m"
|
||||
POSTCODE_AREA_COL = "Tree canopy area within {radius}m (sqm)"
|
||||
POSTCODE_COUNT_COL = "Tree features within {radius}m"
|
||||
POSTCODE_HEIGHT_COL = "Mean TOW height within {radius}m (m)"
|
||||
|
||||
# National Forest Inventory (NFI) woodland — the geometric complement of TOW.
|
||||
|
|
@ -131,13 +139,24 @@ def _safe_extract_zip_dir(zip_path: Path, extract_dir: Path, force: bool) -> Pat
|
|||
def _nfi_dataset_path(
|
||||
zip_path: Path, extract_dir: Path, force_extract: bool, use_vsizip: bool
|
||||
) -> str:
|
||||
"""Resolve the NFI woodland shapefile path, extracting the zip if needed."""
|
||||
"""Resolve the NFI woodland shapefile path, extracting the zip if needed.
|
||||
|
||||
Raises if the archive contains zero or more than one shapefile rather than
|
||||
silently picking one, so an ambiguous NFI release fails loudly instead of
|
||||
accumulating canopy from the wrong layer.
|
||||
"""
|
||||
if use_vsizip:
|
||||
return f"/vsizip/{zip_path.resolve()}"
|
||||
extracted = _safe_extract_zip_dir(zip_path, extract_dir, force_extract)
|
||||
shapefiles = sorted(extracted.rglob("*.shp"))
|
||||
if not shapefiles:
|
||||
raise FileNotFoundError(f"No .shp found inside {zip_path}")
|
||||
if len(shapefiles) > 1:
|
||||
names = ", ".join(path.name for path in shapefiles)
|
||||
raise ValueError(
|
||||
f"Expected exactly one shapefile inside {zip_path}, found {len(shapefiles)} "
|
||||
f"({names}); cannot unambiguously pick the NFI woodland layer"
|
||||
)
|
||||
return str(shapefiles[0])
|
||||
|
||||
|
||||
|
|
@ -146,7 +165,7 @@ def _geometry_column(metadata: dict, column_names: list[str]) -> str:
|
|||
geometry_name = metadata.get("geometry_name")
|
||||
if geometry_name:
|
||||
return str(geometry_name)
|
||||
for name in ("wkb_geometry", "geometry", "geom"):
|
||||
for name in ("wkb_geometry", "geometry", "geom", "SHAPE"):
|
||||
if name in column_names:
|
||||
return name
|
||||
return column_names[-1]
|
||||
|
|
@ -184,11 +203,10 @@ def _layers(dataset_path: str, selected_layers: tuple[str, ...] | None) -> list[
|
|||
return [layer for layer in available if layer in selected_layers]
|
||||
|
||||
|
||||
def _metric_columns(radius_m: int) -> tuple[str, str, str, str]:
|
||||
def _metric_columns(radius_m: int) -> tuple[str, str, str]:
|
||||
return (
|
||||
POSTCODE_DENSITY_COL.format(radius=radius_m),
|
||||
POSTCODE_AREA_COL.format(radius=radius_m),
|
||||
POSTCODE_COUNT_COL.format(radius=radius_m),
|
||||
POSTCODE_HEIGHT_COL.format(radius=radius_m),
|
||||
)
|
||||
|
||||
|
|
@ -198,20 +216,23 @@ def _postcode_density_percentile_col(radius_m: int) -> str:
|
|||
|
||||
|
||||
def _coverage_percentile_expr(column: str, alias: str) -> pl.Expr:
|
||||
"""Rank higher tree coverage higher on a 0-100 England-wide percentile scale."""
|
||||
"""Rank tree coverage on a 0-100 England-wide percentile scale.
|
||||
|
||||
A single tie-consistent average-rank formula is used for every value so the
|
||||
scale is internally consistent end to end: tied values share their mean rank,
|
||||
so the lowest coverage maps toward 0 and the highest toward 100 only when they
|
||||
are not themselves tied. An all-equal (or single-value) column has no spread
|
||||
and maps to the neutral midpoint (50).
|
||||
"""
|
||||
value = pl.col(column).fill_nan(None)
|
||||
non_null_count = value.count()
|
||||
rank = value.rank("average")
|
||||
return (
|
||||
pl.when(value.is_null())
|
||||
.then(None)
|
||||
.when(value == value.min())
|
||||
.then(0.0)
|
||||
.when(value == value.max())
|
||||
.then(100.0)
|
||||
.when(non_null_count > 1)
|
||||
.then(((rank - 1) / (non_null_count - 1) * 100).round(1))
|
||||
.otherwise(100.0)
|
||||
.otherwise(50.0)
|
||||
.cast(pl.Float32)
|
||||
.alias(alias)
|
||||
)
|
||||
|
|
@ -220,7 +241,7 @@ def _coverage_percentile_expr(column: str, alias: str) -> pl.Expr:
|
|||
def _with_postcode_density_percentiles(
|
||||
postcode_metrics: pl.DataFrame, radius_m: int
|
||||
) -> pl.DataFrame:
|
||||
density_col, _area_col, _count_col, _height_col = _metric_columns(radius_m)
|
||||
density_col, _area_col, _height_col = _metric_columns(radius_m)
|
||||
return postcode_metrics.with_columns(
|
||||
_coverage_percentile_expr(
|
||||
density_col,
|
||||
|
|
@ -229,28 +250,88 @@ def _with_postcode_density_percentiles(
|
|||
)
|
||||
|
||||
|
||||
def _accumulate_tree_metrics(
|
||||
def _postcode_buffers(
|
||||
points: pl.DataFrame, radius_m: int
|
||||
) -> tuple[np.ndarray, shapely.STRtree]:
|
||||
"""Build a radius-r circle for every postcode plus an STRtree over them.
|
||||
|
||||
Circle index == postcode index, so an STRtree match resolves directly to the
|
||||
postcode accumulator slot.
|
||||
"""
|
||||
xy = points.select("x", "y").to_numpy()
|
||||
circles = shapely.buffer(shapely.points(xy), radius_m, quad_segs=8)
|
||||
return circles, shapely.STRtree(circles)
|
||||
|
||||
|
||||
def _accumulate_clipped_area(
|
||||
geoms: np.ndarray,
|
||||
circles: np.ndarray,
|
||||
tree: shapely.STRtree,
|
||||
canopy_area: np.ndarray,
|
||||
height: np.ndarray | None = None,
|
||||
height_weighted_sum: np.ndarray | None = None,
|
||||
height_weight: np.ndarray | None = None,
|
||||
) -> None:
|
||||
"""Add each polygon's in-buffer overlap area to every postcode it intersects.
|
||||
|
||||
Only area(polygon ∩ circle) is accumulated -- never the area of the polygon
|
||||
that falls outside the postcode's extended buffer -- so a crown straddling
|
||||
the buffer edge contributes only its inside portion and a large parcel cannot
|
||||
saturate a postcode from mere proximity. When ``height`` is supplied the mean
|
||||
feature height is accumulated weighted by that same clipped overlap area.
|
||||
"""
|
||||
keep = ~shapely.is_missing(geoms) & ~shapely.is_empty(geoms)
|
||||
geoms = geoms[keep]
|
||||
if height is not None:
|
||||
height = height[keep]
|
||||
if geoms.size == 0:
|
||||
return
|
||||
|
||||
# query(predicate="intersects") over the circle STRtree returns exactly the
|
||||
# (polygon, circle) pairs whose clipped overlap can be positive -- i.e. the
|
||||
# polygon overlaps that postcode's radius-r buffer.
|
||||
geom_index, postcode_index = tree.query(geoms, predicate="intersects")
|
||||
if geom_index.size == 0:
|
||||
return
|
||||
|
||||
clipped_area = shapely.area(
|
||||
shapely.intersection(geoms[geom_index], circles[postcode_index])
|
||||
)
|
||||
positive = clipped_area > 0
|
||||
geom_index = geom_index[positive]
|
||||
postcode_index = postcode_index[positive]
|
||||
clipped_area = clipped_area[positive]
|
||||
|
||||
np.add.at(canopy_area, postcode_index, clipped_area)
|
||||
|
||||
if height is not None:
|
||||
feature_height = height[geom_index]
|
||||
finite = np.isfinite(feature_height)
|
||||
if finite.any():
|
||||
np.add.at(
|
||||
height_weighted_sum,
|
||||
postcode_index[finite],
|
||||
feature_height[finite] * clipped_area[finite],
|
||||
)
|
||||
np.add.at(height_weight, postcode_index[finite], clipped_area[finite])
|
||||
|
||||
|
||||
def _accumulate_tow_metrics(
|
||||
dataset_path: str,
|
||||
points: pl.DataFrame,
|
||||
radius_m: int,
|
||||
circles: np.ndarray,
|
||||
tree: shapely.STRtree,
|
||||
canopy_area: np.ndarray,
|
||||
height_weighted_sum: np.ndarray,
|
||||
height_weight: np.ndarray,
|
||||
batch_size: int,
|
||||
layer_names: tuple[str, ...] | None,
|
||||
max_features_per_layer: int | None,
|
||||
workers: int,
|
||||
canopy_area: np.ndarray,
|
||||
feature_count: np.ndarray,
|
||||
height_weighted_sum: np.ndarray,
|
||||
height_weight: np.ndarray,
|
||||
) -> None:
|
||||
xy = points.select("x", "y").to_numpy()
|
||||
tree = cKDTree(xy)
|
||||
|
||||
layers = _layers(dataset_path, layer_names)
|
||||
print(f"Processing {len(layers)} TOW layer(s): {', '.join(layers)}")
|
||||
|
||||
columns = ["Woodland_Type", "TOW_Area_M", "MEANHT"]
|
||||
columns = ["MEANHT"]
|
||||
total_features_seen = 0
|
||||
total_features_used = 0
|
||||
|
||||
for layer in layers:
|
||||
info = pyogrio.read_info(dataset_path, layer=layer)
|
||||
|
|
@ -263,7 +344,7 @@ def _accumulate_tree_metrics(
|
|||
columns=columns,
|
||||
batch_size=batch_size,
|
||||
use_pyarrow=True,
|
||||
) as (_meta, reader):
|
||||
) as (meta, reader):
|
||||
for batch_index, batch in enumerate(reader, start=1):
|
||||
if max_features_per_layer is not None:
|
||||
remaining = max_features_per_layer - layer_features_seen
|
||||
|
|
@ -275,135 +356,29 @@ def _accumulate_tree_metrics(
|
|||
layer_features_seen += batch.num_rows
|
||||
total_features_seen += batch.num_rows
|
||||
names = batch.schema.names
|
||||
area = np.asarray(
|
||||
batch.column(names.index("TOW_Area_M")).to_numpy(zero_copy_only=False),
|
||||
dtype=np.float64,
|
||||
)
|
||||
geometry_column = _geometry_column(meta, names)
|
||||
height = np.asarray(
|
||||
batch.column(names.index("MEANHT")).to_numpy(zero_copy_only=False),
|
||||
dtype=np.float64,
|
||||
)
|
||||
geometry = np.asarray(
|
||||
batch.column(names.index("SHAPE")).to_numpy(zero_copy_only=False),
|
||||
batch.column(names.index(geometry_column)).to_numpy(
|
||||
zero_copy_only=False
|
||||
),
|
||||
dtype=object,
|
||||
)
|
||||
|
||||
valid = np.isfinite(area) & (area > 0)
|
||||
if not valid.any():
|
||||
continue
|
||||
|
||||
geometry = geometry[valid]
|
||||
area = area[valid]
|
||||
height = height[valid]
|
||||
|
||||
centroids = shapely.centroid(shapely.from_wkb(geometry))
|
||||
x = shapely.get_x(centroids)
|
||||
y = shapely.get_y(centroids)
|
||||
valid_xy = np.isfinite(x) & np.isfinite(y)
|
||||
if not valid_xy.any():
|
||||
continue
|
||||
|
||||
x = x[valid_xy]
|
||||
y = y[valid_xy]
|
||||
area = area[valid_xy]
|
||||
height = height[valid_xy]
|
||||
|
||||
nearby = tree.query_ball_point(
|
||||
np.column_stack((x, y)), radius_m, workers=workers
|
||||
_accumulate_clipped_area(
|
||||
shapely.from_wkb(geometry),
|
||||
circles,
|
||||
tree,
|
||||
canopy_area,
|
||||
height=height,
|
||||
height_weighted_sum=height_weighted_sum,
|
||||
height_weight=height_weight,
|
||||
)
|
||||
lengths = np.fromiter(
|
||||
(len(postcode_indexes) for postcode_indexes in nearby),
|
||||
dtype=np.int32,
|
||||
count=len(nearby),
|
||||
)
|
||||
matching_features = lengths > 0
|
||||
if matching_features.any():
|
||||
postcode_indexes = np.concatenate(
|
||||
[indexes for indexes in nearby if indexes]
|
||||
).astype(np.int64, copy=False)
|
||||
feature_indexes = np.repeat(
|
||||
np.flatnonzero(matching_features), lengths[matching_features]
|
||||
)
|
||||
|
||||
np.add.at(canopy_area, postcode_indexes, area[feature_indexes])
|
||||
np.add.at(feature_count, postcode_indexes, 1)
|
||||
|
||||
feature_height = height[feature_indexes]
|
||||
valid_height = np.isfinite(feature_height)
|
||||
if valid_height.any():
|
||||
height_area = area[feature_indexes][valid_height]
|
||||
np.add.at(
|
||||
height_weighted_sum,
|
||||
postcode_indexes[valid_height],
|
||||
feature_height[valid_height] * height_area,
|
||||
)
|
||||
np.add.at(
|
||||
height_weight,
|
||||
postcode_indexes[valid_height],
|
||||
height_area,
|
||||
)
|
||||
|
||||
total_features_used += len(area)
|
||||
if batch_index == 1 or batch_index % 25 == 0:
|
||||
print(
|
||||
f" batch {batch_index:,}: "
|
||||
f"{total_features_seen:,} rows read, "
|
||||
f"{total_features_used:,} features with usable centroids"
|
||||
)
|
||||
|
||||
|
||||
def _postcode_buffers(
|
||||
points: pl.DataFrame, radius_m: int
|
||||
) -> tuple[np.ndarray, shapely.STRtree]:
|
||||
"""Build a radius-r circle for every postcode plus an STRtree over them.
|
||||
|
||||
Circle index == postcode index, matching the order used by the cKDTree path.
|
||||
"""
|
||||
xy = points.select("x", "y").to_numpy()
|
||||
circles = shapely.buffer(shapely.points(xy), radius_m, quad_segs=8)
|
||||
return circles, shapely.STRtree(circles)
|
||||
|
||||
|
||||
def _add_nfi_batch(
|
||||
geoms: np.ndarray,
|
||||
category: np.ndarray,
|
||||
circles: np.ndarray,
|
||||
tree: shapely.STRtree,
|
||||
canopy_area: np.ndarray,
|
||||
feature_count: np.ndarray,
|
||||
radius_m: int,
|
||||
) -> None:
|
||||
"""Add NFI woodland into the shared arrays by true buffer-clipped area.
|
||||
|
||||
Unlike the TOW centroid path, this clips each woodland polygon to each
|
||||
nearby postcode circle and adds only area(polygon ∩ circle); a large parcel
|
||||
therefore cannot saturate a postcode from mere centroid proximity, and a
|
||||
buffer-filling parcel whose centroid is outside the radius is not missed.
|
||||
"""
|
||||
keep = (category == NFI_WOODLAND_VALUE) & ~shapely.is_missing(geoms)
|
||||
geoms = geoms[keep]
|
||||
if geoms.size:
|
||||
geoms = geoms[~shapely.is_empty(geoms)]
|
||||
if geoms.size == 0:
|
||||
return
|
||||
|
||||
# dwithin(polygon, point, r) is true iff the radius-r circle around the
|
||||
# point intersects the polygon -- exactly the candidate set we want.
|
||||
nfi_index, postcode_index = tree.query(
|
||||
geoms, predicate="dwithin", distance=radius_m
|
||||
)
|
||||
if nfi_index.size == 0:
|
||||
return
|
||||
|
||||
clipped_area = shapely.area(
|
||||
shapely.intersection(geoms[nfi_index], circles[postcode_index])
|
||||
)
|
||||
positive = clipped_area > 0
|
||||
postcode_index = postcode_index[positive]
|
||||
clipped_area = clipped_area[positive]
|
||||
|
||||
np.add.at(canopy_area, postcode_index, clipped_area)
|
||||
np.add.at(feature_count, postcode_index, 1)
|
||||
print(f" batch {batch_index:,}: {total_features_seen:,} rows read")
|
||||
|
||||
|
||||
def _accumulate_nfi_metrics(
|
||||
|
|
@ -411,8 +386,6 @@ def _accumulate_nfi_metrics(
|
|||
circles: np.ndarray,
|
||||
tree: shapely.STRtree,
|
||||
canopy_area: np.ndarray,
|
||||
feature_count: np.ndarray,
|
||||
radius_m: int,
|
||||
batch_size: int,
|
||||
max_nfi_features: int | None,
|
||||
) -> None:
|
||||
|
|
@ -455,14 +428,12 @@ def _accumulate_nfi_metrics(
|
|||
),
|
||||
dtype=object,
|
||||
)
|
||||
_add_nfi_batch(
|
||||
shapely.from_wkb(geometry),
|
||||
category,
|
||||
geoms = shapely.from_wkb(geometry)
|
||||
_accumulate_clipped_area(
|
||||
geoms[category == NFI_WOODLAND_VALUE],
|
||||
circles,
|
||||
tree,
|
||||
canopy_area,
|
||||
feature_count,
|
||||
radius_m,
|
||||
)
|
||||
if batch_index == 1 or batch_index % 25 == 0:
|
||||
print(f" NFI batch {batch_index:,}: {features_seen:,} rows read")
|
||||
|
|
@ -471,15 +442,26 @@ def _accumulate_nfi_metrics(
|
|||
def _finalize_metrics(
|
||||
points: pl.DataFrame,
|
||||
canopy_area: np.ndarray,
|
||||
feature_count: np.ndarray,
|
||||
height_weighted_sum: np.ndarray,
|
||||
height_weight: np.ndarray,
|
||||
radius_m: int,
|
||||
) -> pl.DataFrame:
|
||||
n_points = points.height
|
||||
density_col, area_col, count_col, height_col = _metric_columns(radius_m)
|
||||
density_col, area_col, height_col = _metric_columns(radius_m)
|
||||
buffer_area = math.pi * radius_m * radius_m
|
||||
density_pct = np.minimum(canopy_area / buffer_area * 100.0, 100.0)
|
||||
raw_density = canopy_area / buffer_area * 100.0
|
||||
density_pct = np.minimum(raw_density, 100.0)
|
||||
|
||||
# Symptom of the assumed-disjoint TOW/NFI union being violated (or of
|
||||
# overlapping crowns inside one buffer): clipped areas alone cannot exceed the
|
||||
# buffer unless polygons overlap. Surface it rather than hide it behind the cap.
|
||||
over_count = int(np.count_nonzero(raw_density > 100.0))
|
||||
if over_count:
|
||||
print(
|
||||
f" note: {over_count:,} postcode(s) exceeded 100% raw canopy and were "
|
||||
"capped — indicates overlapping TOW/NFI canopy within the buffer"
|
||||
)
|
||||
|
||||
mean_height = np.divide(
|
||||
height_weighted_sum,
|
||||
height_weight,
|
||||
|
|
@ -492,7 +474,6 @@ def _finalize_metrics(
|
|||
"postcode": points["postcode"],
|
||||
area_col: canopy_area.round(1).astype(np.float32),
|
||||
density_col: density_pct.round(1).astype(np.float32),
|
||||
count_col: feature_count.astype(np.uint32),
|
||||
height_col: np.round(mean_height, 1).astype(np.float32),
|
||||
}
|
||||
).with_columns(
|
||||
|
|
@ -500,181 +481,9 @@ def _finalize_metrics(
|
|||
)
|
||||
|
||||
|
||||
def _clean_key_expr(column: str) -> pl.Expr:
|
||||
return (
|
||||
pl.col(column)
|
||||
.fill_null("")
|
||||
.str.to_uppercase()
|
||||
.str.replace_all(r"[^A-Z0-9]+", " ")
|
||||
.str.replace_all(r"\s+", " ")
|
||||
.str.strip_chars()
|
||||
)
|
||||
|
||||
|
||||
def _latest_price_paid_addresses(price_paid_path: Path) -> pl.LazyFrame:
|
||||
return (
|
||||
pl.scan_parquet(price_paid_path)
|
||||
.select(
|
||||
pl.col("postcode").str.strip_chars().str.to_uppercase().alias("postcode"),
|
||||
"paon",
|
||||
"saon",
|
||||
"street",
|
||||
"locality",
|
||||
"town_city",
|
||||
"district",
|
||||
"county",
|
||||
"date_of_transfer",
|
||||
)
|
||||
.filter(pl.col("postcode").is_not_null())
|
||||
.filter(pl.col("street").is_not_null())
|
||||
.filter(_clean_key_expr("street") != "")
|
||||
.with_columns(
|
||||
pl.concat_str(
|
||||
[pl.col("saon"), pl.col("paon"), pl.col("street")],
|
||||
separator=" ",
|
||||
ignore_nulls=True,
|
||||
)
|
||||
.str.replace_all(r"\s+", " ")
|
||||
.str.strip_chars()
|
||||
.alias("pp_address"),
|
||||
)
|
||||
.filter(pl.col("pp_address").is_not_null())
|
||||
.sort("date_of_transfer")
|
||||
.group_by("postcode", "pp_address", maintain_order=True)
|
||||
.agg(
|
||||
pl.col("street").last(),
|
||||
pl.col("locality").last(),
|
||||
pl.col("town_city").last(),
|
||||
pl.col("district").last(),
|
||||
pl.col("county").last(),
|
||||
)
|
||||
.with_columns(
|
||||
pl.concat_str(
|
||||
[
|
||||
_clean_key_expr("street"),
|
||||
_clean_key_expr("town_city"),
|
||||
_clean_key_expr("district"),
|
||||
_clean_key_expr("county"),
|
||||
],
|
||||
separator="|",
|
||||
).alias("street_key")
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _weighted_mean_expr(column: str, weight: str) -> pl.Expr:
|
||||
valid = pl.col(column).is_not_null() & ~pl.col(column).is_nan()
|
||||
numerator = pl.when(valid).then(pl.col(column) * pl.col(weight)).sum()
|
||||
denominator = pl.when(valid).then(pl.col(weight)).sum()
|
||||
return pl.when(denominator > 0).then(numerator / denominator).otherwise(None)
|
||||
|
||||
|
||||
def _write_street_rollups(
|
||||
postcode_metrics: pl.DataFrame,
|
||||
price_paid_path: Path,
|
||||
output_streets: Path | None,
|
||||
output_addresses: Path | None,
|
||||
radius_m: int,
|
||||
) -> None:
|
||||
if output_streets is None and output_addresses is None:
|
||||
return
|
||||
|
||||
density_col, area_col, count_col, height_col = _metric_columns(radius_m)
|
||||
metrics = postcode_metrics.lazy()
|
||||
addresses = _latest_price_paid_addresses(price_paid_path).join(
|
||||
metrics, on="postcode", how="inner"
|
||||
)
|
||||
|
||||
per_postcode = (
|
||||
addresses.group_by(
|
||||
"street_key",
|
||||
"postcode",
|
||||
"street",
|
||||
"locality",
|
||||
"town_city",
|
||||
"district",
|
||||
"county",
|
||||
)
|
||||
.agg(
|
||||
pl.len().alias("address_count"),
|
||||
pl.col(density_col).first(),
|
||||
pl.col(area_col).first(),
|
||||
pl.col(count_col).first(),
|
||||
pl.col(height_col).first(),
|
||||
)
|
||||
.collect()
|
||||
)
|
||||
|
||||
streets = (
|
||||
per_postcode.lazy()
|
||||
.group_by("street_key")
|
||||
.agg(
|
||||
pl.col("street").first(),
|
||||
pl.col("locality").first(),
|
||||
pl.col("town_city").first(),
|
||||
pl.col("district").first(),
|
||||
pl.col("county").first(),
|
||||
pl.col("postcode").n_unique().alias("postcode_count"),
|
||||
pl.col("address_count").sum().alias("address_count"),
|
||||
_weighted_mean_expr(density_col, "address_count")
|
||||
.round(1)
|
||||
.cast(pl.Float32)
|
||||
.alias(STREET_TREE_COVERAGE_COL),
|
||||
_weighted_mean_expr(area_col, "address_count")
|
||||
.round(1)
|
||||
.cast(pl.Float32)
|
||||
.alias(f"Street average {area_col}"),
|
||||
_weighted_mean_expr(count_col, "address_count")
|
||||
.round(1)
|
||||
.cast(pl.Float32)
|
||||
.alias(f"Street average {count_col}"),
|
||||
_weighted_mean_expr(height_col, "address_count")
|
||||
.round(1)
|
||||
.cast(pl.Float32)
|
||||
.alias(f"Street average {height_col}"),
|
||||
)
|
||||
.with_columns(
|
||||
_coverage_percentile_expr(
|
||||
STREET_TREE_COVERAGE_COL,
|
||||
STREET_TREE_DENSITY_COL,
|
||||
)
|
||||
)
|
||||
.sort("street_key")
|
||||
.collect()
|
||||
)
|
||||
|
||||
if output_addresses is not None:
|
||||
output_addresses.parent.mkdir(parents=True, exist_ok=True)
|
||||
address_output = addresses.join(
|
||||
streets.lazy().select(
|
||||
"street_key",
|
||||
STREET_TREE_COVERAGE_COL,
|
||||
STREET_TREE_DENSITY_COL,
|
||||
),
|
||||
on="street_key",
|
||||
how="left",
|
||||
)
|
||||
address_output.sink_parquet(output_addresses, compression="zstd")
|
||||
print(f"Wrote address tree-density join: {output_addresses}")
|
||||
|
||||
if output_streets is not None:
|
||||
output_streets.parent.mkdir(parents=True, exist_ok=True)
|
||||
streets.write_parquet(output_streets, compression="zstd")
|
||||
print(f"Wrote street tree-density rollup: {output_streets}")
|
||||
|
||||
|
||||
def _parse_csv_arg(value: str | None) -> tuple[str, ...] | None:
|
||||
if value is None:
|
||||
return None
|
||||
if value.lower() == "all":
|
||||
return None
|
||||
parts = tuple(part.strip() for part in value.split(",") if part.strip())
|
||||
return parts or None
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Build postcode and street tree-density metrics from FR_TOW_V1_ALL.zip"
|
||||
description="Build postcode-level tree-density metrics from FR_TOW_V1_ALL.zip"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tow-zip",
|
||||
|
|
@ -716,35 +525,17 @@ def main() -> None:
|
|||
default=Path("property-data/arcgis_data.parquet"),
|
||||
help="Postcode centroid parquet with east1m/north1m columns",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--price-paid",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Optional Price Paid parquet used to roll postcode metrics up to streets",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-postcodes",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="Output postcode-level tree-density parquet",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-streets",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Optional output street-level tree-density parquet",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-addresses",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Optional output address/street join parquet keyed by postcode and pp_address",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--radius-m",
|
||||
type=int,
|
||||
default=50,
|
||||
help="Radius around each postcode centroid used as the street-scale buffer",
|
||||
help="Radius around each postcode centroid used as the extended buffer",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--layers",
|
||||
|
|
@ -757,12 +548,6 @@ def main() -> None:
|
|||
default=65_536,
|
||||
help="Arrow batch size for reading TOW features",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workers",
|
||||
type=int,
|
||||
default=-1,
|
||||
help="Worker count passed to scipy cKDTree.query_ball_point",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-postcodes",
|
||||
type=int,
|
||||
|
|
@ -783,9 +568,6 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if (args.output_streets or args.output_addresses) and args.price_paid is None:
|
||||
raise SystemExit("--price-paid is required when writing street/address outputs")
|
||||
|
||||
if args.radius_m <= 0:
|
||||
raise SystemExit("--radius-m must be greater than zero")
|
||||
|
||||
|
|
@ -797,36 +579,32 @@ def main() -> None:
|
|||
|
||||
n_points = points.height
|
||||
canopy_area = np.zeros(n_points, dtype=np.float64)
|
||||
feature_count = np.zeros(n_points, dtype=np.uint32)
|
||||
height_weighted_sum = np.zeros(n_points, dtype=np.float64)
|
||||
height_weight = np.zeros(n_points, dtype=np.float64)
|
||||
|
||||
_accumulate_tree_metrics(
|
||||
circles, tree = _postcode_buffers(points, args.radius_m)
|
||||
|
||||
_accumulate_tow_metrics(
|
||||
dataset_path=dataset_path,
|
||||
points=points,
|
||||
radius_m=args.radius_m,
|
||||
circles=circles,
|
||||
tree=tree,
|
||||
canopy_area=canopy_area,
|
||||
height_weighted_sum=height_weighted_sum,
|
||||
height_weight=height_weight,
|
||||
batch_size=args.batch_size,
|
||||
layer_names=layer_names,
|
||||
max_features_per_layer=args.max_features_per_layer,
|
||||
workers=args.workers,
|
||||
canopy_area=canopy_area,
|
||||
feature_count=feature_count,
|
||||
height_weighted_sum=height_weighted_sum,
|
||||
height_weight=height_weight,
|
||||
)
|
||||
|
||||
if args.nfi_zip is not None and args.nfi_zip.exists():
|
||||
nfi_path = _nfi_dataset_path(
|
||||
args.nfi_zip, args.nfi_extract_dir, args.force_extract, args.use_vsizip
|
||||
)
|
||||
circles, nfi_tree = _postcode_buffers(points, args.radius_m)
|
||||
_accumulate_nfi_metrics(
|
||||
dataset_path=nfi_path,
|
||||
circles=circles,
|
||||
tree=nfi_tree,
|
||||
tree=tree,
|
||||
canopy_area=canopy_area,
|
||||
feature_count=feature_count,
|
||||
radius_m=args.radius_m,
|
||||
batch_size=args.batch_size,
|
||||
max_nfi_features=args.max_nfi_features,
|
||||
)
|
||||
|
|
@ -836,7 +614,6 @@ def main() -> None:
|
|||
postcode_metrics = _finalize_metrics(
|
||||
points,
|
||||
canopy_area,
|
||||
feature_count,
|
||||
height_weighted_sum,
|
||||
height_weight,
|
||||
args.radius_m,
|
||||
|
|
@ -849,14 +626,14 @@ def main() -> None:
|
|||
postcode_metrics.write_parquet(args.output_postcodes, compression="zstd")
|
||||
print(f"\nWrote postcode tree-density metrics: {args.output_postcodes}")
|
||||
|
||||
if args.price_paid is not None:
|
||||
_write_street_rollups(
|
||||
postcode_metrics=postcode_metrics,
|
||||
price_paid_path=args.price_paid,
|
||||
output_streets=args.output_streets,
|
||||
output_addresses=args.output_addresses,
|
||||
radius_m=args.radius_m,
|
||||
)
|
||||
|
||||
def _parse_csv_arg(value: str | None) -> tuple[str, ...] | None:
|
||||
if value is None:
|
||||
return None
|
||||
if value.lower() == "all":
|
||||
return None
|
||||
parts = tuple(part.strip() for part in value.split(",") if part.strip())
|
||||
return parts or None
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ def _write_tree_geojsonseq(
|
|||
columns=columns,
|
||||
batch_size=batch_size,
|
||||
use_pyarrow=True,
|
||||
) as (_meta, reader):
|
||||
) as (meta, reader):
|
||||
for batch in reader:
|
||||
if max_features_per_layer is not None:
|
||||
remaining = max_features_per_layer - layer_features_seen
|
||||
|
|
@ -101,6 +101,7 @@ def _write_tree_geojsonseq(
|
|||
|
||||
layer_features_seen += batch.num_rows
|
||||
names = batch.schema.names
|
||||
geometry_column = _geometry_column(meta, names)
|
||||
area = np.asarray(
|
||||
batch.column(names.index("TOW_Area_M")).to_numpy(
|
||||
zero_copy_only=False
|
||||
|
|
@ -108,7 +109,7 @@ def _write_tree_geojsonseq(
|
|||
dtype=np.float64,
|
||||
)
|
||||
geometry = np.asarray(
|
||||
batch.column(names.index("SHAPE")).to_numpy(
|
||||
batch.column(names.index(geometry_column)).to_numpy(
|
||||
zero_copy_only=False
|
||||
),
|
||||
dtype=object,
|
||||
|
|
@ -327,7 +328,7 @@ def build_tree_overlay_tiles(
|
|||
str(min_zoom),
|
||||
"--maximum-zoom",
|
||||
str(max_zoom),
|
||||
"--drop-smallest-as-needed",
|
||||
"--coalesce-smallest-as-needed",
|
||||
"--extend-zooms-if-still-dropping",
|
||||
"--temporary-directory",
|
||||
tmp,
|
||||
|
|
|
|||
|
|
@ -13,7 +13,11 @@ def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
|
|||
Uses OS National Grid coordinates (east1m, north1m) which are Cartesian metres,
|
||||
so Euclidean distance via cKDTree gives accurate results without projection.
|
||||
"""
|
||||
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry25cd") == "E92000001")
|
||||
arcgis = (
|
||||
pl.scan_parquet(arcgis_path)
|
||||
.filter(pl.col("ctry25cd") == "E92000001")
|
||||
.with_columns(pl.col("doterm").cast(pl.Utf8).alias("doterm"))
|
||||
)
|
||||
|
||||
active = (
|
||||
arcgis.filter(pl.col("doterm").is_null())
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ import zipfile
|
|||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
from shapely.geometry import shape
|
||||
from shapely.validation import explain_validity
|
||||
|
||||
|
||||
def _failures_for_file(path: Path) -> list[str]:
|
||||
|
|
@ -79,9 +81,7 @@ def _split_glob(spec: str) -> tuple[Path, str]:
|
|||
|
||||
def _split_pair(spec: str, label: str) -> tuple[Path, Path]:
|
||||
if "::" not in spec:
|
||||
raise argparse.ArgumentTypeError(
|
||||
f"{spec!r} must use LEFT::RIGHT for {label}"
|
||||
)
|
||||
raise argparse.ArgumentTypeError(f"{spec!r} must use LEFT::RIGHT for {label}")
|
||||
left, right = spec.split("::", 1)
|
||||
if not left or not right:
|
||||
raise argparse.ArgumentTypeError(f"{spec!r} must include both paths")
|
||||
|
|
@ -143,22 +143,140 @@ def _parquet_postcodes(path: Path) -> set[str]:
|
|||
.get_column(column)
|
||||
.to_list()
|
||||
)
|
||||
return {_canonical_postcode(value) for value in values if _canonical_postcode(value)}
|
||||
return {
|
||||
_canonical_postcode(value) for value in values if _canonical_postcode(value)
|
||||
}
|
||||
|
||||
|
||||
def _active_english_arcgis_postcodes(path: Path) -> set[str]:
    """Return the canonical postcodes of the active English rows in ``path``.

    A row is kept when its ``ctry25cd`` equals ``"E92000001"`` and its
    ``doterm`` is null; the ``pcds`` values of those rows are de-duplicated
    and passed through ``_canonical_postcode``. Values that canonicalise to
    something falsy are dropped.

    Args:
        path: ArcGIS postcode parquet file with ``pcds``, ``ctry25cd`` and
            ``doterm`` columns.

    Returns:
        The set of canonical postcode strings.

    Raises:
        ValueError: if any of the three required columns is missing.
    """
    required = ("pcds", "ctry25cd", "doterm")
    # A single lazy scan serves both the schema check and the query, so the
    # filters are applied by the scan rather than after loading every row.
    frame = pl.scan_parquet(path)
    missing = sorted(set(required) - set(frame.collect_schema().names()))
    if missing:
        raise ValueError(f"{path}: missing ArcGIS postcode columns: {missing}")

    values = (
        frame.filter(pl.col("ctry25cd") == "E92000001")
        # Cast first so the null test behaves the same whatever dtype the
        # column was written with.
        .filter(pl.col("doterm").cast(pl.Utf8).is_null())
        .select(pl.col("pcds").drop_nulls().unique())
        .collect()
        .get_column("pcds")
        .to_list()
    )
    # Canonicalise each value once, then discard the empty results.
    canonical = (_canonical_postcode(value) for value in values)
    return {postcode for postcode in canonical if postcode}
|
||||
|
||||
|
||||
def _format_samples(samples: list[str]) -> str:
|
||||
return "; ".join(samples[:10])
|
||||
|
||||
|
||||
def _boundary_postcode_scan(path: Path) -> tuple[set[str], list[str]]:
    """Collect boundary postcodes and structural problems from unit GeoJSON.

    Every ``*.geojson`` file in ``path / "units"`` (or in ``path`` itself when
    that sub-directory does not exist) is read in sorted order. Each feature
    is checked for a postcode in ``properties.postcodes`` and for a geometry
    that is present, non-empty, a Polygon or MultiPolygon, and valid.

    Args:
        path: boundary output directory, or the directory holding the unit
            GeoJSON files directly.

    Returns:
        ``(postcodes, failures)``. ``postcodes`` is the set of canonical
        postcodes seen; a postcode is included even when its geometry is
        rejected. ``failures`` holds one message per file that could not be
        read or is not shaped like a feature collection, followed by one
        summary message per problem category giving the total count and up
        to ten samples.

    Malformed input is reported through ``failures`` rather than raised, so a
    single bad file or feature does not abort the scan of the others.
    """
    sample_limit = 10
    # Category key -> text placed between the count and the samples. The
    # order here is the order in which summaries are appended to failures.
    summaries = (
        ("missing_postcode", "boundary features are missing properties.postcodes"),
        ("duplicate", "duplicate boundary postcode features"),
        ("missing_geometry", "boundary features are missing or empty geometry"),
        ("non_polygon", "boundary features are not polygonal"),
        ("invalid_geometry", "invalid boundary geometries"),
    )
    counts = {kind: 0 for kind, _ in summaries}
    samples: dict[str, list[str]] = {kind: [] for kind, _ in summaries}

    def record(kind, sample):
        """Count one problem; keep its sample only while under the limit.

        ``sample`` is a string, or a zero-argument callable returning one so
        that costly descriptions are built only when they will be stored.
        """
        counts[kind] += 1
        if len(samples[kind]) < sample_limit:
            samples[kind].append(sample() if callable(sample) else sample)

    units_dir = path / "units" if (path / "units").is_dir() else path
    postcodes: set[str] = set()
    first_seen: dict[str, str] = {}  # postcode -> label of its first feature
    failures: list[str] = []

    for geojson_path in sorted(units_dir.glob("*.geojson")):
        try:
            with geojson_path.open("r", encoding="utf-8") as handle:
                data = json.load(handle)
        except Exception as exc:
            failures.append(f"{geojson_path}: unreadable GeoJSON: {exc}")
            continue

        # Valid JSON is not necessarily a feature collection; report the file
        # instead of letting .get()/enumerate() raise on an unexpected shape.
        features = data.get("features", []) if isinstance(data, dict) else None
        if not isinstance(features, list):
            failures.append(
                f"{geojson_path}: unreadable GeoJSON: expected an object with a "
                "'features' array"
            )
            continue

        for idx, feature in enumerate(features):
            label = f"{geojson_path.name} feature {idx}"
            # A non-object feature has neither postcode nor geometry; treating
            # it as empty lets it be counted under both categories.
            if not isinstance(feature, dict):
                feature = {}
            properties = feature.get("properties")
            if not isinstance(properties, dict):
                properties = {}

            value = properties.get("postcodes")
            postcode = _canonical_postcode(value) if value is not None else ""
            subject = f"{postcode or label}"
            if not postcode:
                record("missing_postcode", label)
            elif postcode in first_seen:
                record(
                    "duplicate",
                    f"{postcode} in {first_seen[postcode]} and {label}",
                )
            else:
                first_seen[postcode] = label
                postcodes.add(postcode)

            geometry_data = feature.get("geometry")
            if geometry_data is None:
                record("missing_geometry", subject)
                continue
            try:
                geom = shape(geometry_data)
            except Exception as exc:
                record("invalid_geometry", f"{subject}: {exc}")
                continue

            if geom.is_empty:
                record("missing_geometry", f"{subject}: empty")
            elif geom.geom_type not in {"Polygon", "MultiPolygon"}:
                record("non_polygon", f"{subject}: {geom.geom_type}")
            elif not geom.is_valid:
                # Deferred so explain_validity only runs for stored samples.
                record(
                    "invalid_geometry",
                    lambda: f"{subject}: {explain_validity(geom)}",
                )

    for kind, description in summaries:
        if counts[kind]:
            failures.append(
                f"{path}: {counts[kind]:,} {description}; "
                f"sample: {_format_samples(samples[kind])}"
            )
    return postcodes, failures
|
||||
|
||||
|
||||
def _boundary_postcodes(path: Path) -> set[str]:
    """Return the canonical postcodes found in the boundary GeoJSON under ``path``.

    The work is delegated to ``_boundary_postcode_scan``, so the files are read
    only once and the postcode set is exactly the one that scan reports.

    Raises:
        ValueError: if the scan reports any failure; the message is all
            failure messages joined with ``"; "``.
    """
    postcodes, failures = _boundary_postcode_scan(path)
    if failures:
        raise ValueError("; ".join(failures))
    return postcodes
|
||||
|
||||
|
||||
|
|
@ -174,11 +292,13 @@ def _failures_for_postcode_boundary_match(spec: str) -> list[str]:
|
|||
|
||||
try:
|
||||
parquet_postcodes = _parquet_postcodes(parquet_path)
|
||||
boundary_postcodes = _boundary_postcodes(boundaries_path)
|
||||
boundary_postcodes, boundary_failures = _boundary_postcode_scan(boundaries_path)
|
||||
except Exception as exc:
|
||||
return [f"{parquet_path} / {boundaries_path}: postcode match check failed: {exc}"]
|
||||
return [
|
||||
f"{parquet_path} / {boundaries_path}: postcode match check failed: {exc}"
|
||||
]
|
||||
|
||||
failures = []
|
||||
failures = list(boundary_failures)
|
||||
if not boundary_postcodes:
|
||||
failures.append(f"{boundaries_path}: no boundary postcodes found")
|
||||
|
||||
|
|
@ -197,6 +317,41 @@ def _failures_for_postcode_boundary_match(spec: str) -> list[str]:
|
|||
return failures
|
||||
|
||||
|
||||
def _failures_for_active_postcode_boundary_match(spec: str) -> list[str]:
    """Check that active English postcodes and boundary postcodes coincide.

    ``spec`` has the form ``ARCGIS_PARQUET::DIR``. The parquet file and the
    boundary directory are first checked with ``_failures_for_parquet`` and
    ``_failures_for_dir``; if either reports a problem those messages are
    returned and no comparison is attempted.

    Returns:
        Failure messages, empty when everything matches: any failures from the
        boundary scan, a message if no boundary postcodes were found, a
        message for active postcodes lacking a boundary, and a message for
        boundary postcodes that are not active English postcodes. If loading
        either side raises, a single message describing the exception is
        returned instead.
    """
    arcgis_path, boundaries_path = _split_pair(
        spec, "active postcode boundary matching"
    )

    precheck = _failures_for_parquet(arcgis_path) + _failures_for_dir(boundaries_path)
    if precheck:
        return precheck

    try:
        active = _active_english_arcgis_postcodes(arcgis_path)
        with_boundary, scan_failures = _boundary_postcode_scan(boundaries_path)
    except Exception as exc:
        return [
            f"{arcgis_path} / {boundaries_path}: active postcode boundary check failed: {exc}"
        ]

    report = [*scan_failures]
    if not with_boundary:
        report.append(f"{boundaries_path}: no boundary postcodes found")

    # Active postcodes for which no boundary feature exists.
    lacking = active - with_boundary
    if lacking:
        report.append(
            f"{boundaries_path}: {len(lacking):,} active English postcodes "
            f"from {arcgis_path} are missing boundaries; sample: {_sample(lacking)}"
        )

    # Boundary features whose postcode is not in the active English set.
    orphans = with_boundary - active
    if orphans:
        report.append(
            f"{boundaries_path}: {len(orphans):,} boundary postcodes are not "
            f"active English postcodes in {arcgis_path}; sample: {_sample(orphans)}"
        )
    return report
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--file", action="append", default=[], type=Path)
|
||||
|
|
@ -221,6 +376,15 @@ def main() -> int:
|
|||
default=[],
|
||||
help="Require postcode parquet keys to exactly match boundary GeoJSON postcodes: PARQUET::DIR",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--active-postcode-boundary-match",
|
||||
action="append",
|
||||
default=[],
|
||||
help=(
|
||||
"Require active English ArcGIS postcodes to exactly match boundary "
|
||||
"GeoJSON postcodes: ARCGIS_PARQUET::DIR"
|
||||
),
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
failures: list[str] = []
|
||||
|
|
@ -238,6 +402,8 @@ def main() -> int:
|
|||
failures.extend(_failures_for_zip_glob(spec))
|
||||
for spec in args.postcode_boundary_match:
|
||||
failures.extend(_failures_for_postcode_boundary_match(spec))
|
||||
for spec in args.active_postcode_boundary_match:
|
||||
failures.extend(_failures_for_active_postcode_boundary_match(spec))
|
||||
|
||||
if failures:
|
||||
print("Output validation failed:", file=sys.stderr)
|
||||
|
|
|
|||
|
|
@ -282,17 +282,23 @@ pub fn compute_crime_by_year(
|
|||
|
||||
for &row in matching_rows {
|
||||
let postcode = data.postcode(row);
|
||||
let Some(series_list) = crime_by_year.series_by_postcode.get(postcode) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// For every type the postcode reports, add its per-year counts.
|
||||
// For types it doesn't report, treat the row as contributing 0 — so we
|
||||
// bump the row count for *every* known type below.
|
||||
for series in series_list {
|
||||
let acc = &mut per_type_year_sums[series.type_idx as usize];
|
||||
for point in &series.points {
|
||||
*acc.entry(point.year).or_insert(0.0) += point.count as f64;
|
||||
// A postcode absent from the by-year table has no recorded crime within
|
||||
// 50m, so it contributes 0 to every type's per-year sum. It must still be
|
||||
// counted in the denominator: the matching `(avg/yr)` stat counts those
|
||||
// same zero-crime postcodes as 0.0 (crime_by_postcode.parquet has a dense
|
||||
// row for every boundary postcode), so excluding them here would compute
|
||||
// the chart over a smaller population and report a higher magnitude than
|
||||
// the headline. Property postcodes are guaranteed to be boundary
|
||||
// postcodes by the postcode-boundary-match validation, so "absent" means
|
||||
// genuinely zero-crime, not missing data.
|
||||
if let Some(series_list) = crime_by_year.series_by_postcode.get(postcode) {
|
||||
// For every type the postcode reports, add its per-year counts.
|
||||
for series in series_list {
|
||||
let acc = &mut per_type_year_sums[series.type_idx as usize];
|
||||
for point in &series.points {
|
||||
*acc.entry(point.year).or_insert(0.0) += point.count as f64;
|
||||
}
|
||||
}
|
||||
}
|
||||
for c in per_type_row_counts.iter_mut() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue