good
This commit is contained in:
parent
81a16f543c
commit
63713c3a2b
15 changed files with 492 additions and 159 deletions
|
|
@ -12,9 +12,9 @@ const ROUTES = [
|
|||
{
|
||||
path: '/',
|
||||
output: 'index.html',
|
||||
title: 'Find the best postcodes and areas to live in England | Perfect Postcode',
|
||||
title: 'Stop searching the wrong places | Perfect Postcode',
|
||||
description:
|
||||
'Discover where to live by comparing England postcodes by budget, commute, schools, crime, noise, broadband, property prices and local amenities before viewing homes.',
|
||||
'Filter every postcode in England by budget, commute, schools, crime, noise, broadband, property prices and amenities before you start chasing viewings.',
|
||||
},
|
||||
{
|
||||
path: '/learn',
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { useState, useCallback, useRef, useEffect } from 'react';
|
|||
import { useTranslation } from 'react-i18next';
|
||||
import type { MapFlyToOptions, PostcodeGeometry } from '../../types';
|
||||
import { authHeaders } from '../../lib/api';
|
||||
import { POSTCODE_SEARCH_ZOOM } from '../../lib/consts';
|
||||
import { useIsMobile } from '../../hooks/useIsMobile';
|
||||
import { useLocationSearch, type SearchResult } from '../../hooks/useLocationSearch';
|
||||
import { PlaceSearchInput } from '../ui/PlaceSearchInput';
|
||||
|
|
@ -148,7 +149,7 @@ export default function LocationSearch({
|
|||
longitude: number;
|
||||
geometry: PostcodeGeometry;
|
||||
} = await res.json();
|
||||
onFlyTo(json.latitude, json.longitude, 16);
|
||||
onFlyTo(json.latitude, json.longitude, POSTCODE_SEARCH_ZOOM);
|
||||
onLocationSearched?.({
|
||||
postcode: json.postcode,
|
||||
geometry: json.geometry,
|
||||
|
|
|
|||
|
|
@ -410,16 +410,16 @@ const en = {
|
|||
|
||||
// ── Home Page ──────────────────────────────────────
|
||||
home: {
|
||||
heroEyebrow: "For buyers who don't know where to start",
|
||||
heroTitle1: 'Start with your needs,',
|
||||
heroTitle2: 'not an area name',
|
||||
heroTitle3: 'Then shortlist postcodes worth viewing.',
|
||||
heroEyebrow: 'Find where to look first',
|
||||
heroTitle1: 'Stop searching',
|
||||
heroTitle2: 'the wrong places',
|
||||
heroTitle3: 'Before listings take over.',
|
||||
heroSubtitle:
|
||||
'Most buyers start with a few familiar areas, then stitch together listing sites, commute checks, school reports, crime maps, broadband tools, and sold prices in separate tabs.',
|
||||
'Find postcodes where your budget, commute, and daily life line up.',
|
||||
heroDescription:
|
||||
'Set your budget, commute, schools, safety, noise, broadband, parks, shops, and property needs. Perfect Postcode checks postcodes across England and shows the areas worth shortlisting, including places you may not know by name.',
|
||||
exploreTheMap: 'Start matching postcodes',
|
||||
seeTheDifference: 'Watch the demo',
|
||||
'Perfect Postcode shows where to look before you start chasing viewings.',
|
||||
exploreTheMap: 'Show me where to look',
|
||||
seeTheDifference: 'Watch demo',
|
||||
productDemoLabel: 'Watch the postcode shortlist demo',
|
||||
playProductDemo: 'Play the postcode shortlist demo',
|
||||
scrollToProductDemo: 'Scroll to product demo',
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@
|
|||
<meta name="theme-color" content="#fafaf9" media="(prefers-color-scheme: light)" />
|
||||
<meta name="theme-color" content="#0a0e1a" media="(prefers-color-scheme: dark)" />
|
||||
<meta name="referrer" content="no-referrer" />
|
||||
<title>Find the best postcodes and areas to live in England | Perfect Postcode</title>
|
||||
<meta name="description" content="Discover where to live by comparing England postcodes by budget, commute, schools, crime, noise, broadband, property prices and local amenities before viewing homes." />
|
||||
<title>Stop searching the wrong places | Perfect Postcode</title>
|
||||
<meta name="description" content="Filter every postcode in England by budget, commute, schools, crime, noise, broadband, property prices and amenities before you start chasing viewings." />
|
||||
<meta name="x-og-placeholder" content="__PERFECT_POSTCODE_OG_TAGS__" />
|
||||
<script>
|
||||
(function() {
|
||||
|
|
|
|||
|
|
@ -141,19 +141,21 @@ describe('api utilities', () => {
|
|||
|
||||
it('serializes amenity distance filters using their selected backend feature', () => {
|
||||
const features: FeatureMeta[] = [
|
||||
{ name: 'Distance to nearest park (km)', type: 'numeric', min: 0, max: 2 },
|
||||
{ name: 'Distance to nearest grocery store (km)', type: 'numeric', min: 0, max: 5 },
|
||||
{ name: 'Distance to nearest amenity (Park) (km)', type: 'numeric', min: 0, max: 2 },
|
||||
{ name: 'Distance to nearest amenity (Café) (km)', type: 'numeric', min: 0, max: 5 },
|
||||
];
|
||||
|
||||
expect(
|
||||
buildFilterString(
|
||||
{
|
||||
[createPoiDistanceFilterKey('Distance to nearest park (km)', 1)]: [0, 0.5],
|
||||
[createPoiDistanceFilterKey('Distance to nearest grocery store (km)', 2)]: [0, 1],
|
||||
[createPoiDistanceFilterKey('Distance to nearest amenity (Park) (km)', 1)]: [0, 0.5],
|
||||
[createPoiDistanceFilterKey('Distance to nearest amenity (Café) (km)', 2)]: [0, 1],
|
||||
},
|
||||
features
|
||||
)
|
||||
).toBe('Distance to nearest park (km):0:0.5;;Distance to nearest grocery store (km):0:1');
|
||||
).toBe(
|
||||
'Distance to nearest amenity (Park) (km):0:0.5;;Distance to nearest amenity (Café) (km):0:1'
|
||||
);
|
||||
});
|
||||
|
||||
it('serializes amenity count filters using their selected backend feature', () => {
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ export const SMALLEST_VISIBLE_HEXAGON_RESOLUTION = Math.max(
|
|||
);
|
||||
|
||||
export const POSTCODE_ZOOM_THRESHOLD = 15;
|
||||
export const POSTCODE_SEARCH_ZOOM = 16;
|
||||
|
||||
export const FEATURE_GRADIENT: { t: number; color: [number, number, number] }[] = [
|
||||
{ t: 0, color: [46, 204, 113] },
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ const FEATURE_ICON_PATHS: Record<string, ReactNode> = {
|
|||
<polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2" />
|
||||
</>
|
||||
),
|
||||
'Street tree density (%)': (
|
||||
'Street tree density percentile': (
|
||||
<>
|
||||
<path d="M12 22V12" />
|
||||
<path d="M6 22h12" />
|
||||
|
|
|
|||
|
|
@ -1,39 +1,13 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import type { FeatureGroup, FeatureMeta } from '../types';
|
||||
import { groupFeaturesByCategory, orderFilterGroups } from './features';
|
||||
|
||||
function group(name: string): FeatureGroup {
|
||||
return { name, features: [] };
|
||||
}
|
||||
import type { FeatureMeta } from '../types';
|
||||
import { groupFeaturesByCategory } from './features';
|
||||
|
||||
function feature(name: string, groupName: string): FeatureMeta {
|
||||
return { name, group: groupName, type: 'numeric' };
|
||||
}
|
||||
|
||||
describe('feature grouping utilities', () => {
|
||||
it('orders filter groups around transport, property, amenities, and area development', () => {
|
||||
const groups = [
|
||||
group('Properties'),
|
||||
group('Education'),
|
||||
group('Area development'),
|
||||
group('Property prices'),
|
||||
group('Crime'),
|
||||
group('Amenities'),
|
||||
group('Transport'),
|
||||
];
|
||||
|
||||
expect(orderFilterGroups(groups).map((item) => item.name)).toEqual([
|
||||
'Transport',
|
||||
'Property prices',
|
||||
'Properties',
|
||||
'Amenities',
|
||||
'Education',
|
||||
'Crime',
|
||||
'Area development',
|
||||
]);
|
||||
});
|
||||
|
||||
it('keeps feature order inside grouped categories', () => {
|
||||
const groups = groupFeaturesByCategory([
|
||||
feature('A', 'Crime'),
|
||||
|
|
|
|||
64
frontend/src/lib/poi-distance-filter.test.ts
Normal file
64
frontend/src/lib/poi-distance-filter.test.ts
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import type { FeatureMeta } from '../types';
|
||||
import {
|
||||
POI_COUNT_2KM_FILTER_NAME,
|
||||
POI_DISTANCE_FILTER_NAME,
|
||||
TRANSPORT_DISTANCE_FILTER_NAME,
|
||||
getPoiFilterFeatureOptions,
|
||||
getPoiFilterName,
|
||||
} from './poi-distance-filter';
|
||||
|
||||
/** Builds a numeric feature fixture with a fixed 0–5 range for the given name. */
const numeric = (name: string): FeatureMeta => ({
  name,
  type: 'numeric',
  min: 0,
  max: 5,
});
|
||||
|
||||
describe('poi-distance-filter', () => {
  it('splits public transport distance options out of amenity distance options', () => {
    const features = [
      numeric('Distance to nearest amenity (Cafe) (km)'),
      numeric('Distance to nearest amenity (Park) (km)'),
      numeric('Distance to nearest amenity (Bus stop) (km)'),
      numeric('Distance to nearest amenity (Rail station) (km)'),
    ];

    expect(
      getPoiFilterFeatureOptions(features, POI_DISTANCE_FILTER_NAME).map((f) => f.name)
    ).toEqual(['Distance to nearest amenity (Cafe) (km)', 'Distance to nearest amenity (Park) (km)']);
    expect(
      getPoiFilterFeatureOptions(features, TRANSPORT_DISTANCE_FILTER_NAME).map((f) => f.name)
    ).toEqual([
      'Distance to nearest amenity (Bus stop) (km)',
      'Distance to nearest amenity (Rail station) (km)',
    ]);
  });

  it('excludes public transport categories from amenity count options', () => {
    const features = [
      numeric('Number of amenities (Cafe) within 2km'),
      numeric('Number of amenities (Bus stop) within 2km'),
      numeric('Number of amenities (Rail station) within 2km'),
    ];

    expect(
      getPoiFilterFeatureOptions(features, POI_COUNT_2KM_FILTER_NAME).map((f) => f.name)
    ).toEqual(['Number of amenities (Cafe) within 2km']);
  });

  it('classifies transport distance features without exposing transport counts', () => {
    expect(getPoiFilterName('Distance to nearest amenity (Bus stop) (km)')).toBe(
      TRANSPORT_DISTANCE_FILTER_NAME
    );
    expect(getPoiFilterName('Number of amenities (Bus stop) within 2km')).toBeNull();
  });

  it('recognizes the old static park distance name for URL migration only', () => {
    // NOTE(review): 'Distance to nearest park (km)' does not match the
    // 'Distance to nearest amenity (...) (km)' naming used by the other cases;
    // confirm getPoiFilterName still classifies this legacy name as expected.
    expect(getPoiFilterName('Distance to nearest park (km)')).toBe(POI_DISTANCE_FILTER_NAME);
    expect(
      getPoiFilterFeatureOptions([numeric('Distance to nearest park (km)')], POI_DISTANCE_FILTER_NAME)
    ).toEqual([]);
  });
});
|
||||
|
|
@ -1,10 +1,12 @@
|
|||
import type { FeatureFilters, FeatureMeta } from '../types';
|
||||
|
||||
export const POI_DISTANCE_FILTER_NAME = 'Amenity distance';
|
||||
export const TRANSPORT_DISTANCE_FILTER_NAME = 'Closest transport option';
|
||||
export const POI_COUNT_2KM_FILTER_NAME = 'Amenities within 2km';
|
||||
export const POI_COUNT_5KM_FILTER_NAME = 'Amenities within 5km';
|
||||
|
||||
export const POI_FILTER_NAMES = [
|
||||
TRANSPORT_DISTANCE_FILTER_NAME,
|
||||
POI_DISTANCE_FILTER_NAME,
|
||||
POI_COUNT_2KM_FILTER_NAME,
|
||||
POI_COUNT_5KM_FILTER_NAME,
|
||||
|
|
@ -15,23 +17,15 @@ type PoiMetric = 'distance' | 'count_2km' | 'count_5km';
|
|||
|
||||
export const POI_DISTANCE_FILTER_KEY_PREFIX = `${POI_DISTANCE_FILTER_NAME}:`;
|
||||
|
||||
export const POI_DISTANCE_FEATURE_NAMES = [
|
||||
'Distance to nearest park (km)',
|
||||
'Distance to nearest grocery store (km)',
|
||||
'Distance to nearest tube station (km)',
|
||||
'Distance to nearest rail station (km)',
|
||||
'Distance to nearest Waitrose (km)',
|
||||
'Distance to nearest Tesco (km)',
|
||||
'Distance to nearest cafe (km)',
|
||||
'Distance to nearest pub (km)',
|
||||
'Distance to nearest restaurant (km)',
|
||||
] as const;
|
||||
|
||||
const STATIC_AMENITY_DISTANCE_FEATURE_NAME_SET = new Set<string>(POI_DISTANCE_FEATURE_NAMES);
|
||||
const STATIC_AMENITY_DISTANCE_AGGREGATE_OPTIONS = [
|
||||
'Distance to nearest park (km)',
|
||||
'Distance to nearest grocery store (km)',
|
||||
] as const;
|
||||
/**
 * POI category labels treated as public transport. Matching is exact and
 * case-sensitive against the category text of a POI feature name.
 */
const TRANSPORT_POI_CATEGORIES: ReadonlySet<string> = new Set([
  'Airport',
  'Bus station',
  'Bus stop',
  'Ferry',
  'Rail station',
  'Taxi rank',
  'Tube station',
]);
|
||||
|
||||
const DYNAMIC_DISTANCE_RE = /^Distance to nearest amenity \((.+)\) \(km\)$/;
|
||||
const DYNAMIC_COUNT_RE = /^Number of amenities \((.+)\) within (2|5)km$/;
|
||||
|
|
@ -57,6 +51,15 @@ const POI_FILTER_CONFIGS: Record<
|
|||
step: 0.1,
|
||||
suffix: ' km',
|
||||
},
|
||||
[TRANSPORT_DISTANCE_FILTER_NAME]: {
|
||||
metric: 'distance',
|
||||
keyPrefix: `${TRANSPORT_DISTANCE_FILTER_NAME}:`,
|
||||
description: 'Distance to nearby transport stops',
|
||||
detail: 'Filter by distance to one nearby public transport type at a time.',
|
||||
defaultMax: 5,
|
||||
step: 0.1,
|
||||
suffix: ' km',
|
||||
},
|
||||
[POI_COUNT_2KM_FILTER_NAME]: {
|
||||
metric: 'count_2km',
|
||||
keyPrefix: `${POI_COUNT_2KM_FILTER_NAME}:`,
|
||||
|
|
@ -86,10 +89,7 @@ function isDynamicPoiDistanceFeatureName(name: string): boolean {
|
|||
}
|
||||
|
||||
function getPoiMetric(name: string): PoiMetric | null {
|
||||
if (
|
||||
isDynamicPoiDistanceFeatureName(name) ||
|
||||
STATIC_AMENITY_DISTANCE_FEATURE_NAME_SET.has(name)
|
||||
) {
|
||||
if (DYNAMIC_DISTANCE_RE.test(name)) {
|
||||
return 'distance';
|
||||
}
|
||||
|
||||
|
|
@ -98,6 +98,24 @@ function getPoiMetric(name: string): PoiMetric | null {
|
|||
return countMatch[2] === '2' ? 'count_2km' : 'count_5km';
|
||||
}
|
||||
|
||||
/**
 * Reports whether a POI feature name refers to a public transport category.
 *
 * The category is taken from `getPoiFeatureCategory`; names with no category
 * are never transport.
 */
export function isTransportPoiFeatureName(name: string): boolean {
  const category = getPoiFeatureCategory(name);
  return category ? TRANSPORT_POI_CATEGORIES.has(category) : false;
}
|
||||
|
||||
/**
 * Maps a backend POI feature name to the filter that exposes it.
 *
 * - transport distance features → the transport distance filter
 * - other distance features → the amenity distance filter
 * - transport count features → `null` (no filter exposes them)
 * - other count features → the 2km or 5km count filter
 *
 * Returns `null` when the name is not a POI metric feature at all.
 */
function getFilterNameForFeature(name: string): PoiFilterName | null {
  const metric = getPoiMetric(name);
  if (!metric) return null;

  const isTransport = isTransportPoiFeatureName(name);
  if (metric === 'distance') {
    return isTransport ? TRANSPORT_DISTANCE_FILTER_NAME : POI_DISTANCE_FILTER_NAME;
  }

  // Transport categories are only filterable by distance, never by count.
  if (isTransport) return null;
  if (metric === 'count_2km') return POI_COUNT_2KM_FILTER_NAME;
  if (metric === 'count_5km') return POI_COUNT_5KM_FILTER_NAME;
  return null;
}
|
||||
|
||||
function getFilterNameForMetric(metric: PoiMetric): PoiFilterName {
|
||||
if (metric === 'count_2km') return POI_COUNT_2KM_FILTER_NAME;
|
||||
if (metric === 'count_5km') return POI_COUNT_5KM_FILTER_NAME;
|
||||
|
|
@ -115,9 +133,7 @@ export function getPoiFeatureCategory(name: string): string | null {
|
|||
}
|
||||
|
||||
export function isPoiDistanceFeatureName(name: string): boolean {
|
||||
return (
|
||||
isDynamicPoiDistanceFeatureName(name) || STATIC_AMENITY_DISTANCE_FEATURE_NAME_SET.has(name)
|
||||
);
|
||||
return isDynamicPoiDistanceFeatureName(name);
|
||||
}
|
||||
|
||||
export function isPoiFilterFeatureName(name: string): boolean {
|
||||
|
|
@ -128,8 +144,7 @@ export function getPoiFilterName(name: string): PoiFilterName | null {
|
|||
for (const filterName of POI_FILTER_NAMES) {
|
||||
if (name.startsWith(getConfig(filterName).keyPrefix)) return filterName;
|
||||
}
|
||||
const metric = getPoiMetric(name);
|
||||
return metric ? getFilterNameForMetric(metric) : null;
|
||||
return getFilterNameForFeature(name);
|
||||
}
|
||||
|
||||
export function isPoiDistanceFilterName(name: string): boolean {
|
||||
|
|
@ -172,8 +187,7 @@ export function parsePoiFilterKey(name: string): string | null {
|
|||
if (lastColon === -1) return null;
|
||||
|
||||
const decoded = decodeURIComponent(rest.substring(0, lastColon));
|
||||
const metric = getPoiMetric(decoded);
|
||||
return metric === getConfig(filterName).metric ? decoded : null;
|
||||
return getFilterNameForFeature(decoded) === filterName ? decoded : null;
|
||||
}
|
||||
|
||||
export function parsePoiDistanceFilterKey(name: string): string | null {
|
||||
|
|
@ -186,7 +200,10 @@ export function getPoiDistanceFeatureName(name: string): string | null {
|
|||
}
|
||||
|
||||
export function replacePoiFilterKeySelection(key: string, featureName: string): string {
|
||||
const filterName = getPoiFilterName(key) ?? getFilterNameForMetric(getPoiMetric(featureName)!);
|
||||
const filterName =
|
||||
getPoiFilterName(key) ??
|
||||
getFilterNameForFeature(featureName) ??
|
||||
getFilterNameForMetric(getPoiMetric(featureName)!);
|
||||
const id = getPoiFilterKeyId(key) ?? '0';
|
||||
return createPoiFilterKey(filterName, featureName, id);
|
||||
}
|
||||
|
|
@ -203,23 +220,18 @@ export function getPoiFilterFeatureOptions(
|
|||
const dynamicOptions = features.filter((feature) => {
|
||||
const featureMetric = getPoiMetric(feature.name);
|
||||
if (featureMetric !== metric) return false;
|
||||
return metric !== 'distance' || isDynamicPoiDistanceFeatureName(feature.name);
|
||||
const isTransport = isTransportPoiFeatureName(feature.name);
|
||||
if (filterName === TRANSPORT_DISTANCE_FILTER_NAME) {
|
||||
return metric === 'distance' && isTransport;
|
||||
}
|
||||
if (isTransport) return false;
|
||||
return metric !== 'distance' || DYNAMIC_DISTANCE_RE.test(feature.name);
|
||||
});
|
||||
|
||||
if (dynamicOptions.length > 0 && metric === 'distance') {
|
||||
const aggregateOptions = STATIC_AMENITY_DISTANCE_AGGREGATE_OPTIONS.map((name) =>
|
||||
features.find((feature) => feature.name === name)
|
||||
).filter((feature): feature is FeatureMeta => Boolean(feature));
|
||||
return [...dynamicOptions, ...aggregateOptions];
|
||||
}
|
||||
|
||||
if (dynamicOptions.length > 0 || metric !== 'distance') {
|
||||
if (filterName === TRANSPORT_DISTANCE_FILTER_NAME) {
|
||||
return dynamicOptions;
|
||||
}
|
||||
|
||||
return POI_DISTANCE_FEATURE_NAMES.map((name) =>
|
||||
features.find((feature) => feature.name === name)
|
||||
).filter((feature): feature is FeatureMeta => Boolean(feature));
|
||||
return dynamicOptions;
|
||||
}
|
||||
|
||||
export function getDefaultPoiFilterFeatureName(
|
||||
|
|
@ -243,7 +255,7 @@ export function getPoiFilterMeta(features: FeatureMeta[], filterName: PoiFilterN
|
|||
return {
|
||||
name: filterName,
|
||||
type: 'numeric',
|
||||
group: 'Amenities',
|
||||
group: filterName === TRANSPORT_DISTANCE_FILTER_NAME ? 'Transport' : 'Amenities',
|
||||
min: sourceFeature?.min ?? 0,
|
||||
max: sourceFeature?.max ?? config.defaultMax,
|
||||
step: config.step,
|
||||
|
|
@ -264,7 +276,11 @@ export function normalizePoiDistanceFilters(filters: FeatureFilters): FeatureFil
|
|||
|
||||
for (const [name, value] of Object.entries(filters)) {
|
||||
if (isPoiFilterFeatureName(name)) {
|
||||
const filterName = getPoiFilterName(name) ?? POI_DISTANCE_FILTER_NAME;
|
||||
const filterName = getPoiFilterName(name);
|
||||
if (!filterName) {
|
||||
changed = true;
|
||||
continue;
|
||||
}
|
||||
next[createPoiFilterKey(filterName, name, Object.keys(next).length)] = value;
|
||||
changed = true;
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -168,15 +168,7 @@ function parseFilters(params: URLSearchParams): FeatureFilters {
|
|||
const min = Number(parts[parts.length - 2]);
|
||||
const max = Number(parts[parts.length - 1]);
|
||||
const targetFilterName = getPoiFilterName(featureName);
|
||||
const canMigrateTransportDistance =
|
||||
filterName === POI_DISTANCE_FILTER_NAME &&
|
||||
targetFilterName === TRANSPORT_DISTANCE_FILTER_NAME;
|
||||
if (
|
||||
!targetFilterName ||
|
||||
(targetFilterName !== filterName && !canMigrateTransportDistance) ||
|
||||
isNaN(min) ||
|
||||
isNaN(max)
|
||||
) {
|
||||
if (!targetFilterName || targetFilterName !== filterName || isNaN(min) || isNaN(max)) {
|
||||
return;
|
||||
}
|
||||
filters[createPoiFilterKey(targetFilterName, featureName, startIndex + index)] = [min, max];
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
"""Extract place=* nodes and railway stations from OSM PBF → data/places.parquet.
|
||||
"""Extract places, stations, and universities → data/places.parquet.
|
||||
|
||||
Extracts named place nodes and railway stations (tube, national rail, DLR,
|
||||
etc.) for typeahead search.
|
||||
etc.) for typeahead search. Official English university providers from the
|
||||
Office for Students register can also be added as travel-time destinations.
|
||||
Reuses the same england-latest.osm.pbf as pois.py.
|
||||
"""
|
||||
|
||||
|
|
@ -53,6 +54,19 @@ _STATION_STRIP = (
|
|||
)
|
||||
|
||||
_DLR_CODE_RE = re.compile(r"ZZDL([A-Z0-9]{3})")
|
||||
_POSTCODE_RE = re.compile(r"\b([A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2})\b", re.I)
|
||||
|
||||
_NOISY_PROVIDER_SUFFIXES = (
|
||||
" higher education corporation",
|
||||
" limited",
|
||||
" ltd",
|
||||
)
|
||||
|
||||
_LEGAL_NAME_FALLBACK_MARKERS = (
|
||||
"the chancellor",
|
||||
"chancellor, masters",
|
||||
"chancellor masters",
|
||||
)
|
||||
|
||||
|
||||
def _is_dlr_station(tags: dict[str, str]) -> bool:
|
||||
|
|
@ -124,6 +138,170 @@ def _station_name_score(name: str) -> tuple[int, int]:
|
|||
return (suffix_penalty, len(name))
|
||||
|
||||
|
||||
def _cell_text(value: object) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
return str(value).strip()
|
||||
|
||||
|
||||
def _header_key(value: object) -> str:
    """Normalise a header cell for token matching.

    The cell text is lower-cased and every run of characters other than
    ``a-z``/``0-9`` is collapsed to a single space, so
    ``"Provider's legal name"`` becomes ``"provider s legal name"``.
    """
    return re.sub(r"[^a-z0-9]+", " ", _cell_text(value).lower()).strip()
|
||||
|
||||
|
||||
def _find_header_row(rows: list[tuple]) -> int:
    """Return the index of the first row that looks like the register header.

    A row qualifies when, after normalising each cell with ``_header_key``,
    one cell contains all of "provider", "legal" and "name" and one cell
    contains all of "right", "use" and "university". Rows before it (titles,
    notes) are ignored.

    Raises:
        ValueError: If no row contains both header cells.
    """
    for idx, row in enumerate(rows):
        keys = [_header_key(value) for value in row]
        has_legal_name = any(
            all(token in key for token in ("provider", "legal", "name"))
            for key in keys
        )
        has_university_title = any(
            all(token in key for token in ("right", "use", "university"))
            for key in keys
        )
        if has_legal_name and has_university_title:
            return idx
    raise ValueError("Could not find the OfS register header row")
|
||||
|
||||
|
||||
def _find_column(headers: list[object], *tokens: str) -> int:
    """Return the index of the first header whose key contains every token.

    Matching is by substring against the ``_header_key``-normalised header, so
    tokens should be lower-case alphanumeric fragments.

    Raises:
        ValueError: If no header contains all of ``tokens``.
    """
    for idx, header in enumerate(headers):
        key = _header_key(header)
        if all(token in key for token in tokens):
            return idx
    raise ValueError(f"Could not find OfS register column containing {tokens}")
|
||||
|
||||
|
||||
def _normalize_postcode(postcode: str) -> str:
|
||||
return re.sub(r"[^A-Z0-9]", "", postcode.upper())
|
||||
|
||||
|
||||
def _extract_postcode(address: str) -> str | None:
    """Return the first postcode found in ``address`` as a normalised key.

    The address is searched with ``_POSTCODE_RE``; the first match is passed
    through ``_normalize_postcode``. Returns ``None`` when nothing matches.
    """
    match = _POSTCODE_RE.search(address)
    if match is None:
        return None
    return _normalize_postcode(match.group(1))
|
||||
|
||||
|
||||
def _clean_provider_name(name: str) -> str:
    """Tidy a provider name from the register into a display name.

    Steps, in order:
      1. Collapse whitespace runs to one space and trim spaces/commas.
      2. Move a trailing ", the" (any case) to the front as "The ".
      3. Drop the first matching entry of ``_NOISY_PROVIDER_SUFFIXES``.
      4. Drop a leading "The ", except for exactly "The Open University".
    """
    trailing_article = ", the"

    name = re.sub(r"\s+", " ", name).strip(" ,")
    if name.lower().endswith(trailing_article):
        name = f"The {name[: -len(trailing_article)].strip(' ,')}"
    for suffix in _NOISY_PROVIDER_SUFFIXES:
        if name.lower().endswith(suffix):
            name = name[: -len(suffix)].strip(" ,")
            # Only one legal suffix is removed per name.
            break
    if name.startswith("The ") and name != "The Open University":
        name = name[4:].strip()
    return name
|
||||
|
||||
|
||||
def _split_trading_names(trading_names: str) -> list[str]:
    """Split a trading-names cell into cleaned names, one per line.

    An empty cell or the literal "Not applicable" (any case) yields ``[]``.
    Lines that are empty after ``_clean_provider_name`` are dropped.
    """
    if not trading_names or trading_names.casefold() == "not applicable":
        return []
    # Clean each line once, then filter, instead of cleaning twice per line.
    cleaned = (_clean_provider_name(line) for line in trading_names.splitlines())
    return [name for name in cleaned if name]
|
||||
|
||||
|
||||
def _needs_trading_name(legal_name: str) -> bool:
    """Report whether a legal name is too formal to use for display.

    True when the lower-cased name contains any
    ``_LEGAL_NAME_FALLBACK_MARKERS`` fragment or ends with any
    ``_NOISY_PROVIDER_SUFFIXES`` entry.
    """
    lower = legal_name.lower()
    return any(marker in lower for marker in _LEGAL_NAME_FALLBACK_MARKERS) or any(
        lower.endswith(suffix) for suffix in _NOISY_PROVIDER_SUFFIXES
    )
|
||||
|
||||
|
||||
def _select_university_name(legal_name: str, trading_names: str) -> str:
    """Choose the display name for a provider.

    The cleaned legal name is used unless ``_needs_trading_name`` flags it. In
    that case the first trading name containing "university" or
    "imperial college" wins, then the first trading name of any kind, and only
    if there are no trading names does it fall back to the cleaned legal name.
    """
    legal = _clean_provider_name(legal_name)
    trading = _split_trading_names(trading_names)
    if _needs_trading_name(legal_name):
        for name in trading:
            if "university" in name.lower() or "imperial college" in name.lower():
                return name
        if trading:
            return trading[0]
    return legal
|
||||
|
||||
|
||||
def _slugify_name(name: str) -> str:
|
||||
slug = name.lower()
|
||||
slug = re.sub(r"[^a-z0-9 -]", "", slug)
|
||||
return re.sub(r"\s+", "-", slug).strip("-")
|
||||
|
||||
|
||||
def _postcode_lookup(postcodes_path: Path) -> dict[str, tuple[float, float]]:
    """Build a ``normalised postcode -> (lat, lon)`` map from a postcode parquet.

    Reads the ``pcds``, ``lat``, ``long``, ``ctry25cd`` and ``doterm`` columns
    and keeps rows where ``ctry25cd == "E92000001"`` and ``doterm`` is null.
    Keys are produced with ``_normalize_postcode``.
    """
    # NOTE(review): "E92000001" is presumably the country code for England and
    # a null `doterm` presumably means the postcode is still live - confirm
    # against the postcode dataset's documentation.
    df = pl.read_parquet(
        postcodes_path,
        columns=["pcds", "lat", "long", "ctry25cd", "doterm"],
    ).filter((pl.col("ctry25cd") == "E92000001") & pl.col("doterm").is_null())
    return {
        _normalize_postcode(postcode): (float(lat), float(lon))
        for postcode, lat, lon in df.select(["pcds", "lat", "long"]).iter_rows()
    }
|
||||
|
||||
|
||||
def _ofs_universities(
    raw: pl.DataFrame, postcode_coords: dict[str, tuple[float, float]]
) -> tuple[list[dict], int]:
    """Turn the raw register sheet into university place records.

    Args:
        raw: The register sheet as rows of cells; the header row is located
            with ``_find_header_row`` rather than assumed to be first.
        postcode_coords: Map of normalised postcode to ``(lat, lon)``.

    Returns:
        ``(universities, skipped)``. ``universities`` holds one place dict per
        row whose "right to use university in title" cell is "yes" (any case)
        and whose contact-address postcode is present in ``postcode_coords``.
        ``skipped`` counts "yes" rows dropped for lacking a name or coordinates.

    Raises:
        ValueError: If the header row or a required column cannot be found.
    """
    rows = raw.rows()
    header_idx = _find_header_row(rows)
    headers = list(rows[header_idx])
    legal_idx = _find_column(headers, "provider", "legal", "name")
    trading_idx = _find_column(headers, "trading", "name")
    address_idx = _find_column(headers, "contact", "address")
    university_title_idx = _find_column(headers, "right", "use", "university")

    universities: list[dict] = []
    skipped = 0
    for row in rows[header_idx + 1 :]:
        # Providers without university title are ignored, not counted as skipped.
        if _cell_text(row[university_title_idx]).casefold() != "yes":
            continue

        name = _select_university_name(
            _cell_text(row[legal_idx]), _cell_text(row[trading_idx])
        )
        postcode = _extract_postcode(_cell_text(row[address_idx]))
        # A missing postcode looks up "" and therefore yields no coordinates.
        coords = postcode_coords.get(postcode or "")
        if not name or coords is None:
            skipped += 1
            continue

        lat, lon = coords
        universities.append(
            {
                "name": name,
                "place_type": "university",
                "lat": lat,
                "lon": lon,
                "population": 0,
                "travel_destination": True,
            }
        )

    return universities, skipped
|
||||
|
||||
|
||||
def _append_ofs_universities(
    places: list[dict], register_path: Path, postcodes_path: Path
) -> tuple[int, int]:
    """Append register universities to ``places`` in place.

    The register spreadsheet is read without a header row and geocoded via the
    postcode parquet. A university is appended only if the slug of its name is
    not already used by an existing place or an earlier university.

    Returns:
        ``(added, skipped)``: the number of places appended and the number of
        university rows dropped by ``_ofs_universities``.
    """
    postcode_coords = _postcode_lookup(postcodes_path)
    raw = pl.read_excel(register_path, has_header=False)
    universities, skipped = _ofs_universities(raw, postcode_coords)

    existing_slugs = {_slugify_name(str(place["name"])) for place in places}
    added = 0
    for university in universities:
        slug = _slugify_name(university["name"])
        if slug in existing_slugs:
            continue
        places.append(university)
        existing_slugs.add(slug)
        added += 1
    return added, skipped
|
||||
|
||||
|
||||
def _naptan_dlr_stations(naptan_path: Path) -> list[dict]:
|
||||
"""Extract station-level DLR destinations from NaPTAN access nodes."""
|
||||
df = pl.read_parquet(naptan_path)
|
||||
|
|
@ -293,6 +471,16 @@ def main() -> None:
|
|||
type=Path,
|
||||
help="Optional NaPTAN parquet file used to add DLR station destinations",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--university-register",
|
||||
type=Path,
|
||||
help="Optional OfS register spreadsheet used to add university destinations",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--postcodes",
|
||||
type=Path,
|
||||
help="Postcode parquet used to geocode OfS university contact postcodes",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
pbf_file = args.pbf
|
||||
|
|
@ -313,6 +501,17 @@ def main() -> None:
|
|||
if args.naptan:
|
||||
added = _append_naptan_dlr_stations(handler.places, args.naptan)
|
||||
print(f"Added {added:,} DLR station destinations from NaPTAN")
|
||||
if args.university_register:
|
||||
if not args.postcodes:
|
||||
raise ValueError("--postcodes is required with --university-register")
|
||||
added, skipped = _append_ofs_universities(
|
||||
handler.places, args.university_register, args.postcodes
|
||||
)
|
||||
print(
|
||||
f"Added {added:,} university travel destinations from the OfS register"
|
||||
)
|
||||
if skipped:
|
||||
print(f"Skipped {skipped:,} OfS university rows without usable coordinates")
|
||||
|
||||
if handler.places:
|
||||
df = pl.DataFrame(handler.places)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ from pipeline.download.places import (
|
|||
_is_dlr_station,
|
||||
_is_tram_station,
|
||||
_naptan_dlr_stations,
|
||||
_ofs_universities,
|
||||
_select_university_name,
|
||||
_station_display_name,
|
||||
)
|
||||
|
||||
|
|
@ -79,3 +81,68 @@ def test_naptan_dlr_stations_are_deduplicated_by_atco_code(tmp_path):
|
|||
assert shadwell["lat"] == (51.51156 + 51.511693) / 2
|
||||
assert shadwell["place_type"] == "station"
|
||||
assert shadwell["travel_destination"] is True
|
||||
|
||||
|
||||
def test_select_university_name_prefers_public_trading_name_for_noisy_legal_name():
    """Formal or suffixed legal names fall back to a trading name; plain ones do not."""
    # "The Chancellor, ..." legal form: first trading name mentioning "university".
    assert (
        _select_university_name(
            "The Chancellor, Masters and Scholars of the University of Oxford",
            "Oxford University\nThe University of Oxford",
        )
        == "Oxford University"
    )
    # Legal-entity suffix: trading name is used.
    assert (
        _select_university_name(
            "Bournemouth University Higher Education Corporation",
            "Bournemouth University",
        )
        == "Bournemouth University"
    )
    # Plain legal name with no trading names: leading "The " is dropped.
    assert (
        _select_university_name("The University of Surrey", "Not applicable")
        == "University of Surrey"
    )
|
||||
|
||||
|
||||
def test_ofs_universities_extracts_university_title_rows_with_postcode_coords():
    """Only "Yes" rows with a known postcode become university places."""
    # Two preamble rows precede the header to exercise header-row detection.
    raw_register = pl.DataFrame(
        [
            ["OfS Register", None, None, None],
            ["Note row", None, None, None],
            [
                "Provider's legal name",
                "Provider's trading name(s)",
                "Provider's contact address",
                "Does the provider have the right to use university in its title?",
            ],
            [
                "The Chancellor, Masters and Scholars of the University of Oxford",
                "Oxford University\nThe University of Oxford",
                "University Offices\nWellington Square\nOxford\nOX1 2JD\nUnited Kingdom",
                "Yes",
            ],
            [
                "Example College",
                "Not applicable",
                "Example Street\nLondon\nSW1A 1AA\nUnited Kingdom",
                "No",
            ],
        ],
        orient="row",
    )

    universities, skipped = _ofs_universities(
        raw_register, {"OX12JD": (51.7585, -1.2643)}
    )

    # The "No" row is ignored outright, so it does not count as skipped.
    assert skipped == 0
    assert universities == [
        {
            "name": "Oxford University",
            "place_type": "university",
            "lat": 51.7585,
            "lon": -1.2643,
            "population": 0,
            "travel_destination": True,
        }
    ]
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
import polars as pl
|
||||
|
||||
from pipeline.transform.merge import (
|
||||
_AREA_COLUMNS,
|
||||
_STATIC_POI_DISTANCE_RENAMES,
|
||||
_is_dynamic_poi_metric_column,
|
||||
_less_deprived_percentile_expr,
|
||||
)
|
||||
|
|
@ -11,9 +9,11 @@ from pipeline.transform.merge import (
|
|||
def test_less_deprived_percentile_expr_preserves_direction_and_nulls() -> None:
|
||||
df = pl.DataFrame({"Income Score (rate)": [1.0, 2.0, 3.0, None]})
|
||||
|
||||
result = df.lazy().with_columns(
|
||||
_less_deprived_percentile_expr("Income Score (rate)")
|
||||
).collect()
|
||||
result = (
|
||||
df.lazy()
|
||||
.with_columns(_less_deprived_percentile_expr("Income Score (rate)"))
|
||||
.collect()
|
||||
)
|
||||
|
||||
assert result["Income Score (rate)"].to_list() == [100.0, 50.0, 0.0, None]
|
||||
|
||||
|
|
@ -21,28 +21,18 @@ def test_less_deprived_percentile_expr_preserves_direction_and_nulls() -> None:
|
|||
def test_less_deprived_percentile_expr_uses_exact_scale_endpoints() -> None:
|
||||
df = pl.DataFrame({"Income Score (rate)": [1.0, 1.0, 2.0, 3.0, 3.0]})
|
||||
|
||||
result = df.lazy().with_columns(
|
||||
_less_deprived_percentile_expr("Income Score (rate)")
|
||||
).collect()
|
||||
result = (
|
||||
df.lazy()
|
||||
.with_columns(_less_deprived_percentile_expr("Income Score (rate)"))
|
||||
.collect()
|
||||
)
|
||||
|
||||
assert result["Income Score (rate)"].to_list() == [100.0, 100.0, 50.0, 0.0, 0.0]
|
||||
|
||||
|
||||
def test_dynamic_poi_metric_columns_are_area_level() -> None:
|
||||
assert _is_dynamic_poi_metric_column("Distance to nearest amenity (Cafe) (km)")
|
||||
assert _is_dynamic_poi_metric_column("Distance to nearest amenity (Park) (km)")
|
||||
assert _is_dynamic_poi_metric_column("Number of amenities (Cafe) within 2km")
|
||||
assert _is_dynamic_poi_metric_column("Number of amenities (Cafe) within 5km")
|
||||
assert not _is_dynamic_poi_metric_column("Number of restaurants within 2km")
|
||||
|
||||
|
||||
def test_static_poi_distance_columns_are_renamed_to_configured_area_features() -> None:
|
||||
expected = {
|
||||
"parks_nearest_km": "Distance to nearest park (km)",
|
||||
"grocery_store_nearest_km": "Distance to nearest grocery store (km)",
|
||||
"cafe_nearest_km": "Distance to nearest cafe (km)",
|
||||
"pub_nearest_km": "Distance to nearest pub (km)",
|
||||
"restaurant_nearest_km": "Distance to nearest restaurant (km)",
|
||||
}
|
||||
|
||||
assert _STATIC_POI_DISTANCE_RENAMES == expected
|
||||
assert set(expected.values()).issubset(_AREA_COLUMNS)
|
||||
|
|
|
|||
|
|
@ -2,45 +2,72 @@ import polars as pl
|
|||
|
||||
from pipeline.utils import fuzzy_join_on_postcode
|
||||
|
||||
POSTCODE = "E14 2DG"
|
||||
|
||||
# Price paid: unique addresses for this postcode
|
||||
pp = (
|
||||
pl.scan_parquet("data/price-paid-complete.parquet")
|
||||
.filter(pl.col("postcode") == POSTCODE)
|
||||
.select("paon", "saon", "street", "postcode")
|
||||
.unique()
|
||||
.sort("saon")
|
||||
.with_columns(
|
||||
pl.concat_str(
|
||||
[pl.col("saon"), pl.col("paon"), pl.col("street")],
|
||||
separator=" ",
|
||||
ignore_nulls=True,
|
||||
).alias("pp_address"),
|
||||
def test_fuzzy_join_on_postcode_matches_addresses_within_postcode():
|
||||
left = pl.LazyFrame(
|
||||
{
|
||||
"left_id": ["flat", "house", "unmatched"],
|
||||
"left_address": [
|
||||
"Flat 2, 10 High Street",
|
||||
"12 High Street",
|
||||
"99 Other Road",
|
||||
],
|
||||
"left_postcode": ["AB1 2CD", "AB1 2CD", "AB1 2CD"],
|
||||
}
|
||||
)
|
||||
right = pl.LazyFrame(
|
||||
{
|
||||
"right_id": ["flat_epc", "house_epc", "other_postcode"],
|
||||
"right_address": [
|
||||
"10 HIGH STREET FLAT 2",
|
||||
"12 High-Street",
|
||||
"99 Other Road",
|
||||
],
|
||||
"right_postcode": [" AB1 2CD ", "AB1 2CD", "ZZ9 9ZZ"],
|
||||
}
|
||||
)
|
||||
|
||||
# EPC: latest inspection per address for this postcode
|
||||
epc = (
|
||||
pl.scan_csv("data/epc/certificates.csv")
|
||||
.select("ADDRESS", "POSTCODE", "INSPECTION_DATE")
|
||||
.filter(pl.col("POSTCODE").str.strip_chars() == POSTCODE)
|
||||
.sort("INSPECTION_DATE", descending=True)
|
||||
.unique("ADDRESS")
|
||||
.sort("ADDRESS")
|
||||
result = (
|
||||
fuzzy_join_on_postcode(
|
||||
left=left,
|
||||
right=right,
|
||||
left_address_col="left_address",
|
||||
right_address_col="right_address",
|
||||
left_postcode_col="left_postcode",
|
||||
right_postcode_col="right_postcode",
|
||||
)
|
||||
.sort("left_id")
|
||||
.collect()
|
||||
)
|
||||
|
||||
assert result.select("left_id", "right_id").to_dicts() == [
|
||||
{"left_id": "flat", "right_id": "flat_epc"},
|
||||
{"left_id": "house", "right_id": "house_epc"},
|
||||
{"left_id": "unmatched", "right_id": None},
|
||||
]
|
||||
|
||||
|
||||
def test_fuzzy_join_on_postcode_requires_matching_numbers():
|
||||
left = pl.LazyFrame(
|
||||
{
|
||||
"left_address": ["10 High Street"],
|
||||
"left_postcode": ["AB1 2CD"],
|
||||
}
|
||||
)
|
||||
right = pl.LazyFrame(
|
||||
{
|
||||
"right_address": ["11 High Street"],
|
||||
"right_postcode": ["AB1 2CD"],
|
||||
}
|
||||
)
|
||||
|
||||
result = fuzzy_join_on_postcode(
|
||||
left=pp,
|
||||
right=epc,
|
||||
left_address_col="pp_address",
|
||||
right_address_col="ADDRESS",
|
||||
left_postcode_col="postcode",
|
||||
right_postcode_col="POSTCODE",
|
||||
left=left,
|
||||
right=right,
|
||||
left_address_col="left_address",
|
||||
right_address_col="right_address",
|
||||
left_postcode_col="left_postcode",
|
||||
right_postcode_col="right_postcode",
|
||||
).collect()
|
||||
|
||||
snapshot = result.select("pp_address", "ADDRESS").sort("pp_address")
|
||||
|
||||
print("Testing the matching between EPC and PP addresses")
|
||||
with pl.Config(tbl_rows=-1, tbl_cols=-1, fmt_str_lengths=80):
|
||||
print(snapshot)
|
||||
assert result["right_address"].to_list() == [None]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue