This commit is contained in:
Andras Schmelczer 2026-03-12 22:11:00 +00:00
parent 14a3555cf1
commit 7e92bf112e
34 changed files with 1214437 additions and 224 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,10 +1,14 @@
import json
import logging
import os
import random
import re
import time
from urllib.parse import unquote
import httpx
from curl_cffi.requests import Session
from curl_cffi.requests.errors import RequestsError
from constants import (
DELAY_BETWEEN_PAGES,
@ -66,19 +70,18 @@ def solve_cloudflare() -> tuple[dict[str, str], str] | None:
raw_cookies = solution.get("cookies", [])
user_agent = solution.get("userAgent", "")
# Pass through ALL cookies from FlareSolverr — different Cloudflare
# configurations set different cookies (cf_clearance only appears when
# a challenge is triggered; it's not needed if no challenge was detected)
cookies = {}
for c in raw_cookies:
name = c.get("name", "")
if name in ("cf_clearance", "homecouk_session", "XSRF-TOKEN"):
if name:
cookies[name] = c["value"]
if "cf_clearance" not in cookies:
log.error("FlareSolverr solved but no cf_clearance cookie returned")
flaresolverr_attempts_total.labels(result="no_cf_clearance").inc()
return None
if "homecouk_session" not in cookies:
log.error("FlareSolverr solved but no homecouk_session cookie returned")
flaresolverr_attempts_total.labels(result="no_session").inc()
if not cookies:
log.error("FlareSolverr solved but returned no cookies at all")
flaresolverr_attempts_total.labels(result="no_cookies").inc()
return None
log.info(
@ -121,19 +124,25 @@ def load_cookies() -> tuple[dict[str, str], str] | None:
return {"cf_clearance": cf_clearance, "homecouk_session": session}, user_agent
def make_client(cookies: dict[str, str], user_agent: str) -> httpx.Client:
"""Create an httpx Client configured for home.co.uk API calls.
user_agent must match the one used when obtaining cf_clearance."""
return httpx.Client(
timeout=30,
cookies=cookies,
headers={
def make_client(cookies: dict[str, str], user_agent: str) -> Session:
"""Create a curl_cffi Session configured for home.co.uk API calls.
Uses Chrome TLS impersonation so cf_clearance cookies (which are bound
to Chrome's JA3 fingerprint from FlareSolverr) remain valid."""
session = Session(impersonate="chrome")
session.headers.update({
"User-Agent": user_agent,
"Accept": "application/json, text/plain, */*",
"x-requested-with": "XMLHttpRequest",
},
follow_redirects=True,
)
})
# Laravel CSRF: the XSRF-TOKEN cookie value must also be sent as the
# X-XSRF-TOKEN request header (URL-decoded). Without this header, the
# server rejects every request with 419/403.
xsrf = cookies.get("XSRF-TOKEN")
if xsrf:
session.headers["X-XSRF-TOKEN"] = unquote(xsrf)
for name, value in cookies.items():
session.cookies.set(name, value, domain="home.co.uk")
return session
def _status_label(code: int) -> str:
@ -143,16 +152,21 @@ def _status_label(code: int) -> str:
def fetch_page(
client: httpx.Client, url: str, params: dict, max_retries: int = 3
client: Session, url: str, params: dict, max_retries: int = 3
) -> dict | None:
"""GET JSON with retries on 429/5xx. Returns None on permanent failure.
403 means cookies expired — raises CookiesExpiredError immediately."""
for attempt in range(max_retries):
try:
resp = client.get(url, params=params)
resp = client.get(url, params=params, timeout=30)
homecouk_requests_total.labels(status=_status_label(resp.status_code)).inc()
if resp.status_code == 200:
try:
return resp.json()
except json.JSONDecodeError:
homecouk_errors_total.labels(type="json_decode").inc()
log.error("Non-JSON response from %s (got %s)", url, resp.headers.get("content-type", "?"))
return None
if resp.status_code == 403:
raise CookiesExpiredError("HTTP 403 — cookies likely expired")
if resp.status_code in (429, 500, 502, 503, 504):
@ -167,10 +181,7 @@ def fetch_page(
return None
except CookiesExpiredError:
raise
except (
httpx.ConnectError, httpx.ReadTimeout,
httpx.WriteTimeout, httpx.PoolTimeout,
) as e:
except RequestsError as e:
homecouk_errors_total.labels(type=type(e).__name__).inc()
delay = RETRY_BASE_DELAY * (2**attempt) + random.uniform(0, 1)
log.warning(
@ -285,7 +296,7 @@ def transform_property(
def search_outcode(
client: httpx.Client,
client: Session,
outcode: str,
channel: str,
pc_index: PostcodeSpatialIndex,

View file

@ -7,7 +7,7 @@ from pathlib import Path
from flask import Flask, Response, jsonify, send_from_directory
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
from constants import DATA_DIR, RUN_ON_STARTUP, SCHEDULE_HOUR
from constants import DATA_DIR, RUN_ON_STARTUP, SCHEDULE_HOUR, SCRAPE_HOMECOUK, SCRAPE_RIGHTMOVE
from homecouk import load_cookies as load_homecouk_cookies
from rightmove import outcode_cache
from scraper import (
@ -46,7 +46,8 @@ logging.getLogger("httpcore").setLevel(logging.WARNING)
log.info("Loading arcgis data...")
OUTCODES = load_outcodes()
PC_INDEX = build_postcode_index()
log.info("Ready — %d outcodes, postcode index built", len(OUTCODES))
log.info("Ready — %d outcodes, postcode index built (rightmove=%s, homecouk=%s)",
len(OUTCODES), SCRAPE_RIGHTMOVE, SCRAPE_HOMECOUK)
# ---------------------------------------------------------------------------
# Scheduler
@ -137,11 +138,13 @@ def get_status():
@app.route("/debug")
def get_debug():
hk_cookies = load_homecouk_cookies()
hk_cookies = load_homecouk_cookies() if SCRAPE_HOMECOUK else None
return jsonify({
"outcode_cache_size": len(outcode_cache),
"outcode_cache_sample": dict(list(outcode_cache.items())[:20]),
"homecouk_enabled": hk_cookies is not None,
"scrape_rightmove": SCRAPE_RIGHTMOVE,
"scrape_homecouk": SCRAPE_HOMECOUK,
"homecouk_cookies_available": hk_cookies is not None,
})

View file

@ -5,6 +5,7 @@ requires-python = ">=3.12"
dependencies = [
"flask",
"httpx",
"curl_cffi",
"polars",
"fake-useragent>=2.2.0",
"prometheus-client",

View file

@ -6,7 +6,7 @@ from dataclasses import dataclass, field
import polars as pl
from constants import ARCGIS_PATH, CHANNELS, DATA_DIR, DELAY_BETWEEN_OUTCODES, SEED
from constants import ARCGIS_PATH, CHANNELS, DATA_DIR, DELAY_BETWEEN_OUTCODES, SCRAPE_HOMECOUK, SCRAPE_RIGHTMOVE, SEED
from homecouk import CookiesExpiredError
from homecouk import load_cookies as load_homecouk_cookies
from homecouk import make_client as make_homecouk_client
@ -126,9 +126,25 @@ def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
random.seed(SEED)
random.shuffle(shuffled)
client = make_client()
if not SCRAPE_RIGHTMOVE and not SCRAPE_HOMECOUK:
log.warning("Both SCRAPE_RIGHTMOVE and SCRAPE_HOMECOUK are disabled — nothing to do")
with status_lock:
status.state = "done"
status.finished_at = time.time()
_sync_gauges()
return
# home.co.uk: optional, enabled when cookies are available (via FlareSolverr or env vars)
client = make_client() if SCRAPE_RIGHTMOVE else None
if not SCRAPE_RIGHTMOVE:
log.info("Rightmove scraping DISABLED (SCRAPE_RIGHTMOVE=false)")
# home.co.uk: must be enabled via SCRAPE_HOMECOUK + cookies available
hk_client = None
hk_failed = False
if not SCRAPE_HOMECOUK:
log.info("home.co.uk scraping DISABLED (SCRAPE_HOMECOUK=false)")
homecouk_enabled.set(0)
else:
hk_result = load_homecouk_cookies()
hk_client = make_homecouk_client(*hk_result) if hk_result else None
if hk_client:
@ -137,7 +153,6 @@ def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
else:
log.info("home.co.uk scraping DISABLED (need FlareSolverr or HOMECOUK_CF_CLEARANCE + HOMECOUK_SESSION)")
homecouk_enabled.set(0)
hk_failed = False # set to True on 403 to skip remaining outcodes
try:
for channel_cfg in CHANNELS:
@ -167,6 +182,7 @@ def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
outcode, i + 1, len(shuffled), len(all_properties))
# --- Rightmove ---
if SCRAPE_RIGHTMOVE:
try:
outcode_id = resolve_outcode_id(client, outcode)
if not outcode_id:
@ -276,6 +292,7 @@ def run_scrape(outcodes: list[str], pc_index: PostcodeSpatialIndex) -> None:
status.finished_at = time.time()
_sync_gauges()
finally:
if client:
client.close()
if hk_client:
hk_client.close()

View file

@ -58,7 +58,7 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]:
def normalize_price(amount: int, frequency: str) -> int:
"""Normalize price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
"""Normalise price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
if frequency == "weekly":
return round(amount * 52 / 12)
if frequency == "yearly":
@ -111,7 +111,7 @@ def transform_property(prop: dict, outcode: str, pc_index: PostcodeSpatialIndex)
"lat": lat,
"Postcode": postcode,
"Address per Property Register": prop.get("displayAddress", ""),
"Leashold/Freehold": extract_tenure(prop.get("tenure")),
"Leasehold/Freehold": extract_tenure(prop.get("tenure")),
"Property type": map_property_type(sub_type),
"Property sub-type": sub_type or "Unknown",
"price": price,

View file

@ -17,12 +17,14 @@
"@protomaps/basemaps": "^5.7.0",
"@radix-ui/react-select": "^2.0.0",
"@radix-ui/react-slider": "^1.1.0",
"@types/supercluster": "^7.1.3",
"maplibre-gl": "^4.0.0",
"pocketbase": "^0.26.8",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-joyride": "^2.9.3",
"react-map-gl": "^7.1.0"
"react-map-gl": "^7.1.0",
"supercluster": "^8.0.1"
},
"devDependencies": {
"@babel/core": "^7.29.0",
@ -4729,7 +4731,6 @@
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
"integrity": "sha512-Z0pOY34GDFl3Q6hUFYf3HkTwKEE02e7QgtJppBt+beEAxnyOpJua+voGFvxINBHa06GwLFFym7gRPY2SiKIfIA==",
"license": "MIT",
"dependencies": {
"@types/geojson": "*"
}
@ -13484,7 +13485,6 @@
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/supercluster/-/supercluster-8.0.1.tgz",
"integrity": "sha512-IiOea5kJ9iqzD2t7QJq/cREyLHTtSmUT6gQsweojg9WH2sYJqZK9SswTu6jrscO6D1G5v5vYZ9ru/eq85lXeZQ==",
"license": "ISC",
"dependencies": {
"kdbush": "^4.0.2"
}

View file

@ -22,12 +22,14 @@
"@protomaps/basemaps": "^5.7.0",
"@radix-ui/react-select": "^2.0.0",
"@radix-ui/react-slider": "^1.1.0",
"@types/supercluster": "^7.1.3",
"maplibre-gl": "^4.0.0",
"pocketbase": "^0.26.8",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-joyride": "^2.9.3",
"react-map-gl": "^7.1.0"
"react-map-gl": "^7.1.0",
"supercluster": "^8.0.1"
},
"devDependencies": {
"@babel/core": "^7.29.0",

View file

@ -31,8 +31,8 @@ export default memo(function HoverCard({ x, y, id, isPostcode, data, filters, fe
const results: { name: string; value: string }[] = [];
// Show stats for active filters (up to 4)
for (const name of activeFilterNames.slice(0, 4)) {
// Show stats for active filters (up to 4), excluding Listing status
for (const name of activeFilterNames.filter((n) => n !== 'Listing status').slice(0, 4)) {
const val = data[`avg_${name}`] ?? data[`min_${name}`];
if (val == null || typeof val !== 'number') continue;
const meta = featureMap.get(name);
@ -50,14 +50,31 @@ export default memo(function HoverCard({ x, y, id, isPostcode, data, filters, fe
const displayStats = getDisplayStats();
const count = data?.count;
return (
<div
className="absolute bg-white dark:bg-warm-800 rounded-lg shadow-lg p-3 text-sm dark:text-white pointer-events-none z-50 min-w-[180px] max-w-[260px]"
style={{
const cardStyle = {
left: x,
top: y - 12,
transform: 'translate(-50%, -100%)',
}}
};
// Loading state: show skeleton when data hasn't arrived yet
if (!data) {
return (
<div
className="absolute bg-white dark:bg-warm-800 rounded-lg shadow-lg p-3 text-sm pointer-events-none z-50 min-w-[140px]"
style={cardStyle}
>
<div className="animate-pulse space-y-2">
<div className="h-3.5 w-20 bg-warm-200 dark:bg-warm-600 rounded" />
<div className="h-2.5 w-14 bg-warm-100 dark:bg-warm-700 rounded" />
</div>
</div>
);
}
return (
<div
className="absolute bg-white dark:bg-warm-800 rounded-lg shadow-lg p-3 text-sm dark:text-white pointer-events-none z-50 min-w-[180px] max-w-[260px]"
style={cardStyle}
>
<div className="relative">
{/* Header */}
@ -89,11 +106,9 @@ export default memo(function HoverCard({ x, y, id, isPostcode, data, filters, fe
)}
{/* Hint */}
{data && (
<div className="text-[10px] text-warm-400 dark:text-warm-400 mt-2 text-center">
Click for details
</div>
)}
</div>
</div>
);

View file

@ -1,9 +1,47 @@
import { formatValue } from '../../lib/format';
import { FEATURE_GRADIENT, DENSITY_GRADIENT, DENSITY_GRADIENT_DARK } from '../../lib/consts';
import { FEATURE_GRADIENT, DENSITY_GRADIENT, DENSITY_GRADIENT_DARK, ENUM_PALETTE } from '../../lib/consts';
import { gradientToCss } from '../../lib/utils';
import { CloseIcon } from '../ui/icons/CloseIcon';
import { TickerValue } from '../ui/TickerValue';
/**
 * Stacked (one-per-row) legend for a discrete feature: each value gets a
 * colour swatch followed by its label. Colours are taken from ENUM_PALETTE by
 * position and wrap around once the palette is exhausted.
 */
function EnumSwatches({ values }: { values: string[] }) {
  const rows = values.map((label, index) => {
    const [r, g, b] = ENUM_PALETTE[index % ENUM_PALETTE.length];
    const swatchStyle = { backgroundColor: `rgb(${r},${g},${b})` };
    return (
      <div key={label} className="flex items-center gap-1.5">
        <div className="w-3 h-3 rounded-sm shrink-0" style={swatchStyle} />
        <span className="text-warm-600 dark:text-warm-300 truncate">{label}</span>
      </div>
    );
  });

  return <div className="flex flex-col gap-1">{rows}</div>;
}
/**
 * Single-line (wrapping) legend for a discrete feature: swatch + label pairs
 * laid out horizontally. Colours are taken from ENUM_PALETTE by position and
 * wrap around once the palette is exhausted.
 */
function InlineEnumSwatches({ values }: { values: string[] }) {
  const items = values.map((label, index) => {
    const [r, g, b] = ENUM_PALETTE[index % ENUM_PALETTE.length];
    const swatchStyle = { backgroundColor: `rgb(${r},${g},${b})` };
    return (
      <div key={label} className="flex items-center gap-1">
        <div className="w-2.5 h-2.5 rounded-sm shrink-0" style={swatchStyle} />
        <span className="text-warm-500 dark:text-warm-400 whitespace-nowrap text-[11px]">{label}</span>
      </div>
    );
  });

  return <div className="flex items-center gap-2 flex-1 min-w-[40%] flex-wrap">{items}</div>;
}
export default function MapLegend({
featureLabel,
range,
@ -27,6 +65,7 @@ export default function MapLegend({
suffix?: string;
raw?: boolean;
}) {
const isEnum = enumValues && enumValues.length > 0;
const densityGradient = theme === 'dark' ? DENSITY_GRADIENT_DARK : DENSITY_GRADIENT;
const gradientStyle =
mode === 'density' ? gradientToCss(densityGradient) : gradientToCss(FEATURE_GRADIENT);
@ -36,18 +75,14 @@ export default function MapLegend({
const rangeMin =
mode === 'density' ? (
<TickerValue text={formatValue(range[0])} />
) : enumValues && enumValues.length > 0 ? (
<span>{enumValues[0]}</span>
) : (
) : isEnum ? null : (
<TickerValue text={formatValue(range[0], fmt) + (suffix || '')} />
);
const rangeMax =
mode === 'density' ? (
<TickerValue text={formatValue(range[1])} />
) : enumValues && enumValues.length > 0 ? (
<span>{enumValues[enumValues.length - 1]}</span>
) : (
) : isEnum ? null : (
<TickerValue text={formatValue(range[1], fmt) + (suffix || '')} />
);
@ -66,11 +101,15 @@ export default function MapLegend({
<CloseIcon className="w-3.5 h-3.5" />
</button>
)}
{isEnum ? (
<InlineEnumSwatches values={enumValues} />
) : (
<div className="flex items-center gap-1.5 flex-1 min-w-[40%] text-warm-500 dark:text-warm-400">
{rangeMin}
<div className="h-2.5 rounded flex-1 min-w-[40px]" style={{ background: gradientStyle }} />
{rangeMax}
</div>
)}
</div>
);
}
@ -89,11 +128,17 @@ export default function MapLegend({
</button>
)}
</div>
{isEnum ? (
<EnumSwatches values={enumValues} />
) : (
<>
<div className="h-3 rounded" style={{ background: gradientStyle }} />
<div className="flex justify-between mt-1 text-warm-600 dark:text-warm-200">
{rangeMin}
{rangeMax}
</div>
</>
)}
</div>
);
}

View file

@ -86,7 +86,7 @@ export default function MapPage({
useState<Set<string>>(initialPOICategories);
const [leftPaneWidth, leftPaneHandlers] = usePaneResize(384, 200, 600, 'left');
const [rightPaneWidth, rightPaneHandlers] = usePaneResize(288, 200, 500, 'right');
const [rightPaneWidth, rightPaneHandlers] = usePaneResize(384, 200, 500, 'right');
const [mobileDrawerOpen, setMobileDrawerOpen] = useState(false);
const [poiPaneOpen, setPoiPaneOpen] = useState(false);

View file

@ -1,5 +1,6 @@
import type { FeatureMeta } from '../../types';
import { InfoIcon } from './icons';
import { getFeatureIcon } from '../../lib/feature-icons';
import { getGroupIcon } from '../../lib/group-icons';
const MODE_LABELS: Record<string, string> = {
@ -22,7 +23,9 @@ export function FeatureLabel({
size = 'xs',
}: FeatureLabelProps) {
const textClass = size === 'sm' ? 'text-sm' : 'text-xs';
const GroupIcon = feature.group ? getGroupIcon(feature.group) : null;
const iconClass = 'w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0';
const featureIcon = getFeatureIcon(feature.name, iconClass);
const GroupIcon = !featureIcon && feature.group ? getGroupIcon(feature.group) : null;
const modeTag =
feature.modes && feature.modes.length > 0
? feature.modes.map((m) => MODE_LABELS[m] || m).join(' · ')
@ -32,9 +35,8 @@ export function FeatureLabel({
<div
className={`flex ${size === 'xs' ? 'items-center' : 'items-start'} gap-1 min-w-0 ${className}`}
>
{GroupIcon && (
<GroupIcon className="w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0" />
)}
{featureIcon}
{GroupIcon && <GroupIcon className={iconClass} />}
<span
className={`${textClass} text-warm-700 dark:text-warm-300 ${size === 'xs' ? 'truncate' : ''}`}
>

View file

@ -143,6 +143,13 @@ export function useDeckLayers({
const colorFeatureMetaRef = useRef(colorFeatureMeta);
colorFeatureMetaRef.current = colorFeatureMeta;
// Track enum value count for discrete coloring (0 = numeric/continuous)
const enumCountRef = useRef(0);
enumCountRef.current =
colorFeatureMeta?.type === 'enum' && colorFeatureMeta.values
? colorFeatureMeta.values.length
: 0;
// --- Count ranges ---
const countRange = useMemo(() => {
if (data.length === 0) return { min: 0, max: 1 };
@ -360,7 +367,8 @@ export function useDeckLayers({
0,
densityGradientRef.current,
dark,
255
255,
enumCountRef.current
);
}
}
@ -433,7 +441,8 @@ export function useDeckLayers({
0,
densityGradientRef.current,
dark,
180
180,
enumCountRef.current
);
}
const cr = postcodeCountRangeRef.current;

View file

@ -8,6 +8,15 @@ function looksLikePostcode(s: string) {
return POSTCODE_RE.test(s.trim());
}
/**
 * Canonicalise a UK postcode: remove all whitespace, uppercase the result and,
 * when it is at least 5 characters long, put a single space in front of the
 * final three characters (the inward code). Shorter inputs are returned
 * compacted and uppercased without a space.
 */
function normalizePostcode(s: string): string {
  const compact = s.split(/\s+/).join('').toUpperCase();
  if (compact.length < 5) return compact;

  const inwardStart = compact.length - 3;
  return `${compact.slice(0, inwardStart)} ${compact.slice(inwardStart)}`;
}
export type SearchResult =
| { type: 'postcode'; label: string }
| { type: 'place'; name: string; slug: string; place_type: string; lat: number; lon: number; city?: string };
@ -35,7 +44,7 @@ export function useLocationSearch(mode?: string) {
}
if (!mode && looksLikePostcode(trimmed)) {
setResults([{ type: 'postcode', label: trimmed.toUpperCase() }]);
setResults([{ type: 'postcode', label: normalizePostcode(trimmed) }]);
setOpen(true);
return;
}
@ -97,7 +106,7 @@ export function useLocationSearch(mode?: string) {
if (activeIndex >= 0 && activeIndex < results.length) {
onSelect(results[activeIndex]);
} else if (looksLikePostcode(query)) {
onSelect({ type: 'postcode', label: query.trim().toUpperCase() });
onSelect({ type: 'postcode', label: normalizePostcode(query) });
}
} else if (e.key === 'Escape') {
setOpen(false);

View file

@ -14,7 +14,7 @@ export const MAP_MIN_ZOOM = 5.5;
export const BUFFER_MULTIPLIER = 1.5;
/** Inner London free zone bounds (south, west, north, east) — must match server FREE_ZONE_BOUNDS */
export const FREE_ZONE_BOUNDS = { south: 51.42, west: -0.34, north: 51.60, east: 0.14 };
export const FREE_ZONE_BOUNDS = { south: 51.44, west: -0.31, north: 51.59, east: 0.05 };
export const INITIAL_VIEW_STATE: ViewState = {
longitude: (FREE_ZONE_BOUNDS.west + FREE_ZONE_BOUNDS.east) / 2,
@ -183,8 +183,8 @@ export const STACKED_ENUM_GROUPS: Record<
label: 'Property type',
feature: 'Property type',
components: ['Property type'],
valueOrder: ['Detached', 'Semi-Detached', 'Terraced', 'Flats/Maisonettes'],
valueColors: ['#8b5cf6', '#3b82f6', '#14b8a6', '#f59e0b'],
valueOrder: ['Detached', 'Semi-Detached', 'Terraced', 'Flats/Maisonettes', 'Other'],
valueColors: ['#8b5cf6', '#3b82f6', '#14b8a6', '#f59e0b', '#6b7280'],
},
{
label: 'Leasehold/Freehold',
@ -212,6 +212,23 @@ export const STACKED_ENUM_GROUPS: Record<
],
};
/**
 * Maximally-distinguishable palette for discrete enum features on the map.
 * 10 colors chosen for perceptual distinctness in both light and dark modes.
 *
 * Each entry is an [r, g, b] tuple of 0-255 channel values; the trailing
 * comments give the Tailwind colour each tuple corresponds to. The map legend
 * swatches pick an entry by value position modulo the palette length, so a
 * feature with more values than entries reuses colours from the start.
 */
export const ENUM_PALETTE: [number, number, number][] = [
[59, 130, 246], // blue-500
[249, 115, 22], // orange-500
[139, 92, 246], // violet-500
[34, 197, 94], // green-500
[239, 68, 68], // red-500
[6, 182, 212], // cyan-500
[236, 72, 153], // pink-500
[245, 158, 11], // amber-500
[20, 184, 166], // teal-500
[107, 114, 128], // gray-500
];
/** Colors for stacked bar segments */
export const SEGMENT_COLORS = [
'#ef4444', // red-500

View file

@ -19,13 +19,9 @@ const PROPERTY_TYPE_MAP: Record<
onthemarket: 'semi-detached',
zoopla: 'semi_detached',
},
'Mid-Terrace': { rightmove: 'terraced', onthemarket: 'terraced', zoopla: 'terraced' },
'End-Terrace': { rightmove: 'terraced', onthemarket: 'terraced', zoopla: 'terraced' },
'Enclosed Mid-Terrace': { rightmove: 'terraced', onthemarket: 'terraced', zoopla: 'terraced' },
'Enclosed End-Terrace': { rightmove: 'terraced', onthemarket: 'terraced', zoopla: 'terraced' },
Terraced: { rightmove: 'terraced', onthemarket: 'terraced', zoopla: 'terraced' },
'Flats/Maisonettes': { rightmove: 'flat', onthemarket: 'flats', zoopla: 'flat' },
Bungalow: { rightmove: 'bungalow', onthemarket: 'bungalow', zoopla: 'bungalow' },
'Park home': { rightmove: 'park-home', onthemarket: 'property', zoopla: '' },
Other: { rightmove: '', onthemarket: 'property', zoopla: '' },
};
export const H3_RADIUS_MILES: Record<number, number> = {

View file

@ -0,0 +1,509 @@
import type { ReactNode, ReactElement } from 'react';
/**
* Per-feature SVG icon paths. Each feature gets a unique icon within its group.
* All paths render inside a shared 24x24 viewBox with stroke="currentColor".
*/
const FEATURE_ICON_PATHS: Record<string, ReactNode> = {
// ── Properties in the area ───────────────────
'Last known price': (
<>
<path d="M7 20h10" />
<path d="M10 20V9a4 4 0 018 0" />
<path d="M6 14h8" />
</>
),
'Estimated current price': (
<>
<polyline points="4 16 8 12 13 15 20 6" />
<polyline points="15 6 21 6 21 12" />
</>
),
'Price per sqm': (
<>
<rect x="3" y="3" width="7" height="7" />
<rect x="14" y="3" width="7" height="7" />
<rect x="14" y="14" width="7" height="7" />
<rect x="3" y="14" width="7" height="7" />
</>
),
'Est. price per sqm': (
<>
<rect x="3" y="7" width="18" height="10" rx="1" />
<path d="M7 7v4m4-4v6m4-6v4m4-4v6" />
</>
),
'Total floor area (sqm)': (
<>
<polyline points="15 3 21 3 21 9" />
<polyline points="9 21 3 21 3 15" />
<line x1="21" y1="3" x2="14" y2="10" />
<line x1="3" y1="21" x2="10" y2="14" />
</>
),
'Interior height (m)': (
<>
<line x1="12" y1="2" x2="12" y2="22" />
<polyline points="8 6 12 2 16 6" />
<polyline points="8 18 12 22 16 18" />
</>
),
'Number of bedrooms & living rooms': (
<>
<rect x="5" y="2" width="14" height="20" rx="1" />
<circle cx="15" cy="12" r="1" fill="currentColor" />
</>
),
'Estimated monthly rent': (
<>
<circle cx="8" cy="15" r="5" />
<path d="M12 11l9-9" />
<path d="M17 2h4v4" />
</>
),
'Date of last transaction': (
<>
<rect x="3" y="4" width="18" height="18" rx="2" />
<line x1="16" y1="2" x2="16" y2="6" />
<line x1="8" y1="2" x2="8" y2="6" />
<line x1="3" y1="10" x2="21" y2="10" />
</>
),
'Construction age': (
<>
<path d="M14.7 6.3a1 1 0 000 1.4l1.6 1.6a1 1 0 001.4 0l3.77-3.77a6 6 0 01-7.94 7.94l-6.91 6.91a2.12 2.12 0 01-3-3l6.91-6.91a6 6 0 017.94-7.94l-3.76 3.76z" />
</>
),
'Asking price': (
<>
<path d="M20.59 13.41l-7.17 7.17a2 2 0 01-2.83 0L2 12V2h10l8.59 8.59a2 2 0 010 2.82z" />
<line x1="7" y1="7" x2="7.01" y2="7" />
</>
),
'Asking rent (monthly)': (
<>
<circle cx="9" cy="9" r="7" />
<path d="M15.58 8.42A7 7 0 0122 15a7 7 0 01-7 7 7 7 0 01-6.58-4.58" />
</>
),
Bedrooms: (
<>
<path d="M2 4v16" />
<path d="M2 8h18a2 2 0 012 2v10" />
<path d="M2 17h20" />
<path d="M6 4v4" />
</>
),
Bathrooms: (
<>
<path d="M4 12h16a1 1 0 011 1v3a4 4 0 01-4 4H7a4 4 0 01-4-4v-3a1 1 0 011-1z" />
<path d="M6 12V5a2 2 0 012-2h3" />
<line x1="14" y1="4" x2="17" y2="4" />
</>
),
'Listing date': (
<>
<circle cx="12" cy="12" r="10" />
<polyline points="12 6 12 12 16 14" />
</>
),
'Listing status': (
<>
<line x1="8" y1="6" x2="21" y2="6" />
<line x1="8" y1="12" x2="21" y2="12" />
<line x1="8" y1="18" x2="21" y2="18" />
<line x1="3" y1="6" x2="3.01" y2="6" />
<line x1="3" y1="12" x2="3.01" y2="12" />
<line x1="3" y1="18" x2="3.01" y2="18" />
</>
),
'Leasehold/Freehold': (
<>
<path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z" />
<polyline points="14 2 14 8 20 8" />
</>
),
'Property type': (
<>
<path d="M3 9l9-7 9 7v11a2 2 0 01-2 2H5a2 2 0 01-2-2z" />
<polyline points="9 22 9 12 15 12 15 22" />
</>
),
'Current energy rating': (
<>
<polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2" />
</>
),
'Potential energy rating': (
<>
<polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2" />
</>
),
// ── Transport ────────────────────────────────
'Train or tube stations within 1km': (
<>
<rect x="4" y="3" width="16" height="14" rx="2" />
<path d="M4 11h16" />
<circle cx="8" cy="15" r="1" fill="currentColor" />
<circle cx="16" cy="15" r="1" fill="currentColor" />
<path d="M8 21l-2-4h12l-2 4" />
</>
),
'Distance to nearest train or tube station (km)': (
<>
<path d="M12 2v8" />
<path d="M4.93 10.93l2.83 2.83" />
<path d="M2 18h2" />
<path d="M20 18h2" />
<path d="M19.07 10.93l-2.83 2.83" />
<circle cx="12" cy="18" r="4" />
<line x1="12" y1="18" x2="12" y2="15" />
</>
),
// ── Education ────────────────────────────────
'Education, Skills and Training Score': (
<>
<path d="M2 3h6a4 4 0 014 4 4 4 0 014-4h6v18a2 2 0 01-2 2h-4a4 4 0 00-4 4 4 4 0 00-4-4H4a2 2 0 01-2-2z" />
</>
),
'Good+ primary schools within 5km': (
<>
<path d="M4 19V9l8-6 8 6v10" />
<path d="M9 19v-6h6v6" />
<line x1="4" y1="19" x2="20" y2="19" />
</>
),
'Good+ secondary schools within 5km': (
<>
<path d="M22 10v6M2 10l10-5 10 5-10 5z" />
<path d="M6 12v5c0 2.5 3 4 6 4s6-1.5 6-4v-5" />
</>
),
// ── Deprivation ──────────────────────────────
'Income Score (rate)': (
<>
<rect x="2" y="6" width="20" height="14" rx="2" />
<path d="M2 10h20" />
<path d="M6 14h4m4 0h4" />
</>
),
'Employment Score (rate)': (
<>
<rect x="2" y="7" width="20" height="14" rx="2" />
<path d="M16 3h-8a2 2 0 00-2 2v2h12V5a2 2 0 00-2-2z" />
<path d="M12 12v4" />
</>
),
'Health Deprivation and Disability Score': (
<>
<path d="M20.42 4.58a5.4 5.4 0 00-7.65 0L12 5.34l-.77-.76a5.4 5.4 0 00-7.65 7.65L12 20.65l8.42-8.42a5.4 5.4 0 000-7.65z" />
</>
),
'Living Environment Score': (
<>
<path d="M3 9l9-7 9 7v11a2 2 0 01-2 2H5a2 2 0 01-2-2z" />
<path d="M9 16l2 2 4-4" />
</>
),
'Indoors Sub-domain Score': (
<>
<path d="M20 9V6a2 2 0 00-2-2H6a2 2 0 00-2 2v3" />
<path d="M2 11v5a2 2 0 002 2h1v3h2v-3h10v3h2v-3h1a2 2 0 002-2v-5a3 3 0 00-3-3H5a3 3 0 00-3 3z" />
</>
),
'Outdoors Sub-domain Score': (
<>
<path d="M11 20A7 7 0 019.8 6.9C15.5 4.9 20 9 20 9s-3.4 5.4-3.4 9c0 .6 0 1.2-.1 1.8" />
<path d="M12 10a3.5 3.5 0 00-5 5" />
</>
),
// ── Crime summary ────────────────────────────
'Serious crime (avg/yr)': (
<>
<path d="M10.29 3.86L1.82 18a2 2 0 001.71 3h16.94a2 2 0 001.71-3L13.71 3.86a2 2 0 00-3.42 0z" />
<line x1="12" y1="9" x2="12" y2="13" />
<line x1="12" y1="17" x2="12.01" y2="17" />
</>
),
'Minor crime (avg/yr)': (
<>
<circle cx="12" cy="12" r="10" />
<line x1="12" y1="8" x2="12" y2="12" />
<line x1="12" y1="16" x2="12.01" y2="16" />
</>
),
'Serious crime per 1k residents (avg/yr)': (
<>
<line x1="12" y1="3" x2="12" y2="15" />
<circle cx="12" cy="18" r="3" />
<path d="M8 3h8" />
</>
),
'Minor crime per 1k residents (avg/yr)': (
<>
<path d="M17 21v-2a4 4 0 00-4-4H5a4 4 0 00-4 4v2" />
<circle cx="9" cy="7" r="4" />
<path d="M23 21v-2a4 4 0 00-3-3.87" />
<path d="M16 3.13a4 4 0 010 7.75" />
</>
),
// ── Crime ────────────────────────────────────
'Anti-social behaviour (avg/yr)': (
<>
<path d="M3 11l18-5v12L3 13v-2z" />
<path d="M11.6 16.8a3 3 0 015.8 0" />
</>
),
'Violence and sexual offences (avg/yr)': (
<>
<path d="M10.29 3.86L1.82 18a2 2 0 001.71 3h16.94a2 2 0 001.71-3L13.71 3.86a2 2 0 00-3.42 0z" />
<line x1="12" y1="9" x2="12" y2="13" />
<line x1="12" y1="17" x2="12.01" y2="17" />
</>
),
'Criminal damage and arson (avg/yr)': (
<>
<path d="M12 12c2-2.67 4-4 4-6.5a4 4 0 10-8 0c0 2.5 2 3.83 4 6.5z" />
<path d="M10 17c0 1.1.9 2 2 2s2-.9 2-2c0-1.33-1-2-2-3-1 1-2 1.67-2 3z" />
</>
),
'Burglary (avg/yr)': (
<>
<rect x="3" y="11" width="18" height="11" rx="2" />
<path d="M7 11V7a5 5 0 0110 0v4" />
<path d="M12 17v-2" />
</>
),
'Vehicle crime (avg/yr)': (
<>
<path d="M5 17h14v-5l-2-5H7L5 12z" />
<circle cx="7.5" cy="17.5" r="2.5" />
<circle cx="16.5" cy="17.5" r="2.5" />
</>
),
'Robbery (avg/yr)': (
<>
<circle cx="12" cy="12" r="10" />
<line x1="4" y1="8" x2="20" y2="8" />
<line x1="4" y1="16" x2="20" y2="16" />
<line x1="12" y1="2" x2="12" y2="8" />
</>
),
'Other theft (avg/yr)': (
<>
<path d="M18 8a6 6 0 00-12 0c0 7-3 9-3 9h18s-3-2-3-9" />
<path d="M13.73 21a2 2 0 01-3.46 0" />
</>
),
'Shoplifting (avg/yr)': (
<>
<path d="M6 2L3 6v14a2 2 0 002 2h14a2 2 0 002-2V6l-3-4z" />
<line x1="3" y1="6" x2="21" y2="6" />
<path d="M16 10a4 4 0 01-8 0" />
</>
),
'Drugs (avg/yr)': (
<>
<path d="M10.5 1.5H8a6.5 6.5 0 000 13h8a6.5 6.5 0 000-13h-2.5" />
<line x1="12" y1="1" x2="12" y2="14" />
</>
),
'Possession of weapons (avg/yr)': (
<>
<circle cx="12" cy="12" r="10" />
<line x1="22" y1="12" x2="18" y2="12" />
<line x1="6" y1="12" x2="2" y2="12" />
<line x1="12" y1="6" x2="12" y2="2" />
<line x1="12" y1="22" x2="12" y2="18" />
</>
),
'Public order (avg/yr)': (
<>
<line x1="1" y1="12" x2="3" y2="12" />
<polyline points="8 8 12 4 16 8" />
<line x1="12" y1="4" x2="12" y2="20" />
<polyline points="8 16 12 20 16 16" />
<line x1="21" y1="12" x2="23" y2="12" />
</>
),
'Bicycle theft (avg/yr)': (
<>
<circle cx="6" cy="17" r="3" />
<circle cx="18" cy="17" r="3" />
<path d="M6 17l4-8h4l2 4 2-4" />
</>
),
'Theft from the person (avg/yr)': (
<>
<rect x="2" y="6" width="20" height="14" rx="2" />
<path d="M2 10h20" />
<path d="M6 14h4m4 0h4" />
</>
),
'Other crime (avg/yr)': (
<>
<path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z" />
<polyline points="14 2 14 8 20 8" />
<line x1="12" y1="12" x2="12" y2="16" />
<line x1="12" y1="18" x2="12.01" y2="18" />
</>
),
// ── Demographics ─────────────────────────────
'% White': (
<>
<path d="M17 21v-2a4 4 0 00-4-4H5a4 4 0 00-4 4v2" />
<circle cx="9" cy="7" r="4" />
<path d="M23 21v-2a4 4 0 00-3-3.87" />
<path d="M16 3.13a4 4 0 010 7.75" />
</>
),
'% South Asian': (
<>
<circle cx="12" cy="12" r="10" />
<path d="M2 12h20" />
<path d="M12 2a15.3 15.3 0 014 10 15.3 15.3 0 01-4 10 15.3 15.3 0 01-4-10 15.3 15.3 0 014-10z" />
</>
),
'% East Asian': (
<>
<path d="M21 10c0 7-9 13-9 13s-9-6-9-13a9 9 0 0118 0z" />
<circle cx="12" cy="10" r="3" />
</>
),
'% Black': (
<>
<path d="M21.21 15.89A10 10 0 118 2.83" />
<path d="M22 12A10 10 0 0012 2v10z" />
</>
),
'% Mixed': (
<>
<circle cx="9" cy="12" r="7" />
<circle cx="15" cy="12" r="7" />
</>
),
'% Other': (
<>
<line x1="4" y1="9" x2="20" y2="9" />
<line x1="4" y1="15" x2="20" y2="15" />
<line x1="10" y1="3" x2="8" y2="21" />
<line x1="16" y1="3" x2="14" y2="21" />
</>
),
// ── Amenities ────────────────────────────────
'Number of restaurants within 2km': (
<>
<path d="M3 2v8c0 1.1.9 2 2 2h2v10h2V12h2a2 2 0 002-2V2" />
<path d="M7 2v4" />
<path d="M19 2v20" />
<path d="M19 8a3 3 0 00-3-3" />
</>
),
'Number of grocery shops and supermarkets within 2km': (
<>
<circle cx="9" cy="21" r="1" />
<circle cx="20" cy="21" r="1" />
<path d="M1 1h4l2.68 13.39a2 2 0 002 1.61h9.72a2 2 0 002-1.61L23 6H6" />
</>
),
'Number of parks within 2km': (
<>
<path d="M12 22v-7" />
<path d="M17 15H7l2-4H5l7-9 7 9h-4l2 4z" />
</>
),
// ── Environment ──────────────────────────────
'Noise (dB)': (
<>
<polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5" />
<path d="M19.07 4.93a10 10 0 010 14.14" />
<path d="M15.54 8.46a5 5 0 010 7.07" />
</>
),
'Max available download speed (Mbps)': (
<>
<path d="M5 12.55a11 11 0 0114.08 0" />
<path d="M1.42 9a16 16 0 0121.16 0" />
<path d="M8.53 16.11a6 6 0 016.95 0" />
<line x1="12" y1="20" x2="12.01" y2="20" />
</>
),
'Environmental risk': (
<>
<path d="M10.29 3.86L1.82 18a2 2 0 001.71 3h16.94a2 2 0 001.71-3L13.71 3.86a2 2 0 00-3.42 0z" />
<line x1="12" y1="9" x2="12" y2="13" />
<line x1="12" y1="17" x2="12.01" y2="17" />
</>
),
'Collapsible deposits risk': (
<>
<polyline points="12 2 2 7 12 12 22 7 12 2" />
<polyline points="2 17 12 22 22 17" />
<polyline points="2 12 12 17 22 12" />
</>
),
'Compressible ground risk': (
<>
<line x1="12" y1="2" x2="12" y2="22" />
<polyline points="16 6 12 2 8 6" />
<polyline points="16 18 12 22 8 18" />
<line x1="4" y1="12" x2="20" y2="12" />
</>
),
'Landslide risk': (
<>
<path d="M8 3l4 8 5-5 5 15H2L8 3z" />
</>
),
'Running sand risk': (
<>
<path d="M2 6c2-1 4-1 6 0s4 1 6 0 4-1 6 0" />
<path d="M2 12c2-1 4-1 6 0s4 1 6 0 4-1 6 0" />
<path d="M2 18c2-1 4-1 6 0s4 1 6 0 4-1 6 0" />
</>
),
'Shrink-swell risk': (
<>
<line x1="2" y1="12" x2="22" y2="12" />
<polyline points="6 8 2 12 6 16" />
<polyline points="18 8 22 12 18 16" />
</>
),
'Soluble rocks risk': (
<>
<path d="M12 2.69l5.66 5.66a8 8 0 11-11.31 0z" />
</>
),
};
/**
 * Wraps the stroke paths registered for `featureName` in an `<svg>` element
 * (24x24 viewBox, no fill, `currentColor` stroke) carrying the given CSS class.
 * Yields null when the feature has no entry in FEATURE_ICON_PATHS.
 */
export function getFeatureIcon(
  featureName: string,
  className: string,
): ReactElement | null {
  const iconPaths = FEATURE_ICON_PATHS[featureName];
  return iconPaths ? (
    <svg
      className={className}
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
    >
      {iconPaths}
    </svg>
  ) : null;
}

View file

@ -8,6 +8,7 @@ import {
ZOOM_TO_RESOLUTION_THRESHOLDS,
TWEMOJI_BASE,
BUFFER_MULTIPLIER,
ENUM_PALETTE,
} from './consts';
const ROAD_OPACITY = 0.4;
@ -193,9 +194,16 @@ export function emojiToTwemojiUrl(emoji: string): string {
return `${TWEMOJI_BASE}${hex}.png`;
}
/**
 * Pick a discrete color from ENUM_PALETTE for an enum index.
 * Negative indices clamp to 0, fractional ones are rounded, and indices at or
 * beyond the palette length wrap around.
 */
export function enumIndexToColor(index: number): [number, number, number] {
  const clamped = Math.max(0, index);
  const slot = Math.round(clamped) % ENUM_PALETTE.length;
  return ENUM_PALETTE[slot];
}
/**
* Shared fill-color logic for hex and postcode layers.
* When a viewFeature is active, normalizes by colorRange and applies the feature gradient.
* For enum features (enumCount > 0), uses discrete palette colors instead of gradient.
* Otherwise falls back to density-based coloring using countRange.
*/
export function getFeatureFillColor(
@ -207,7 +215,8 @@ export function getFeatureFillColor(
countNormalized: number,
densityGradient: GradientStop[],
isDark: boolean,
alpha: number
alpha: number,
enumCount: number = 0
): [number, number, number, number] {
if (colorRange) {
if (value == null)
@ -222,6 +231,13 @@ export function getFeatureFillColor(
];
}
}
// Discrete coloring for enum features
if (enumCount > 0) {
const rgb = enumIndexToColor(Math.round(value as number));
return [...rgb, alpha] as [number, number, number, number];
}
const range = colorRange[1] - colorRange[0];
if (range === 0)
return [...FEATURE_GRADIENT[0].color, alpha] as [number, number, number, number];

View file

@ -15,8 +15,7 @@ module.exports = (env, argv) => {
filename: 'bundle.js',
clean: true,
// Dev needs '/' for HMR WebSocket; prod uses '' for relative paths through proxies
publicPath: isProduction ? '' : '/',
publicPath: '/',
},
resolve: {
extensions: ['.ts', '.tsx', '.js'],

View file

@ -249,8 +249,18 @@ def _build(
.when(has_epc)
.then(pl.col("epc_property_type"))
.otherwise(pl.col("pp_property_type"))
# Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes"
.replace({"Flat": "Flats/Maisonettes", "Maisonette": "Flats/Maisonettes"})
# Unify EPC's "Flat"/"Maisonette" with price-paid's "Flats/Maisonettes",
# collapse terrace sub-types, and fold rare types into "Other"
.replace({
"Flat": "Flats/Maisonettes",
"Maisonette": "Flats/Maisonettes",
"End-Terrace": "Terraced",
"Mid-Terrace": "Terraced",
"Enclosed End-Terrace": "Terraced",
"Enclosed Mid-Terrace": "Terraced",
"Bungalow": "Other",
"Park home": "Other",
})
.alias("property_type")
)

View file

@ -1,4 +1,4 @@
"""Count POIs within a radius of properties, optimized via postcode deduplication."""
"""Count POIs within a radius of properties, optimised via postcode deduplication."""
import numpy as np
import polars as pl
@ -6,6 +6,49 @@ import polars as pl
from .haversine import haversine_km
def _build_poi_grid(
    pois: pl.DataFrame, grid_size: float = 0.05
) -> tuple[np.ndarray, np.ndarray, np.ndarray, dict]:
    """Bucket POIs into square lat/lng cells for fast neighbourhood lookup.

    Args:
        pois: POI table with "lat", "lng" and "category" columns.
        grid_size: Cell edge length in degrees.

    Returns:
        (lats, lngs, cats, grid): the three POI columns as numpy arrays, plus a
        dict mapping (lat_cell, lng_cell) to an int32 array holding the row
        indices of the POIs that fall in that cell.
    """
    lats = pois["lat"].to_numpy()
    lngs = pois["lng"].to_numpy()
    cats = pois["category"].to_numpy()

    # Integer cell coordinates of every POI.
    cell_lat = np.floor(lats / grid_size).astype(np.int32)
    cell_lng = np.floor(lngs / grid_size).astype(np.int32)

    # Collect row indices per occupied cell.
    buckets: dict[tuple[int, int], list[int]] = {}
    for row in range(len(pois)):
        buckets.setdefault((cell_lat[row], cell_lng[row]), []).append(row)

    # Freeze each bucket into an array so callers can fancy-index with it.
    grid = {
        cell: np.array(members, dtype=np.int32) for cell, members in buckets.items()
    }
    return lats, lngs, cats, grid
def _get_nearby_indices(
pc_lat: float, pc_lon: float, poi_grid: dict, grid_size: float = 0.05
) -> np.ndarray | None:
"""Get POI indices from grid cells near the given coordinate."""
grid_lat = int(np.floor(pc_lat / grid_size))
grid_lng = int(np.floor(pc_lon / grid_size))
nearby_indices = []
for dlat in [-1, 0, 1]:
for dlng in [-1, 0, 1]:
cell_key = (grid_lat + dlat, grid_lng + dlng)
if cell_key in poi_grid:
nearby_indices.append(poi_grid[cell_key])
if not nearby_indices:
return None
return np.concatenate(nearby_indices)
def count_pois_per_postcode(
postcodes_df: pl.DataFrame,
pois: pl.DataFrame,
@ -22,31 +65,9 @@ def count_pois_per_postcode(
n_pois = len(pois)
print(f" {n_postcodes:,} postcodes, {n_pois:,} POIs")
# Build spatial grid for POIs (0.05 degree cells ~5.5km)
grid_size = 0.05
print(" Building POI spatial grid...")
# Convert to numpy arrays
poi_lats = pois["lat"].to_numpy()
poi_lngs = pois["lng"].to_numpy()
poi_cats = pois["category"].to_numpy()
# Compute grid coordinates for all POIs
poi_grid_lats = np.floor(poi_lats / grid_size).astype(np.int32)
poi_grid_lngs = np.floor(poi_lngs / grid_size).astype(np.int32)
# Build grid cell lookup using numpy indexing
poi_grid = {}
for i in range(n_pois):
key = (poi_grid_lats[i], poi_grid_lngs[i])
if key not in poi_grid:
poi_grid[key] = []
poi_grid[key].append(i)
# Convert grid values to numpy arrays for faster indexing
for key in poi_grid:
poi_grid[key] = np.array(poi_grid[key], dtype=np.int32)
poi_lats, poi_lngs, poi_cats, poi_grid = _build_poi_grid(pois, grid_size)
print(f" POI grid has {len(poi_grid):,} occupied cells")
# Pre-compute category masks
@ -81,38 +102,18 @@ def count_pois_per_postcode(
# Process batch
for i in range(start_idx, end_idx):
pc_lat = pc_lats[i]
pc_lon = pc_lons[i]
# Find grid cells to check (3x3 grid)
grid_lat = int(np.floor(pc_lat / grid_size))
grid_lng = int(np.floor(pc_lon / grid_size))
# Collect nearby POI indices
nearby_indices = []
for dlat in [-1, 0, 1]:
for dlng in [-1, 0, 1]:
cell_key = (grid_lat + dlat, grid_lng + dlng)
if cell_key in poi_grid:
nearby_indices.append(poi_grid[cell_key])
if not nearby_indices:
nearby = _get_nearby_indices(pc_lats[i], pc_lons[i], poi_grid, grid_size)
if nearby is None:
continue
# Concatenate all nearby POI indices
nearby = np.concatenate(nearby_indices)
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lats[i], pc_lons[i])
# Vectorized distance calculation for all nearby POIs
distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], pc_lat, pc_lon)
# Filter by radius
within_mask = distances <= radius_km
within_indices = nearby[within_mask]
if len(within_indices) == 0:
continue
# Count by category group using pre-computed masks
for group, cat_mask in category_masks.items():
result_counts[group][i] = cat_mask[within_indices].sum()
@ -124,3 +125,71 @@ def count_pois_per_postcode(
result = pl.DataFrame(result_data)
print(" Completed POI counting")
return result
def min_distance_per_postcode(
    postcodes_df: pl.DataFrame,
    pois: pl.DataFrame,
    groups: dict[str, list[str]],
) -> pl.DataFrame:
    """
    Find, for every postcode, how far away (km) the nearest POI of each group is.

    Only POIs in the 3x3 block of 0.05-degree grid cells around a postcode are
    examined, so a group with no POI inside that block is reported as NaN.

    Args:
        postcodes_df: Table with "postcode", "lat" and "lon" columns.
        pois: Table with "lat", "lng" and "category" columns.
        groups: Maps a group name to the POI categories that belong to it.

    Returns:
        DataFrame with the "postcode" column plus one float32
        "<group>_nearest_km" column per group.
    """
    print("Computing minimum POI distances per postcode...")
    n_postcodes = len(postcodes_df)
    n_pois = len(pois)
    print(f" {n_postcodes:,} postcodes, {n_pois:,} POIs")

    grid_size = 0.05
    print(" Building POI spatial grid...")
    poi_lats, poi_lngs, poi_cats, poi_grid = _build_poi_grid(pois, grid_size)
    print(f" POI grid has {len(poi_grid):,} occupied cells")

    # One boolean mask over all POIs per group, computed once up front.
    category_masks = {}
    for group, categories in groups.items():
        membership = np.isin(poi_cats, categories)
        category_masks[group] = membership
        print(f" {group}: {membership.sum():,} POIs")

    pc_lats = postcodes_df["lat"].to_numpy()
    pc_lons = postcodes_df["lon"].to_numpy()
    pc_codes = postcodes_df["postcode"].to_list()

    # NaN marks "nothing found"; entries are overwritten as matches turn up.
    nearest_km = {
        group: np.full(n_postcodes, np.nan, dtype=np.float32) for group in groups
    }

    batch_size = 50000
    n_batches = (n_postcodes + batch_size - 1) // batch_size
    print(f" Processing {n_postcodes:,} postcodes in {n_batches} batches...")

    for batch_idx, start_idx in enumerate(range(0, n_postcodes, batch_size)):
        end_idx = min(start_idx + batch_size, n_postcodes)
        if batch_idx % 5 == 0:
            print(f" Batch {batch_idx + 1}/{n_batches}: postcodes {start_idx:,} - {end_idx:,}")

        for i in range(start_idx, end_idx):
            lat, lon = pc_lats[i], pc_lons[i]
            nearby = _get_nearby_indices(lat, lon, poi_grid, grid_size)
            if nearby is None:
                continue

            distances = haversine_km(poi_lats[nearby], poi_lngs[nearby], lat, lon)
            for group, cat_mask in category_masks.items():
                in_group = cat_mask[nearby]
                if in_group.any():
                    nearest_km[group][i] = distances[in_group].min()

    columns = {"postcode": pc_codes}
    columns.update({f"{group}_nearest_km": nearest_km[group] for group in groups})

    result = pl.DataFrame(columns)
    print(" Completed minimum distance computation")
    return result

View file

@ -10,7 +10,7 @@ from scipy.spatial import cKDTree
def build_postcode_mapping(arcgis_path: Path) -> pl.DataFrame:
"""Build a mapping from terminated England postcodes to their nearest active postcode.
Uses OS National Grid coordinates (oseast1m, osnrth1m) which are Cartesian meters,
Uses OS National Grid coordinates (oseast1m, osnrth1m) which are Cartesian metres,
so Euclidean distance via cKDTree gives accurate results without projection.
"""
arcgis = pl.scan_parquet(arcgis_path).filter(pl.col("ctry") == "E92000001")

View file

@ -1,13 +1,14 @@
import numpy as np
import polars as pl
import pytest
from pipeline.utils.poi_counts import count_pois_per_postcode
from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_postcode
POI_GROUPS = {
"restaurants": ["Restaurant", "Fast Food"],
"groceries": ["Supermarket"],
"parks": ["Park"],
"public_transport": ["Station"],
"train_tube": ["Rail station", "Metro or Tram stop"],
}
@ -23,7 +24,7 @@ def pois():
"Fast Food",
"Supermarket",
"Park",
"Station",
"Rail station",
"Restaurant", # too far from any property
],
}
@ -55,7 +56,7 @@ def test_counts_pois_within_radius(postcodes, pois):
assert ec1a["restaurants_2km"][0] == 2 # Restaurant + Fast Food
assert ec1a["groceries_2km"][0] == 1 # Supermarket
assert ec1a["parks_2km"][0] == 1 # Park
assert ec1a["public_transport_2km"][0] == 1 # Station
assert ec1a["train_tube_2km"][0] == 1 # Rail station
# Far-away postcode should have zero counts
zz99 = result.filter(pl.col("postcode") == "ZZ99 9ZZ")
@ -96,3 +97,35 @@ def test_custom_radius(pois):
# POIs >100m away should not be counted
total = sum(result[f"{g}_0km"][0] for g in POI_GROUPS)
assert total <= 2 # at most the co-located POIs
def test_min_distance_finds_nearest(postcodes, pois):
    """The nearest-POI distance is reported per group, and is NaN when nothing is in range."""
    distances = min_distance_per_postcode(postcodes, pois, groups=POI_GROUPS)
    assert len(distances) == 2

    central = distances.filter(pl.col("postcode") == "EC1A 1BB")
    # Rail station at (51.5073, -0.1277) vs postcode at (51.5074, -0.1278): within 50m
    assert central["train_tube_nearest_km"][0] < 0.05
    # A restaurant shares the postcode's location, so the distance is ~0
    assert central["restaurants_nearest_km"][0] < 0.01

    remote = distances.filter(pl.col("postcode") == "ZZ99 9ZZ")
    # Nothing lies within grid range of the far-away postcode, so it stays NaN
    assert np.isnan(remote["train_tube_nearest_km"][0])
def test_min_distance_no_pois_returns_nan(postcodes):
    """An empty POI table yields a NaN distance for every postcode."""
    no_pois = pl.DataFrame(
        {
            "lat": pl.Series([], dtype=pl.Float64),
            "lng": pl.Series([], dtype=pl.Float64),
            "category": pl.Series([], dtype=pl.String),
        }
    )
    only_trains = {"train_tube": ["Rail station"]}

    result = min_distance_per_postcode(postcodes, no_pois, groups=only_trains)

    # The column is still produced even though no POI could match.
    assert "train_tube_nearest_km" in result.columns
    for value in result["train_tube_nearest_km"].to_list():
        assert np.isnan(value)

View file

@ -20,13 +20,11 @@ pub const AI_FILTERS_TEMPERATURE: f32 = 0.0;
/// Timeout for outbound HTTP service calls (seconds).
pub const SERVICE_CALL_TIMEOUT: u64 = 120;
/// Inner London free zone bounds (south, west, north, east) — roughly zones 12.
/// Inner London free zone bounds (south, west, north, east) — roughly zone 1.
/// Users without a license can only query data within these bounds.
pub const FREE_ZONE_BOUNDS: (f64, f64, f64, f64) = (51.42, -0.34, 51.60, 0.14);
pub const FREE_ZONE_BOUNDS: (f64, f64, f64, f64) = (51.44, -0.31, 51.59, 0.05);
/// Homepage demo center (lat, lng) and tolerance for the license bypass.
/// Hexagon requests centered within this tolerance skip the license check,
/// so the ScrollStory animation works for anonymous visitors.
/// ~0.05° ≈ 5.5 km — covers central London only.
pub const DEMO_CENTER: (f64, f64) = (51.51, -0.12);
pub const DEMO_CENTER_TOLERANCE: f64 = 0.05;
/// Exact demo bounds (south, west, north, east) sent by the homepage ScrollStory.
/// Requests matching these exact values bypass the license check so the
/// animation works for anonymous visitors. Only this specific viewport is allowed.
pub const DEMO_BOUNDS: (f64, f64, f64, f64) = (46.0, -12.0, 56.5, 12.0);

View file

@ -1058,9 +1058,9 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
},
EnumFeatureConfig {
name: "Property type",
order: Some(&["Detached", "Semi-Detached", "Terraced", "Flats/Maisonettes"]),
description: "Type of property: detached, semi-detached, terraced, or flat/maisonette",
detail: "From HM Land Registry Price Paid data. The broad property type classification: Detached, Semi-Detached, Terraced, or Flats/Maisonettes.",
order: Some(&["Detached", "Semi-Detached", "Terraced", "Flats/Maisonettes", "Other"]),
description: "Type of property: detached, semi-detached, terraced, flat/maisonette, or other",
detail: "From HM Land Registry Price Paid data and EPC certificates. Detached, Semi-Detached, Terraced (includes all terrace sub-types), Flats/Maisonettes, or Other (bungalows, park homes, etc.).",
source: "price-paid",
},
EnumFeatureConfig {

View file

@ -205,7 +205,6 @@ pub async fn post_ai_filters(
) -> Result<Json<AiFiltersResponse>, (StatusCode, String)> {
info!(query = %req.query, "POST /api/ai-filters");
// Use Ollama native API with structured output
let url = format!("{}/api/chat", state.ollama_url);
let body = json!({
"model": state.ollama_model,
@ -221,29 +220,65 @@ pub async fn post_ai_filters(
}
});
let json_resp = ollama_chat(&state.http_client, &url, &body).await?;
let content = extract_ollama_content(&json_resp)?;
let content = strip_think_blocks(content);
let content = content.trim();
let raw: Value = serde_json::from_str(content).map_err(|err| {
warn!(error = %err, content = %content, "Failed to parse LLM JSON output");
(
StatusCode::BAD_GATEWAY,
format!("Failed to parse LLM output as JSON: {}", err),
)
})?;
// Validate and convert to FeatureFilters format
// Try up to 2 attempts — LLMs occasionally return empty content (e.g. only
// <think> blocks with no JSON output), which is transient and usually
// succeeds on retry.
let mut last_err = None;
for attempt in 0..2 {
let raw = call_ollama_and_parse(&state.http_client, &url, &body).await;
match raw {
Ok(raw) => {
let filters = validate_and_convert(&raw, &state.features_response);
let notes = raw
.get("notes")
.and_then(|val| val.as_str())
.unwrap_or("")
.to_string();
return Ok(Json(AiFiltersResponse { filters, notes }));
}
Err(err) => {
if attempt == 0 {
warn!("LLM attempt 1 failed, retrying: {}", err.1);
}
last_err = Some(err);
}
}
}
Ok(Json(AiFiltersResponse { filters, notes }))
Err(last_err.unwrap())
}
/// Call Ollama and parse the response content as JSON.
///
/// Returns an error if: the HTTP call fails, the response is malformed,
/// the content is empty after stripping think blocks, or the content is
/// not valid JSON.
async fn call_ollama_and_parse(
client: &reqwest::Client,
url: &str,
body: &Value,
) -> Result<Value, (StatusCode, String)> {
let json_resp = ollama_chat(client, url, body).await?;
let content = extract_ollama_content(&json_resp)?;
let content = strip_think_blocks(content);
let content = content.trim();
if content.is_empty() {
warn!("LLM returned empty content after stripping think blocks");
return Err((
StatusCode::BAD_GATEWAY,
"LLM returned empty content (no JSON output)".into(),
));
}
serde_json::from_str(content).map_err(|err| {
warn!(error = %err, content = %content, "Failed to parse LLM JSON output");
(
StatusCode::BAD_GATEWAY,
format!("Failed to parse LLM output as JSON: {}", err),
)
})
}
/// Validate LLM output against feature metadata and convert to FeatureFilters format.

View file

@ -11,7 +11,7 @@ use tracing::info;
use crate::aggregation::Aggregator;
use crate::auth::OptionalUser;
use crate::consts::{DEMO_CENTER, DEMO_CENTER_TOLERANCE, MAX_CELLS_PER_REQUEST};
use crate::consts::{DEMO_BOUNDS, MAX_CELLS_PER_REQUEST};
use crate::data::travel_time::TravelData;
use crate::licensing::check_license_bounds;
use crate::parsing::{
@ -139,11 +139,7 @@ pub async fn get_hexagons(
let (south, west, north, east) =
require_bounds(params.bounds).map_err(IntoResponse::into_response)?;
let center_lat = (south + north) / 2.0;
let center_lng = (west + east) / 2.0;
let is_demo_view = (center_lat - DEMO_CENTER.0).abs() <= DEMO_CENTER_TOLERANCE
&& (center_lng - DEMO_CENTER.1).abs() <= DEMO_CENTER_TOLERANCE;
let is_demo_view = (south, west, north, east) == DEMO_BOUNDS;
if !is_demo_view {
check_license_bounds(&user.0, (south, west, north, east))
.map_err(|(_, resp)| resp)?;

View file

@ -159,13 +159,26 @@ pub async fn get_invite(
}
let pb_url = state.pocketbase_url.trim_end_matches('/');
let token = match auth_superuser(&state.http_client, pb_url, &state.pocketbase_admin_email, &state.pocketbase_admin_password).await
{
Ok(t) => t,
Err(err) => {
warn!("Failed to auth as PocketBase superuser: {err}");
return StatusCode::BAD_GATEWAY.into_response();
}
};
let filter = format!("code=\"{}\"", code);
let url = format!(
"{pb_url}/api/collections/invites/records?filter={}&perPage=1",
urlencoding::encode(&filter)
);
let res = match state.http_client.get(&url).send().await {
let res = match state.http_client.get(&url)
.header("Authorization", format!("Bearer {token}"))
.send().await
{
Ok(r) => r,
Err(err) => {
warn!("Failed to look up invite: {err}");

View file

@ -12,6 +12,7 @@ use crate::consts::{DEFAULT_PROPERTIES_LIMIT, MAX_PROPERTIES_LIMIT, POSTCODE_SEA
use crate::licensing::check_license_point;
use crate::parsing::{parse_filters, row_passes_filters};
use crate::state::AppState;
use crate::utils::normalize_postcode;
use super::properties::{HexagonPropertiesResponse, Property};
@ -28,12 +29,7 @@ pub async fn get_postcode_properties(
Extension(user): Extension<OptionalUser>,
Query(params): Query<PostcodePropertiesParams>,
) -> Result<Json<HexagonPropertiesResponse>, axum::response::Response> {
let normalized = params
.postcode
.to_uppercase()
.split_whitespace()
.collect::<Vec<_>>()
.join(" ");
let normalized = normalize_postcode(&params.postcode);
let pc_idx = match state.postcode_data.postcode_to_idx.get(&normalized) {
Some(&idx) => idx,

View file

@ -12,6 +12,7 @@ use crate::consts::POSTCODE_SEARCH_OFFSET;
use crate::licensing::check_license_point;
use crate::parsing::{parse_field_set, parse_filters, row_passes_filters};
use crate::state::AppState;
use crate::utils::normalize_postcode;
use super::hexagon_stats::HexagonStatsResponse;
use super::stats;
@ -30,13 +31,7 @@ pub async fn get_postcode_stats(
Extension(user): Extension<OptionalUser>,
Query(params): Query<PostcodeStatsParams>,
) -> Result<Json<HexagonStatsResponse>, axum::response::Response> {
// Normalize postcode: uppercase, collapse whitespace
let normalized = params
.postcode
.to_uppercase()
.split_whitespace()
.collect::<Vec<_>>()
.join(" ");
let normalized = normalize_postcode(&params.postcode);
// Look up postcode centroid for spatial search
let pc_idx = match state.postcode_data.postcode_to_idx.get(&normalized) {

View file

@ -19,6 +19,7 @@ use crate::parsing::{
};
use crate::routes::travel_time::{parse_travel_entries, TravelTimeAgg};
use crate::state::AppState;
use crate::utils::normalize_postcode;
#[derive(Serialize)]
pub struct PostcodesResponse {
@ -361,12 +362,7 @@ pub async fn get_postcode_lookup(
state: Arc<AppState>,
Path(postcode): Path<String>,
) -> Result<Json<Value>, StatusCode> {
// Normalize the postcode: uppercase, remove extra spaces, ensure single space
let normalized = postcode
.to_uppercase()
.split_whitespace()
.collect::<Vec<_>>()
.join(" ");
let normalized = normalize_postcode(&postcode);
let postcode_data = &state.postcode_data;

View file

@ -3,6 +3,7 @@ use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::Json;
use rustc_hash::FxHashSet;
use serde::{Deserialize, Serialize};
use tracing::info;
@ -62,6 +63,11 @@ pub async fn get_travel_destinations(
// Sort: type rank asc, population desc, name length asc
matches.sort_unstable_by(|a, b| a.2.cmp(&b.2).then(b.3.cmp(&a.3)).then(a.4.cmp(&b.4)));
// Deduplicate by slug — multiple places can share a name/slug
// (e.g. "Richmond" as city + suburb), keep the best-ranked one
let mut seen_slugs = FxHashSet::default();
matches.retain(|(_, slug, ..)| seen_slugs.insert(slug.clone()));
let results: Vec<DestinationResult> = matches
.into_iter()
.map(|(idx, slug, ..)| DestinationResult {

View file

@ -7,3 +7,16 @@ pub use grid_index::GridIndex;
pub use hash::{generate_priorities, splitmix64_hash};
pub use interned_column::InternedColumn;
pub use llm::{extract_ollama_content, ollama_chat, strip_think_blocks};
/// Normalize a UK postcode: uppercase, strip all whitespace, then insert the
/// canonical single space before the three-character inward code.
/// e.g. "e142dg" → "E14 2DG", "E14  2DG" → "E14 2DG", "EC1A1BB" → "EC1A 1BB"
///
/// Inputs with fewer than five characters after stripping are too short to
/// hold both an outward and an inward code; they are returned uppercased with
/// no space inserted.
///
/// The length check and the split are done on characters, not bytes, so
/// arbitrary non-ASCII input can never make this function panic.
pub fn normalize_postcode(raw: &str) -> String {
    let stripped: String = raw.chars().filter(|c| !c.is_whitespace()).collect();
    let upper = stripped.to_uppercase();

    // Byte offset of the third-from-last character, i.e. where the inward code
    // starts. Taking it from `char_indices` guarantees a valid char boundary.
    match upper.char_indices().rev().nth(2) {
        Some((inward_start, _)) if upper.chars().count() >= 5 => {
            let (outward, inward) = upper.split_at(inward_start);
            format!("{outward} {inward}")
        }
        _ => upper,
    }
}