This commit is contained in:
Andras Schmelczer 2026-05-28 21:48:35 +01:00
parent 39ef5c6646
commit c995f12f8b
78 changed files with 4830 additions and 1619 deletions

2
.gitignore vendored
View file

@ -24,3 +24,5 @@ video/auth.*
r5-java/tmp
property-data
property-data2
property-data3

View file

@ -13,6 +13,7 @@ FINDER_DATA := ./finder/data
# ── Output files ──────────────────────────────────────────────────────────────
TILES := $(DATA_DIR)/uk.pmtiles
SATELLITE_TILES := $(DATA_DIR)/satellite.pmtiles
ARCGIS := $(DATA_DIR)/arcgis_data.parquet
PRICE_PAID := $(DATA_DIR)/price-paid-complete.parquet
IOD := $(DATA_DIR)/IoD2025_Scores.parquet
@ -44,7 +45,7 @@ OFSTED := $(DATA_DIR)/ofsted.parquet
GIAS := $(DATA_DIR)/gias.parquet
NAPTAN := $(DATA_DIR)/naptan.parquet
BROADBAND := $(DATA_DIR)/broadband.parquet
CONSERVATION_AREAS := $(DATA_DIR)/conservation_areas.gpkg
CONSERVATION_AREAS := $(DATA_DIR)/conservation_areas.geojson
LISTED_BUILDINGS := $(DATA_DIR)/listed_buildings.gpkg
SCHOOL_PROX := $(DATA_DIR)/school_proximity.parquet
RENTAL := $(DATA_DIR)/rental_prices.parquet
@ -77,6 +78,8 @@ INSPIRE_STAMP := $(INSPIRE_DIR)/.done
MAP_ASSETS_STAMP := $(MAP_ASSETS_DIR)/.done
PMTILES_VERSION := 1.22.3
PMTILES_BIN := $(DATA_DIR)/pmtiles
SATELLITE_TILE_ARGS ?=
VALIDATE_OUTPUTS := uv run python -m pipeline.validate_outputs
@ -101,7 +104,7 @@ MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_
# ── Phony aliases ─────────────────────────────────────────────────────────────
.PHONY: prepare merge tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles \
.PHONY: prepare merge tiles satellite-tiles overlay-tiles noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles \
download-arcgis download-price-paid download-deprivation download-ethnicity \
download-naptan download-pois download-grocery-retail-points download-ofsted download-gias download-broadband download-conservation-areas download-listed-buildings download-rental-prices \
download-postcodes download-noise download-inspire download-crime \
@ -111,12 +114,13 @@ MAP_ASSETS_DEPS := pipeline/download/map_assets.py pipeline/transform/transform_
transform-school-proximity transform-tree-density \
generate-postcode-boundaries generate-travel-times enrich-actual-listings
prepare: $(PRICES_STAMP) download-places tiles overlay-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
prepare: $(PRICES_STAMP) download-places tiles satellite-tiles overlay-tiles generate-postcode-boundaries download-map-assets generate-travel-times | $(POSTCODES_PQ) $(PROPERTIES_PQ) $(PRICE_INDEX)
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ) --parquet $(PRICE_INDEX)
merge: $(MERGE_STAMP) | $(POSTCODES_PQ) $(PROPERTIES_PQ)
$(VALIDATE_OUTPUTS) --parquet $(POSTCODES_PQ) --parquet $(PROPERTIES_PQ)
enrich-actual-listings: $(ACTUAL_LISTINGS_ENRICHED)
tiles: $(TILES)
tiles: $(TILES) $(SATELLITE_TILES)
satellite-tiles: $(SATELLITE_TILES)
overlay-tiles: noise-overlay-tiles crime-hotspot-tiles tree-overlay-tiles
noise-overlay-tiles: $(NOISE_OVERLAY_TILES)
crime-hotspot-tiles: $(CRIME_HOTSPOT_TILES)
@ -183,9 +187,15 @@ generate-travel-times: $(ARCGIS) $(PLACES) $(PBF) download-transit-network
# ── Downloads ─────────────────────────────────────────────────────────────────
$(TILES):
# Provision the pmtiles CLI at $(PMTILES_BIN) by calling ensure_pmtiles_cli()
# from pipeline.download.tiles with the pinned $(PMTILES_VERSION).
# The downloader module is a prerequisite, so this recipe re-runs whenever that
# file is newer than the binary.
# NOTE(review): presumably ensure_pmtiles_cli() downloads the binary when it is
# missing or at the wrong version — confirm against pipeline/download/tiles.py.
$(PMTILES_BIN): pipeline/download/tiles.py
	uv run python -c 'from pathlib import Path; from pipeline.download.tiles import ensure_pmtiles_cli; ensure_pmtiles_cli(Path("$(PMTILES_BIN)"), "$(PMTILES_VERSION)")'
$(TILES): $(PMTILES_BIN)
uv run -m pipeline.download.tiles --output $@ --pmtiles-version $(PMTILES_VERSION)
# Build $(SATELLITE_TILES) with pipeline.download.satellite_tiles, handing it the
# pmtiles CLI path and pinned version. Rebuilds when the CLI binary or either
# downloader module changes. Extra command-line flags can be appended through
# SATELLITE_TILE_ARGS, which is expanded verbatim at the end of the command.
$(SATELLITE_TILES): $(PMTILES_BIN) pipeline/download/satellite_tiles.py pipeline/download/tiles.py
	uv run python -m pipeline.download.satellite_tiles --output $@ --pmtiles-bin $(PMTILES_BIN) --pmtiles-version $(PMTILES_VERSION) $(SATELLITE_TILE_ARGS)
# EPC requires manual registration — fail with instructions
$(EPC):
@echo ""
@ -260,8 +270,8 @@ $(POSTCODES_RAW):
$(NOISE): $(ARCGIS) pipeline/download/noise.py
uv run python -m pipeline.download.noise --arcgis $(ARCGIS) --output $@
$(NOISE_OVERLAY_TILES): pipeline/transform/noise_overlay_tiles.py pipeline/download/noise.py pipeline/download/tiles.py
uv run python -m pipeline.transform.noise_overlay_tiles --output $@ --raster-dir $(DATA_DIR)/noise_overlay_rasters --pmtiles-bin $(DATA_DIR)/pmtiles --pmtiles-version $(PMTILES_VERSION)
$(NOISE_OVERLAY_TILES): $(PMTILES_BIN) pipeline/transform/noise_overlay_tiles.py pipeline/download/noise.py pipeline/download/tiles.py
uv run python -m pipeline.transform.noise_overlay_tiles --output $@ --raster-dir $(DATA_DIR)/noise_overlay_rasters --pmtiles-bin $(PMTILES_BIN) --pmtiles-version $(PMTILES_VERSION)
$(CRIME_HOTSPOT_TILES): $(CRIME_STAMP) pipeline/transform/crime_hotspot_tiles.py pipeline/transform/crime.py
uv run python -m pipeline.transform.crime_hotspot_tiles --input $(CRIME_DIR) --output $@
@ -409,12 +419,30 @@ $(PRICES_STAMP): $(MERGE_STAMP) $(PRICE_INDEX) $(PRICE_ESTIMATE_DEPS) | $(PROPER
$(VALIDATE_OUTPUTS) --parquet $(PROPERTIES_PQ) --parquet $(POSTCODES_PQ) --parquet $(PRICE_INDEX)
@touch $@
$(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(PRICES_STAMP) $(POSTCODES_PQ) $(ARCGIS) $(EPC) \
pipeline/transform/enrich_actual_listings.py pipeline/transform/join_epc_pp.py pipeline/utils/fuzzy_join.py
uv run python -m pipeline.transform.enrich_actual_listings \
--listings $(ACTUAL_LISTINGS_RAW) \
--properties $(PROPERTIES_PQ) \
--postcode-features $(POSTCODES_PQ) \
$(ACTUAL_LISTINGS_ENRICHED): $(ACTUAL_LISTINGS_RAW) $(EPC) \
$(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) \
$(CONSERVATION_AREAS) $(LISTED_BUILDINGS) $(RENTAL) \
$(LSOA_POP) $(MEDIAN_AGE) $(ELECTION) $(TREE_DENSITY_PC) \
$(MERGE_DEPS) pipeline/utils/fuzzy_join.py
uv run python -m pipeline.transform.merge \
--epc-pp $(EPC_PP) \
--arcgis $(ARCGIS) \
--iod $(IOD) \
--poi-proximity $(POI_PROXIMITY) \
--ethnicity $(ETHNICITY) \
--crime $(CRIME) \
--noise $(NOISE) \
--school-proximity $(SCHOOL_PROX) \
--broadband $(BROADBAND) \
--conservation-areas $(CONSERVATION_AREAS) \
--listed-buildings $(LISTED_BUILDINGS) \
--rental-prices $(RENTAL) \
--lsoa-population $(LSOA_POP) \
--median-age $(MEDIAN_AGE) \
--election-results $(ELECTION) \
--tree-density-postcodes $(TREE_DENSITY_PC) \
--actual-listings $(ACTUAL_LISTINGS_RAW) \
--epc $(EPC) \
--output $@
--output-listings $@
$(VALIDATE_OUTPUTS) --parquet $@

View file

@ -86,9 +86,8 @@ make -f Makefile.data download-places
make -f Makefile.data generate-postcode-boundaries
```
`generate-postcode-boundaries` writes to `manual-data/postcode_boundaries/`.
The running server expects the same structure under
`property-data/postcode_boundaries/`; copy or symlink it if needed.
`generate-postcode-boundaries` writes to `property-data/postcode_boundaries/`,
which is the same directory the local server expects by default.
Travel times are built separately because they are expensive:

View file

@ -29,6 +29,8 @@ from constants import (
)
from spatial import PostcodeSpatialIndex
from transform import (
clean_listing_address,
extract_full_postcode,
fix_coords,
map_property_type,
normalize_sub_type,
@ -177,9 +179,13 @@ def transform_property(
if not (49 <= lat <= 56 and -7 <= lng <= 2):
return None
postcode = pc_index.nearest(lat, lng)
if not postcode:
inferred_postcode = pc_index.nearest(lat, lng)
if not inferred_postcode:
return None
raw_address = raw.get("address", "") or ""
extracted_postcode = extract_full_postcode(raw_address)
postcode = extracted_postcode or inferred_postcode
postcode_source = "address" if extracted_postcode else "coordinates"
raw_beds = raw.get("bedrooms") or 0
raw_baths = raw.get("bathrooms") or 0
@ -212,7 +218,11 @@ def transform_property(
"lon": lng,
"lat": lat,
"Postcode": postcode,
"Address per Property Register": raw.get("address", ""),
"Postcode source": postcode_source,
"Extracted postcode": extracted_postcode,
"Inferred postcode": inferred_postcode,
"Listing raw address": raw_address,
"Address per Property Register": clean_listing_address(raw_address),
"Leasehold/Freehold": _extract_tenure(features),
"Property type": map_property_type(sub_type),
"Property sub-type": normalize_sub_type(sub_type),

View file

@ -105,6 +105,24 @@ def write_parquet(properties: list[dict], path: Path) -> None:
"lon": [p["lon"] for p in properties],
"lat": [p["lat"] for p in properties],
"Postcode": [normalize_postcode(p["Postcode"]) for p in properties],
"Postcode source": [p.get("Postcode source", "") for p in properties],
"Extracted postcode": [
normalize_postcode(p["Extracted postcode"])
if p.get("Extracted postcode")
else None
for p in properties
],
"Inferred postcode": [
normalize_postcode(p["Inferred postcode"])
if p.get("Inferred postcode")
else None
for p in properties
],
"Listing raw address": [
p.get("Listing raw address")
or p.get("Address per Property Register", "")
for p in properties
],
"Address per Property Register": [
p["Address per Property Register"] for p in properties
],
@ -126,6 +144,10 @@ def write_parquet(properties: list[dict], path: Path) -> None:
"lon": pl.Float64,
"lat": pl.Float64,
"Postcode": pl.Utf8,
"Postcode source": pl.Utf8,
"Extracted postcode": pl.Utf8,
"Inferred postcode": pl.Utf8,
"Listing raw address": pl.Utf8,
"Address per Property Register": pl.Utf8,
"Leasehold/Freehold": pl.Utf8,
"Property type": pl.Utf8,

48
finder/test_transform.py Normal file
View file

@ -0,0 +1,48 @@
from transform import (
clean_listing_address,
extract_full_postcode,
transform_property,
)
class StubPostcodeIndex:
    """Test double for the postcode spatial index.

    Every lookup resolves to the same fixed postcode, whatever coordinates
    are supplied, so tests can tell it apart from a postcode parsed out of
    the listing address.
    """

    _FIXED_POSTCODE = "SW1A 9ZZ"

    def nearest(self, lat: float, lng: float) -> str:
        """Return the canned postcode, ignoring ``lat`` and ``lng``."""
        return self._FIXED_POSTCODE
def test_extract_full_postcode_normalizes_spacing() -> None:
    """Postcodes are found with or without an inner space; none yields None."""
    expectations = [
        ("10 Downing Street SW1A2AA", "SW1A 2AA"),
        ("10 Downing Street, SW1A 2AA", "SW1A 2AA"),
    ]
    for address, postcode in expectations:
        assert extract_full_postcode(address) == postcode

    # An address without any full postcode must produce None, not "".
    assert extract_full_postcode("Downing Street, Westminster") is None
def test_clean_listing_address_removes_postcode_and_outcode_suffixes() -> None:
    """Trailing postcodes and outcodes are dropped; plain addresses pass through."""
    cases = {
        # full postcode suffix
        "10 Downing Street, SW1A 2AA": "10 Downing Street",
        # outcode-only suffix
        "Hawthorne Road, Bromley, Kent, BR1": "Hawthorne Road, Bromley, Kent",
        # nothing to strip
        "Kings Avenue, Bromley": "Kings Avenue, Bromley",
    }
    for raw, cleaned in cases.items():
        assert clean_listing_address(raw) == cleaned
def test_rightmove_transform_prefers_postcode_from_display_address() -> None:
    """A postcode written in ``displayAddress`` wins over the coordinate lookup."""
    listing = {
        "id": "123",
        "location": {"latitude": 51.5, "longitude": -0.1},
        "price": {"amount": 750000, "displayPrices": []},
        "propertySubType": "Terraced",
        "bedrooms": 3,
        "bathrooms": 1,
        "keyFeatures": [],
        "propertyUrl": "/properties/123",
        "displayAddress": "Flat 2, 10 Downing Street, SW1A 2AA",
    }

    transformed = transform_property(listing, "SW1A", StubPostcodeIndex())
    assert transformed is not None

    # The stub index always answers "SW1A 9ZZ", so any "SW1A 2AA" below can
    # only have come from the address text.
    expected_fields = {
        "Postcode": "SW1A 2AA",
        "Postcode source": "address",
        "Extracted postcode": "SW1A 2AA",
        "Inferred postcode": "SW1A 9ZZ",
        "Listing raw address": "Flat 2, 10 Downing Street, SW1A 2AA",
        "Address per Property Register": "Flat 2, 10 Downing Street",
    }
    for field, value in expected_fields.items():
        assert transformed[field] == value

View file

@ -14,6 +14,18 @@ log = logging.getLogger("rightmove")
# UK mansions.
MIN_FLOOR_AREA_SQM = 5.0
MAX_FLOOR_AREA_SQM = 2000.0
# Full UK postcode anywhere in a string, matched case-insensitively:
# 1-2 letters, a digit, an optional letter/digit, optional whitespace, then a
# digit and two letters. Group 1 captures the whole postcode.
FULL_POSTCODE_RE = re.compile(
    r"\b([A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2})\b",
    re.IGNORECASE,
)
# Same postcode shape, but only at the very end of the string; an optional
# preceding comma and surrounding whitespace are part of the match so they are
# removed together with the postcode.
TRAILING_FULL_POSTCODE_RE = re.compile(
    r"(?:,?\s*)\b[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}\b\s*$",
    re.IGNORECASE,
)
# Outward code only (e.g. "BR1") at the very end of the string, with the same
# optional comma/whitespace handling.
# NOTE(review): this shape also matches any trailing token such as "A1" or
# "B12" — confirm that is acceptable for addresses ending in a unit label.
TRAILING_OUTCODE_RE = re.compile(
    r"(?:,?\s*)\b[A-Z]{1,2}\d[A-Z\d]?\b\s*$",
    re.IGNORECASE,
)
def validate_floor_area(sqm: float | None) -> float | None:
@ -184,6 +196,32 @@ def normalize_postcode(postcode: str) -> str:
return compact[:-3] + " " + compact[-3:]
def extract_full_postcode(text: str | None) -> str | None:
    """Return the first full UK postcode found in ``text``.

    Args:
        text: Free-form text such as a listing display address. ``None`` or
            an empty string is accepted.

    Returns:
        The first match of ``FULL_POSTCODE_RE`` passed through
        ``normalize_postcode``, or ``None`` when ``text`` is falsy or holds
        no full postcode.
    """
    found = FULL_POSTCODE_RE.search(text) if text else None
    return normalize_postcode(found.group(1)) if found else None
def clean_listing_address(address: str | None) -> str:
    """Remove postcode/outcode suffixes from listing display addresses.

    Listing sites often include "..., BR1" or "..., SW1A 1AA" in their public
    address. Those tokens add fake address numbers to the fuzzy matcher, so keep
    the raw address separately and use this cleaned value for matching.

    Args:
        address: Raw display address. ``None`` or an empty string is accepted.

    Returns:
        The address with a trailing full postcode and/or trailing outcode
        removed, runs of whitespace collapsed to one space, commas normalised
        to ", " and no leading or trailing spaces or commas. Returns "" when
        ``address`` is falsy.
    """
    if not address:
        return ""
    # The suffix patterns are anchored at the end of the string, so dangling
    # separators must go first: "..., BR1," would otherwise keep its outcode
    # because the final comma sits between the token and the end anchor.
    cleaned = re.sub(r"^[\s,]+|[\s,]+$", "", str(address))
    # Full postcode first, then a bare outcode that may precede it
    # ("Canary Wharf E14, E14 9SS" loses both).
    cleaned = TRAILING_FULL_POSTCODE_RE.sub("", cleaned)
    cleaned = TRAILING_OUTCODE_RE.sub("", cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned)
    cleaned = re.sub(r"\s*,\s*", ", ", cleaned)
    return cleaned.strip(" ,")
def transform_property(
prop: dict, outcode: str, pc_index: PostcodeSpatialIndex
) -> dict | None:
@ -224,10 +262,14 @@ def transform_property(
if kf.get("description")
]
postcode = pc_index.nearest(lat, lng)
if not postcode:
inferred_postcode = pc_index.nearest(lat, lng)
if not inferred_postcode:
log.debug("No England postcode for property at %.4f, %.4f — skipping", lat, lng)
return None
raw_address = prop.get("displayAddress", "") or ""
extracted_postcode = extract_full_postcode(raw_address)
postcode = extracted_postcode or inferred_postcode
postcode_source = "address" if extracted_postcode else "coordinates"
property_url = prop.get("propertyUrl") or ""
if not isinstance(property_url, str):
@ -244,7 +286,11 @@ def transform_property(
"lon": lng,
"lat": lat,
"Postcode": postcode,
"Address per Property Register": prop.get("displayAddress", ""),
"Postcode source": postcode_source,
"Extracted postcode": extracted_postcode,
"Inferred postcode": inferred_postcode,
"Listing raw address": raw_address,
"Address per Property Register": clean_listing_address(raw_address),
"Leasehold/Freehold": extract_tenure(prop.get("tenure")),
"Property type": map_property_type(sub_type),
"Property sub-type": normalize_sub_type(sub_type),

View file

@ -37,7 +37,13 @@ from constants import (
ZOOPLA_BASE,
)
from spatial import PostcodeSpatialIndex
from transform import normalize_sub_type, parse_int_value, validate_floor_area
from transform import (
clean_listing_address,
extract_full_postcode,
normalize_sub_type,
parse_int_value,
validate_floor_area,
)
log = logging.getLogger("zoopla")
@ -1031,19 +1037,6 @@ def _resolve_outcode_coords(
return None
def _extract_postcode(text: str) -> str | None:
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'.
Normalizes to include a space before the 3-char incode."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
raw = match.group(1).upper().strip()
# Ensure space before incode (last 3 chars): "SW1A1AA" → "SW1A 1AA"
if " " not in raw and len(raw) >= 5:
return raw[:-3] + " " + raw[-3:]
return raw
return None
def _extract_outcode(text: str) -> str | None:
"""Extract a UK outcode from address text like 'Whitechapel Road, London E1'."""
# Look for outcode at end of string or after last comma
@ -1123,10 +1116,12 @@ def transform_property(
from postcodes extracted from the address text."""
price = parse_int_value(raw.get("price")) or 0
address = raw.get("address", "")
address = raw.get("address", "") or ""
# Resolve postcode and coordinates from address
postcode = _extract_postcode(address)
extracted_postcode = extract_full_postcode(address)
postcode = extracted_postcode
postcode_source = "address" if extracted_postcode else None
lat = lng = None
if postcode:
@ -1141,12 +1136,14 @@ def transform_property(
result = _resolve_outcode_coords(addr_outcode, pc_coords)
if result:
postcode, lat, lng = result
postcode_source = "address_outcode"
# Final fallback: use the outcode we know we're searching
if lat is None and search_outcode:
result = _resolve_outcode_coords(search_outcode, pc_coords)
if result:
postcode, lat, lng = result
postcode_source = "search_outcode"
if lat is None or lng is None or not postcode:
return None
@ -1189,7 +1186,11 @@ def transform_property(
"lon": lng,
"lat": lat,
"Postcode": postcode,
"Address per Property Register": address,
"Postcode source": postcode_source or "unknown",
"Extracted postcode": extracted_postcode,
"Inferred postcode": postcode if postcode_source != "address" else None,
"Listing raw address": address,
"Address per Property Register": clean_listing_address(address),
"Leasehold/Freehold": raw.get("tenure") or None,
"Property type": _map_property_type(raw.get("property_type")),
"Property sub-type": normalize_sub_type(raw.get("property_type")),

View file

@ -556,6 +556,7 @@ export default function App() {
initialViewState={initialViewState}
initialPOICategories={urlState.poiCategories}
initialOverlays={urlState.overlays}
initialBasemap={urlState.basemap}
initialTab={urlState.tab}
initialLoading={initialLoading}
theme={theme}
@ -661,6 +662,7 @@ export default function App() {
initialViewState={initialViewState}
initialPOICategories={mapUrlState.poiCategories}
initialOverlays={mapUrlState.overlays}
initialBasemap={mapUrlState.basemap}
initialTab={mapUrlState.tab}
initialLoading={initialLoading}
theme={theme}

View file

@ -65,7 +65,7 @@ const DATA_SOURCE_DEFS: DataSourceDef[] = [
},
{
id: 'conservation-areas',
url: 'https://opendata-historicengland.hub.arcgis.com/datasets/historicengland::conservation-areas/explore',
url: 'https://www.planning.data.gov.uk/dataset/conservation-area',
license: 'Open Government Licence v3.0',
},
{

View file

@ -19,6 +19,10 @@ import {
} from '../../lib/format';
import { groupFeaturesByCategory } from '../../lib/features';
import { getPoiCategoryLogoUrl } from '../../lib/map-utils';
import {
getActiveAmenityFilterFeatureNames,
isPoiFilterFeatureName,
} from '../../lib/poi-distance-filter';
import {
PARTY_FEATURE_COLORS,
STACKED_GROUPS,
@ -88,7 +92,7 @@ const STATION_GROUP_NAMES = new Set([STATION_GROUP_NAME, 'Public Transport']);
function MetricTextLabel({ children }: { children: ReactNode }) {
return (
<span className="block truncate text-[13px] font-medium leading-5 text-warm-900 dark:text-warm-100">
<span className="block min-w-0 flex-1 break-words text-[13px] font-medium leading-snug text-warm-900 dark:text-warm-100">
{children}
</span>
);
@ -106,7 +110,7 @@ function MetricFeatureLabel({
aboutLabel: string;
}) {
return (
<div className="flex min-w-0 items-center gap-1.5">
<div className="flex min-w-0 items-start gap-1.5">
<MetricTextLabel>{label ?? ts(feature.name)}</MetricTextLabel>
{feature.detail && (
<button
@ -239,14 +243,40 @@ export default function AreaPane({
const filtersActive = activeFilterCount > 0;
const filteredStatsEmpty = filtersActive && statsUseFilters && stats?.count === 0;
const showFlipToggleCallout = filteredStatsEmpty && unfilteredCount !== 0;
const activeFilterNames = useMemo(() => new Set(Object.keys(filters)), [filters]);
const activeAmenityFeatureNames = useMemo(
() => getActiveAmenityFilterFeatureNames(filters),
[filters]
);
const featureGroups = useMemo(() => groupFeaturesByCategory(globalFeatures), [globalFeatures]);
const paneFeatureGroups = useMemo<FeatureGroup[]>(
() =>
featureGroups
.map((group) => {
if (group.name !== 'Amenities') return group;
const features = group.features.filter((feature) => {
if (isPoiFilterFeatureName(feature.name)) {
return activeAmenityFeatureNames.has(feature.name);
}
return activeFilterNames.has(feature.name);
});
return { ...group, features };
})
.filter((group) => group.name !== 'Amenities' || group.features.length > 0),
[activeAmenityFeatureNames, activeFilterNames, featureGroups]
);
const displayFeatureGroups = useMemo<FeatureGroup[]>(() => {
if (!hexagonLocation || featureGroups.some((group) => STATION_GROUP_NAMES.has(group.name))) {
return featureGroups;
if (
!hexagonLocation ||
paneFeatureGroups.some((group) => STATION_GROUP_NAMES.has(group.name))
) {
return paneFeatureGroups;
}
return [{ name: STATION_GROUP_NAME, features: [] }, ...featureGroups];
}, [featureGroups, hexagonLocation]);
return [{ name: STATION_GROUP_NAME, features: [] }, ...paneFeatureGroups];
}, [paneFeatureGroups, hexagonLocation]);
const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);
const { scrollRef, onScroll } = useRetainedScrollTop<HTMLDivElement>({
restoreKey: scrollRestoreKey ?? hexagonId,
@ -361,17 +391,17 @@ export default function AreaPane({
</div>
<div className="rounded border border-warm-200 bg-warm-50 px-2.5 py-2 dark:border-navy-700 dark:bg-navy-900">
<div className="flex items-center justify-between gap-2">
<div className="flex flex-wrap items-center justify-between gap-2">
<span className="text-xs font-semibold text-warm-700 dark:text-warm-200">
{t('areaPane.statsBasis')}
</span>
<div className="inline-flex shrink-0 rounded-md bg-warm-200 p-0.5 dark:bg-navy-800">
<div className="grid min-w-0 flex-1 basis-52 grid-cols-2 rounded-md bg-warm-200 p-0.5 dark:bg-navy-800">
<button
type="button"
disabled={!filtersActive}
aria-pressed={statsUseFilters && filtersActive}
onClick={() => onStatsUseFiltersChange(true)}
className={`rounded px-2 py-1 text-xs font-medium ${
className={`min-w-0 rounded px-2 py-1 text-center text-xs font-medium leading-tight break-words ${
statsUseFilters && filtersActive
? 'bg-white text-teal-700 shadow-sm dark:bg-navy-700 dark:text-teal-300'
: 'text-warm-600 hover:text-warm-900 disabled:cursor-not-allowed disabled:opacity-50 dark:text-warm-400 dark:hover:text-warm-100'
@ -383,7 +413,7 @@ export default function AreaPane({
type="button"
aria-pressed={!statsUseFilters || !filtersActive}
onClick={() => onStatsUseFiltersChange(false)}
className={`rounded px-2 py-1 text-xs font-medium ${
className={`min-w-0 rounded px-2 py-1 text-center text-xs font-medium leading-tight break-words ${
!statsUseFilters || !filtersActive
? 'bg-white text-teal-700 shadow-sm dark:bg-navy-700 dark:text-teal-300'
: 'text-warm-600 hover:text-warm-900 dark:text-warm-400 dark:hover:text-warm-100'
@ -426,7 +456,7 @@ export default function AreaPane({
key={`${exclusion.kind}:${exclusion.name}:${exclusion.direction}:${exclusion.category ?? ''}`}
className="rounded bg-white/70 px-2 py-1.5 dark:bg-navy-950/40"
>
<div className="truncate font-medium">
<div className="break-words font-medium leading-snug">
{getExclusionLabel(exclusion)}
</div>
<p className="mt-0.5 text-amber-800/80 dark:text-amber-100/80">
@ -479,7 +509,8 @@ export default function AreaPane({
const hasData = group.features.some(
(feature) => numericByName.has(feature.name) || enumByName.has(feature.name)
);
if (!hasData && !showNearbyStations) return null;
const expanded = isGroupExpanded(group.name);
if (!hasData && !showNearbyStations && stats.count === 0) return null;
const stackedCharts = STACKED_GROUPS[group.name];
const stackedEnumCharts = STACKED_ENUM_GROUPS[group.name];
@ -490,8 +521,6 @@ export default function AreaPane({
) ?? []
);
const expanded = isGroupExpanded(group.name);
return (
<div key={group.name}>
<CollapsibleGroupHeader
@ -560,9 +589,10 @@ export default function AreaPane({
feature={{ ...featureMeta, name: ts(chart.label) }}
onShowInfo={setInfoFeature}
className="mr-2"
wrap
/>
) : (
<span className="text-xs text-warm-700 dark:text-warm-300 truncate mr-2">
<span className="mr-2 min-w-0 break-words text-xs leading-snug text-warm-700 dark:text-warm-300">
{ts(chart.label)}
</span>
)}
@ -634,44 +664,46 @@ export default function AreaPane({
chart={
crimeSeries && crimeSeries.points.length > 1 ? (
<CrimeYearChart points={crimeSeries.points} />
) : (numericStats.histogram &&
(globalHistogram ? (
<DualHistogram
localCounts={numericStats.histogram.counts}
globalCounts={globalHistogram.counts}
p1={numericStats.histogram.p1}
p99={numericStats.histogram.p99}
globalMean={globalMean}
meanLabel={t('areaPane.nationalAvg')}
formatLabel={(v) =>
formatFilterValue(
v,
feature.suffix === '%'
? { raw: feature.raw, suffix: feature.suffix }
: feature.raw
)
}
integerAxisLabels={feature.step === 1}
compact
/>
) : (
<DualHistogram
localCounts={numericStats.histogram.counts}
globalCounts={numericStats.histogram.counts}
p1={numericStats.histogram.p1}
p99={numericStats.histogram.p99}
formatLabel={(v) =>
formatFilterValue(
v,
feature.suffix === '%'
? { raw: feature.raw, suffix: feature.suffix }
: feature.raw
)
}
integerAxisLabels={feature.step === 1}
compact
/>
)))
numericStats.histogram &&
(globalHistogram ? (
<DualHistogram
localCounts={numericStats.histogram.counts}
globalCounts={globalHistogram.counts}
p1={numericStats.histogram.p1}
p99={numericStats.histogram.p99}
globalMean={globalMean}
meanLabel={t('areaPane.nationalAvg')}
formatLabel={(v) =>
formatFilterValue(
v,
feature.suffix === '%'
? { raw: feature.raw, suffix: feature.suffix }
: feature.raw
)
}
integerAxisLabels={feature.step === 1}
compact
/>
) : (
<DualHistogram
localCounts={numericStats.histogram.counts}
globalCounts={numericStats.histogram.counts}
p1={numericStats.histogram.p1}
p99={numericStats.histogram.p99}
formatLabel={(v) =>
formatFilterValue(
v,
feature.suffix === '%'
? { raw: feature.raw, suffix: feature.suffix }
: feature.raw
)
}
integerAxisLabels={feature.step === 1}
compact
/>
))
)
}
value={formatValue(numericStats.mean, feature)}
valueTitle={
@ -690,7 +722,11 @@ export default function AreaPane({
key={feature.name}
className="bg-warm-50 dark:bg-warm-800 rounded p-2"
>
<FeatureLabel feature={feature} onShowInfo={setInfoFeature} />
<FeatureLabel
feature={feature}
onShowInfo={setInfoFeature}
wrap
/>
<EnumBarChart
counts={enumStats.counts}
globalCounts={globalFeature?.counts}
@ -729,9 +765,10 @@ export default function AreaPane({
feature={featureMeta}
onShowInfo={setInfoFeature}
className="mr-2"
wrap
/>
) : (
<span className="text-xs text-warm-700 dark:text-warm-300 truncate mr-2">
<span className="mr-2 min-w-0 break-words text-xs leading-snug text-warm-700 dark:text-warm-300">
{ts(chart.label)}
</span>
)}
@ -769,6 +806,7 @@ export default function AreaPane({
<FeatureLabel
feature={{ ...featureMeta, name: ts(chart.label) }}
onShowInfo={setInfoFeature}
wrap
/>
) : (
<span className="text-xs text-warm-700 dark:text-warm-300">

View file

@ -1,4 +1,4 @@
import { cleanup, render, screen } from '@testing-library/react';
import { cleanup, render, screen, waitFor } from '@testing-library/react';
import { afterEach, describe, expect, it, vi } from 'vitest';
import JourneyInstructions, { googleMapsUrl } from './JourneyInstructions';
@ -12,6 +12,8 @@ vi.mock('react-i18next', () => ({
if (key === 'common.minute') return 'min';
if (key === 'common.loading') return 'Loading';
if (key === 'travel.bestCase') return 'Best case';
if (key === 'travel.noChange') return 'No change';
if (key === 'travel.noBuses') return 'No buses';
if (key === 'areaPane.walk') return 'Walk';
if (key === 'areaPane.cycle') return 'Cycle';
if (key === 'areaPane.viewOnGoogleMaps') return 'View on Google Maps';
@ -24,6 +26,7 @@ vi.mock('react-i18next', () => ({
describe('JourneyInstructions', () => {
afterEach(() => {
cleanup();
vi.unstubAllGlobals();
vi.useRealTimers();
});
@ -39,6 +42,8 @@ describe('JourneyInstructions', () => {
minutes: 42,
bestMinutes: 25,
useBest: true,
noChange: true,
noBuses: true,
legs: [
{ mode: 'walk', minutes: 8 },
{
@ -61,6 +66,8 @@ describe('JourneyInstructions', () => {
);
expect(screen.getByText(/Best case/)).toBeTruthy();
expect(screen.getByText(/No change/)).toBeTruthy();
expect(screen.getByText(/No buses/)).toBeTruthy();
expect(screen.getByText('Jubilee line')).toBeTruthy();
expect(screen.getByText('Northern line')).toBeTruthy();
expect(screen.getByText(/Canary Wharf/)).toBeTruthy();
@ -89,4 +96,46 @@ describe('JourneyInstructions', () => {
expect(parsed.searchParams.get('destination')).toBe('Bank tube station');
});
it('requests journey data with the selected transit variant', async () => {
  // Replace global fetch with a spy that resolves to a successful response
  // carrying totals but no journey legs.
  const fetchSpy = vi.fn().mockResolvedValue({
    ok: true,
    json: async () => ({
      journey: null,
      minutes: 42,
      best_minutes: 25,
      destination_lat: 51.5132819,
      destination_lon: -0.0895555,
    }),
  });
  vi.stubGlobal('fetch', fetchSpy);

  render(
    <JourneyInstructions
      postcode="E14 2DG"
      entries={[
        {
          mode: 'transit',
          slug: 'bank-tube-station',
          label: 'Bank',
          timeRange: [0, 60],
          useBest: false,
          noChange: true,
          noBuses: true,
        },
      ]}
      showGoogleMapsLink={false}
    />
  );

  await waitFor(() => expect(fetchSpy).toHaveBeenCalled());

  // Inspect the first request: both restrictions on the entry must be folded
  // into the `mode` query parameter.
  const requestUrl = fetchSpy.mock.calls[0][0] as string;
  const { pathname, searchParams } = new URL(requestUrl, 'http://localhost');
  expect(pathname).toBe('/api/journey');
  expect(searchParams.get('postcode')).toBe('E14 2DG');
  expect(searchParams.get('mode')).toBe('transit-no-change-no-bus');
  expect(searchParams.get('slug')).toBe('bank-tube-station');

  // Once the response lands, the restriction labels are rendered together.
  await screen.findByText('No change · No buses');
});
});

View file

@ -1,7 +1,7 @@
import { useState, useEffect, useMemo } from 'react';
import { useTranslation } from 'react-i18next';
import type { JourneyLeg } from '../../types';
import type { TravelTimeEntry } from '../../hooks/useTravelTime';
import { resolveTransitVariant, type TravelTimeEntry } from '../../hooks/useTravelTime';
import { apiUrl, authHeaders, logNonAbortError } from '../../lib/api';
import { WalkingIcon } from '../ui/icons/WalkingIcon';
import { BicycleIcon } from '../ui/icons/BicycleIcon';
@ -30,6 +30,8 @@ interface JourneyData {
destinationLon: number | null;
/** Whether the dashboard filter is currently using best-case time. */
useBest: boolean;
noChange: boolean;
noBuses: boolean;
loading: boolean;
}
@ -44,6 +46,8 @@ export interface JourneyInstructionPreset {
destinationLat?: number | null;
destinationLon?: number | null;
useBest?: boolean;
noChange?: boolean;
noBuses?: boolean;
}
// Official TfL line colors + other known London transit
@ -305,14 +309,17 @@ export default function JourneyInstructions({
destinationLat: null,
destinationLon: null,
useBest: e.useBest,
noChange: e.noChange ?? false,
noBuses: e.noBuses ?? false,
loading: true,
}));
setJourneys([...results]);
transitEntries.forEach((entry, idx) => {
const serverMode = resolveTransitVariant(entry);
const params = new URLSearchParams({
postcode,
mode: 'transit',
mode: serverMode,
slug: entry.slug,
});
if (shareCode) params.set('share', shareCode);
@ -367,6 +374,8 @@ export default function JourneyInstructions({
destinationLat: journey.destinationLat ?? null,
destinationLon: journey.destinationLon ?? null,
useBest: journey.useBest ?? false,
noChange: journey.noChange ?? false,
noBuses: journey.noBuses ?? false,
loading: false,
}))
: journeys;
@ -382,20 +391,30 @@ export default function JourneyInstructions({
const legSum = j.legs ? j.legs.reduce((sum, l) => sum + l.minutes, 0) : 0;
const totalMin = j.useBest && j.bestMinutes != null ? j.bestMinutes : (j.minutes ?? legSum);
const isBestCase = j.useBest && j.bestMinutes != null;
const journeyLabels: string[] = [];
if (isBestCase) journeyLabels.push(t('travel.bestCase'));
if (j.noChange) journeyLabels.push(t('travel.noChange'));
if (j.noBuses) journeyLabels.push(t('travel.noBuses'));
const displayLegs = j.legs ? invertLegs(j.legs) : null;
const destination = j.label || j.slug;
return (
<div key={`${j.slug}-${index}`} className="bg-warm-50 dark:bg-warm-800 rounded-lg p-2.5">
<div className="flex items-baseline justify-between mb-2">
<span className="text-xs font-medium text-warm-700 dark:text-warm-300">
<div className="flex items-start justify-between gap-2 mb-2">
<span className="min-w-0 text-xs font-medium text-warm-700 dark:text-warm-300">
{t('areaPane.to', { destination })}
</span>
{!j.loading && totalMin > 0 && (
<span className="text-xs font-semibold text-teal-700 dark:text-teal-400">
{isBestCase ? `${t('travel.bestCase')} · ` : ''}
{totalMin} {t('common.minute')}
</span>
<div className="min-w-0 max-w-[58%] text-right">
{journeyLabels.length > 0 && (
<div className="text-[10px] font-semibold leading-tight text-teal-700 dark:text-teal-400">
{journeyLabels.join(' · ')}
</div>
)}
<div className="text-xs font-semibold leading-tight text-teal-700 dark:text-teal-400">
{totalMin} {t('common.minute')}
</div>
</div>
)}
</div>
{j.loading ? (
@ -441,8 +460,8 @@ export default function JourneyInstructions({
<WalkingIcon className="w-3.5 h-3.5 text-warm-500 dark:text-warm-400 shrink-0" />
)}
<span className="text-xs text-warm-600 dark:text-warm-300">
{isBestCase ? t('travel.bestCase') : t('areaPane.walk')} · {totalMin}{' '}
{t('common.minute')}
{journeyLabels.length > 0 ? journeyLabels.join(' · ') : t('areaPane.walk')} ·{' '}
{totalMin} {t('common.minute')}
</span>
</div>
{showGoogleMapsLink && (

View file

@ -22,9 +22,10 @@ import type {
import {
zoomToResolution,
getBoundsFromViewState,
getVisibleBoundsFromViewState,
getBoundsWithBottomScreenInset,
getMapStyle,
getMapDataBeforeId,
getPoiIconUrl,
getMapCenterForTargetScreenPoint,
} from '../../lib/map-utils';
@ -45,6 +46,7 @@ import { useDeckLayers } from '../../hooks/useDeckLayers';
import { useTranslatedModes, type TravelTimeEntry } from '../../hooks/useTravelTime';
import { ts } from '../../i18n/server';
import type { OverlayId } from '../../lib/overlays';
import type { BasemapId } from '../../lib/basemaps';
interface MapProps {
data: HexagonData[];
@ -52,6 +54,7 @@ interface MapProps {
usePostcodeView: boolean;
pois: POI[];
activeOverlays?: Set<OverlayId>;
basemap?: BasemapId;
actualListings?: ActualListing[];
onViewChange: (params: ViewChangeParams) => void;
viewFeature: string | null;
@ -105,6 +108,130 @@ function formatListingHeadline(listing: ActualListing, t: TFunction): string | n
return parts.length > 0 ? parts.join(' · ') : null;
}
/**
 * Popup body for a single listing.
 *
 * The whole card is one anchor pointing at `listing.listing_url`, opened in a
 * new tab. Each detail row (price, headline, address, postcode, floor area,
 * features) is rendered only when the corresponding value is present.
 *
 * @param listing - The listing to describe.
 * @param t - Translation function forwarded to `formatListingHeadline`.
 */
function ListingPopupSingleContent({ listing, t }: { listing: ActualListing; t: TFunction }) {
  // Build the headline once and reuse it for both the guard and the content,
  // mirroring how ListingClusterPopupContent handles its rows.
  const headline = formatListingHeadline(listing, t);
  return (
    <a
      href={listing.listing_url}
      target="_blank"
      rel="noopener noreferrer"
      className="block px-3 py-2"
    >
      {listing.asking_price != null && (
        <div className="text-base font-bold text-teal-600 dark:text-teal-400">
          {formatListingPrice(listing.asking_price)}
          {listing.price_qualifier ? (
            <span className="ml-1 text-xs font-medium text-warm-500 dark:text-warm-400">
              {listing.price_qualifier}
            </span>
          ) : null}
        </div>
      )}
      {headline && (
        <div className="text-xs text-warm-700 dark:text-warm-200 mt-0.5">{headline}</div>
      )}
      {listing.address && (
        <div className="text-xs text-warm-500 dark:text-warm-400 mt-0.5 line-clamp-2">
          {listing.address}
        </div>
      )}
      {listing.postcode && (
        <div className="text-[11px] text-warm-400 dark:text-warm-500 mt-0.5">
          {listing.postcode}
        </div>
      )}
      {listing.floor_area_sqm != null && (
        <div className="text-[11px] text-warm-500 dark:text-warm-400 mt-0.5">
          {Math.round(listing.floor_area_sqm)} sqm
          {listing.asking_price_per_sqm != null
            ? ` · £${Math.round(listing.asking_price_per_sqm).toLocaleString()}/sqm`
            : ''}
        </div>
      )}
      {listing.features.length > 0 && (
        <ul className="mt-1.5 text-[11px] text-warm-600 dark:text-warm-300 list-disc pl-4 space-y-0.5">
          {/* Only the first three features are shown to keep the popup compact. */}
          {listing.features.slice(0, 3).map((feature, idx) => (
            <li key={idx} className="line-clamp-1">
              {feature}
            </li>
          ))}
        </ul>
      )}
      <div className="mt-1.5 text-[11px] text-teal-600 dark:text-teal-400 font-medium">
        Open listing
      </div>
    </a>
  );
}
/**
 * Popup body for a cluster of listings.
 *
 * Renders a header with the cluster's total `count` and a summary line, then,
 * when `listings` is non-empty, a scrollable list with one link per listing
 * (price or the fallback text, headline, address and postcode).
 *
 * @param count - Total number of listings in the cluster.
 * @param listings - The listings available to show as rows; may be empty.
 * @param t - Translation function forwarded to `formatListingHeadline`.
 */
function ListingClusterPopupContent({
  count,
  listings,
  t,
}: {
  count: number;
  listings: ActualListing[];
  t: TFunction;
}) {
  const shownCount = listings.length;
  const hasRows = shownCount > 0;
  const totalLabel = count.toLocaleString();
  // With no rows to show, the summary falls back to a generic description.
  const summary = hasRows
    ? `Showing ${shownCount.toLocaleString()} of ${totalLabel}`
    : 'Grouped near this map position';

  const rows = listings.map((item, position) => {
    const headline = formatListingHeadline(item, t);
    const priceLabel =
      item.asking_price != null ? formatListingPrice(item.asking_price) : 'Listing';
    return (
      <a
        key={`${item.listing_url}-${position}`}
        href={item.listing_url}
        target="_blank"
        rel="noopener noreferrer"
        className="block border-b border-warm-100 px-3 py-2 last:border-b-0 hover:bg-warm-50 dark:border-warm-700 dark:hover:bg-warm-700/60"
      >
        <div className="flex items-start justify-between gap-3">
          <div className="min-w-0">
            <div className="text-sm font-semibold text-teal-700 dark:text-teal-300">
              {priceLabel}
            </div>
            {headline && (
              <div className="mt-0.5 truncate text-xs text-warm-700 dark:text-warm-200">
                {headline}
              </div>
            )}
            {item.address && (
              <div className="mt-0.5 line-clamp-1 text-[11px] text-warm-500 dark:text-warm-400">
                {item.address}
              </div>
            )}
          </div>
          {item.postcode && (
            <div className="shrink-0 text-[11px] font-medium text-warm-400 dark:text-warm-500">
              {item.postcode}
            </div>
          )}
        </div>
      </a>
    );
  });

  return (
    <div>
      <div className="border-b border-warm-200 px-3 py-2 dark:border-warm-700">
        <div className="text-base font-bold text-red-600 dark:text-red-400">
          {totalLabel} listings
        </div>
        <div className="text-[11px] text-warm-500 dark:text-warm-400">{summary}</div>
      </div>
      {hasRows && <div className="max-h-80 overflow-y-auto py-1">{rows}</div>}
    </div>
  );
}
interface PoiPopupCardData {
name: string;
category: string;
@ -581,6 +708,7 @@ export default memo(function Map({
usePostcodeView,
pois,
activeOverlays = EMPTY_OVERLAYS,
basemap = 'standard',
actualListings = EMPTY_ACTUAL_LISTINGS,
onViewChange,
viewFeature,
@ -665,9 +793,13 @@ export default memo(function Map({
frame = window.requestAnimationFrame(emit);
return;
}
// The bottom sheet can reveal covered map area without a pan/zoom event.
const dataBoundsHeight = dimensions.height + Math.max(0, bottomScreenInset);
const bounds = getBoundsFromViewState(renderedViewState, dimensions.width, dataBoundsHeight);
const bounds = getVisibleBoundsFromViewState(
renderedViewState,
dimensions.width,
dimensions.height,
bottomScreenInset
);
const visibleBounds = bounds;
const resolution = zoomToResolution(renderedViewState.zoom);
const renderedVisibleCenter =
getRenderedVisibleCenter(mapRef.current, dimensions, bottomScreenInset) ??
@ -676,6 +808,7 @@ export default memo(function Map({
onViewChange({
resolution,
bounds,
visibleBounds,
zoom: renderedViewState.zoom,
latitude: renderedViewState.latitude,
longitude: renderedViewState.longitude,
@ -739,7 +872,8 @@ export default memo(function Map({
if (flyToRef) flyToRef.current = handleFlyTo;
const mapStyle = useMemo(() => getMapStyle(theme), [theme]);
const mapStyle = useMemo(() => getMapStyle(theme, basemap), [theme, basemap]);
const mapDataBeforeId = useMemo(() => getMapDataBeforeId(basemap), [basemap]);
const maxBounds = useMemo(
() => getBoundsWithBottomScreenInset(MAP_BOUNDS, MAP_MIN_ZOOM, bottomScreenInset),
[bottomScreenInset]
@ -794,6 +928,7 @@ export default memo(function Map({
currentLocation,
bounds: viewportBounds,
travelTimeEntries,
mapDataBeforeId,
});
const showAutoPoiCards = !screenshotMode && viewState.zoom >= POI_AUTO_CARD_ZOOM_THRESHOLD;
@ -849,6 +984,14 @@ export default memo(function Map({
<OverlayTileLayers activeOverlays={activeOverlays} zoom={viewState.zoom} />
{!screenshotMode && <ScaleControl position="bottom-left" maxWidth={100} unit="metric" />}
</MapGL>
{basemap === 'satellite' && (
<div
className="pointer-events-auto absolute left-2 z-10 max-w-[calc(100%_-_1rem)] rounded bg-white/85 px-1.5 py-0.5 text-[10px] leading-tight text-warm-600 shadow-sm dark:bg-warm-900/85 dark:text-warm-300"
style={{ bottom: bottomScreenInset > 0 ? bottomScreenInset + 8 : 34 }}
>
Sentinel-2 cloudless by EOX, contains modified Copernicus Sentinel data 2024
</div>
)}
{screenshotMode ? (
ogMode ? (
<div className="absolute inset-0 z-20 pointer-events-none flex flex-col">
@ -1019,7 +1162,9 @@ export default memo(function Map({
)}
{listingPopup && (
<div
className="pointer-events-auto absolute bg-white dark:bg-warm-800 rounded-lg shadow-lg text-sm dark:text-white max-w-[280px]"
className={`pointer-events-auto absolute rounded-lg bg-white text-sm shadow-lg dark:bg-warm-800 dark:text-white ${
listingPopup.mode === 'cluster' ? 'w-80 max-w-[calc(100vw-2rem)]' : 'max-w-[280px]'
}`}
style={{
left: listingPopup.x,
top: listingPopup.y - 12,
@ -1029,63 +1174,21 @@ export default memo(function Map({
onMouseLeave={clearListingPopup}
>
<button
type="button"
className="pointer-events-auto absolute -top-2 -right-2 w-5 h-5 flex items-center justify-center rounded-full bg-warm-200 dark:bg-warm-700 text-warm-500 dark:text-warm-400 hover:text-warm-700 dark:hover:text-warm-300 shadow-sm"
onClick={clearListingPopup}
>
<CloseIcon className="w-3 h-3" />
</button>
<a
href={listingPopup.listing.listing_url}
target="_blank"
rel="noopener noreferrer"
className="block px-3 py-2"
>
{listingPopup.listing.asking_price != null && (
<div className="text-base font-bold text-teal-600 dark:text-teal-400">
{formatListingPrice(listingPopup.listing.asking_price)}
{listingPopup.listing.price_qualifier ? (
<span className="ml-1 text-xs font-medium text-warm-500 dark:text-warm-400">
{listingPopup.listing.price_qualifier}
</span>
) : null}
</div>
)}
{formatListingHeadline(listingPopup.listing, t) && (
<div className="text-xs text-warm-700 dark:text-warm-200 mt-0.5">
{formatListingHeadline(listingPopup.listing, t)}
</div>
)}
{listingPopup.listing.address && (
<div className="text-xs text-warm-500 dark:text-warm-400 mt-0.5 line-clamp-2">
{listingPopup.listing.address}
</div>
)}
{listingPopup.listing.postcode && (
<div className="text-[11px] text-warm-400 dark:text-warm-500 mt-0.5">
{listingPopup.listing.postcode}
</div>
)}
{listingPopup.listing.floor_area_sqm != null && (
<div className="text-[11px] text-warm-500 dark:text-warm-400 mt-0.5">
{Math.round(listingPopup.listing.floor_area_sqm)} sqm
{listingPopup.listing.asking_price_per_sqm != null
? ` · £${Math.round(listingPopup.listing.asking_price_per_sqm).toLocaleString()}/sqm`
: ''}
</div>
)}
{listingPopup.listing.features.length > 0 && (
<ul className="mt-1.5 text-[11px] text-warm-600 dark:text-warm-300 list-disc pl-4 space-y-0.5">
{listingPopup.listing.features.slice(0, 3).map((feature, idx) => (
<li key={idx} className="line-clamp-1">
{feature}
</li>
))}
</ul>
)}
<div className="mt-1.5 text-[11px] text-teal-600 dark:text-teal-400 font-medium">
Open listing
</div>
</a>
{listingPopup.mode === 'single' ? (
<ListingPopupSingleContent listing={listingPopup.listing} t={t} />
) : (
<ListingClusterPopupContent
count={listingPopup.count}
listings={listingPopup.listings}
t={t}
/>
)}
</div>
)}
{hoverPosition && hoveredHexagonId && hoveredHexagonId !== selectedHexagonId && (

View file

@ -27,8 +27,14 @@ import { useFilterCounts } from '../../hooks/useFilterCounts';
import { trackEvent } from '../../lib/analytics';
import { INITIAL_VIEW_STATE, POSTCODE_ZOOM_THRESHOLD } from '../../lib/consts';
import type { OverlayId } from '../../lib/overlays';
import type { BasemapId } from '../../lib/basemaps';
import { useLicense } from '../../hooks/useLicense';
import { stateToParams } from '../../lib/url-state';
import { groupFeaturesByCategory } from '../../lib/features';
import {
getActiveAmenityFilterFeatureNames,
isPoiFilterFeatureName,
} from '../../lib/poi-distance-filter';
import {
AreaPane,
Filters,
@ -74,6 +80,7 @@ export default function MapPage({
initialViewState,
initialPOICategories,
initialOverlays,
initialBasemap = 'standard',
initialTab,
initialLoading,
theme,
@ -107,6 +114,7 @@ export default function MapPage({
const [activeOverlays, setActiveOverlays] = useState<Set<OverlayId>>(
() => new Set(initialOverlays ?? [])
);
const [basemap, setBasemap] = useState<BasemapId>(initialBasemap);
const [leftPaneWidth, leftPaneHandlers] = usePaneResize(384, 200, 0.45, 'left');
const [rightPaneWidth, rightPaneHandlers] = usePaneResize(384, 200, 0.45, 'right');
const [mobileDrawerOpen, setMobileDrawerOpen] = useState(false);
@ -229,10 +237,10 @@ export default function MapPage({
noBuses: parsed.noBuses,
slug: tt.slug,
label: tt.label,
timeRange: [
tt.min ?? 0,
Math.min(tt.max ?? MAX_TRAVEL_MINUTES, MAX_TRAVEL_MINUTES),
] as [number, number],
timeRange: [tt.min ?? 0, Math.min(tt.max ?? MAX_TRAVEL_MINUTES, MAX_TRAVEL_MINUTES)] as [
number,
number,
],
useBest: false,
}))
);
@ -300,6 +308,29 @@ export default function MapPage({
const filterCounts = useFilterCounts(filters, features, mapData.bounds, entries, shareCode);
const license = useLicense();
const [isAreaGroupExpanded, toggleAreaGroup] = useCollapsibleGroups(true);
const activeFilterNames = useMemo(() => new Set(Object.keys(filters)), [filters]);
const activeAmenityFeatureNames = useMemo(
() => getActiveAmenityFilterFeatureNames(filters),
[filters]
);
const areaStatsFields = useMemo(
() =>
groupFeaturesByCategory(features)
.filter((group) => isAreaGroupExpanded(group.name))
.flatMap((group) =>
group.features
.filter((feature) => {
if (group.name !== 'Amenities') return true;
if (isPoiFilterFeatureName(feature.name)) {
return activeAmenityFeatureNames.has(feature.name);
}
return activeFilterNames.has(feature.name);
})
.map((feature) => feature.name)
),
[activeAmenityFeatureNames, activeFilterNames, features, isAreaGroupExpanded]
);
const handleTravelTimeSetDestination = useCallback(
(index: number, slug: string, label: string, _lat: number, _lon: number) => {
@ -338,6 +369,7 @@ export default function MapPage({
resolution: mapData.resolution,
usePostcodeView: mapData.usePostcodeView,
travelTimeEntries: entries,
areaStatsFields,
shareCode,
journeyDest,
});
@ -452,7 +484,7 @@ export default function MapPage({
const actualListingsTravelParam = useMemo(() => buildTravelParam(entries), [entries]);
const actualListingsEnabled = !__DEV__ || devActualListingsEnabled;
const { listings: actualListings } = useActualListings(
actualListingsEnabled ? mapData.bounds : null,
actualListingsEnabled ? mapData.visibleBounds : null,
{
filterParam: actualListingsFilterParam,
travelParam: actualListingsTravelParam,
@ -464,7 +496,6 @@ export default function MapPage({
if (!__DEV__) return;
setDevActualListingsEnabled((enabled) => !enabled);
}, []);
const [isAreaGroupExpanded, toggleAreaGroup] = useCollapsibleGroups(true);
useUrlSync(
mapData.currentView,
@ -474,7 +505,8 @@ export default function MapPage({
rightPaneTab,
entries,
shareCode,
activeOverlays
activeOverlays,
basemap
);
useInitialMapPageView(mapData, initialViewState, initialTab, setRightPaneTab);
@ -548,10 +580,12 @@ export default function MapPage({
rightPaneTab,
entries,
shareCode,
activeOverlays
activeOverlays,
basemap
).toString(),
[
activeOverlays,
basemap,
entries,
features,
filters,
@ -596,6 +630,7 @@ export default function MapPage({
ogMode={ogMode}
travelTimeEntries={entries}
activeOverlays={activeOverlays}
basemap={basemap}
/>
);
}
@ -656,6 +691,8 @@ export default function MapPage({
<OverlayPane
selectedOverlays={activeOverlays}
onOverlaysChange={setActiveOverlays}
basemap={basemap}
onBasemapChange={setBasemap}
zoomedIn={overlaysZoomedIn}
onClose={() => setOverlayPaneOpen(false)}
/>
@ -790,6 +827,7 @@ export default function MapPage({
mapData={mapData}
pois={pois}
activeOverlays={activeOverlays}
basemap={basemap}
mapViewFeature={mapViewFeature}
filterRange={filterRange}
viewSource={viewSource}
@ -860,6 +898,7 @@ export default function MapPage({
mapData={mapData}
pois={pois}
activeOverlays={activeOverlays}
basemap={basemap}
mapViewFeature={mapViewFeature}
filterRange={filterRange}
viewSource={viewSource}

View file

@ -1,11 +1,16 @@
import { OVERLAYS, type OverlayId } from '../../lib/overlays';
import { PillGroup } from '../ui/PillGroup';
import { useState } from 'react';
import { BASEMAPS, type BasemapId } from '../../lib/basemaps';
import { OVERLAYS, type OverlayDefinition, type OverlayId } from '../../lib/overlays';
import { PillToggle } from '../ui/PillToggle';
import { CloseIcon } from '../ui/icons';
import { IconButton } from '../ui/IconButton';
import InfoPopup from '../ui/InfoPopup';
import { CloseIcon, InfoIcon } from '../ui/icons';
interface OverlayPaneProps {
selectedOverlays: Set<OverlayId>;
onOverlaysChange: (overlays: Set<OverlayId>) => void;
basemap: BasemapId;
onBasemapChange: (basemap: BasemapId) => void;
zoomedIn: boolean;
onClose?: () => void;
}
@ -13,9 +18,13 @@ interface OverlayPaneProps {
export default function OverlayPane({
selectedOverlays,
onOverlaysChange,
basemap,
onBasemapChange,
zoomedIn,
onClose,
}: OverlayPaneProps) {
const [infoOverlay, setInfoOverlay] = useState<OverlayDefinition | null>(null);
const toggleOverlay = (overlay: OverlayId) => {
const next = new Set(selectedOverlays);
if (next.has(overlay)) {
@ -28,6 +37,8 @@ export default function OverlayPane({
const selectNone = () => onOverlaysChange(new Set());
const showZoomWarning = !zoomedIn && selectedOverlays.size > 0;
return (
<div className="flex h-full min-h-0 flex-col overflow-hidden bg-white shadow-lg dark:bg-warm-900">
<div className="flex-shrink-0 px-3 pt-3 pb-2">
@ -56,26 +67,68 @@ export default function OverlayPane({
)}
</div>
</div>
{!zoomedIn && (
<div className="mt-2 rounded border border-warm-200 bg-warm-50 px-2 py-1.5 text-xs text-warm-500 dark:border-warm-700 dark:bg-navy-950 dark:text-warm-400">
Zoom in to view overlays.
{showZoomWarning && (
<div
role="alert"
className="mt-2 rounded border border-amber-300 bg-amber-50 px-2 py-1.5 text-xs text-amber-800 dark:border-amber-700/60 dark:bg-amber-900/30 dark:text-amber-200"
>
Zoom in further to see the selected{' '}
{selectedOverlays.size === 1 ? 'overlay' : 'overlays'}.
</div>
)}
</div>
<div className="min-h-0 flex-1 overflow-y-auto overscroll-contain border-t border-warm-200 px-3 py-3 dark:border-warm-700">
<PillGroup className="flex-wrap overflow-x-visible">
{OVERLAYS.map((overlay) => (
<PillToggle
key={overlay.id}
label={overlay.label}
active={selectedOverlays.has(overlay.id)}
onClick={() => toggleOverlay(overlay.id)}
size="sm"
/>
))}
</PillGroup>
<div className="min-h-0 flex-1 space-y-4 overflow-y-auto overscroll-contain border-t border-warm-200 px-3 py-3 dark:border-warm-700">
<div>
<div className="mb-2 text-[10px] font-semibold uppercase tracking-wide text-warm-400 dark:text-warm-500">
Base map
</div>
<div className="flex flex-wrap gap-1.5">
{BASEMAPS.map((option) => (
<PillToggle
key={option.id}
label={option.label}
active={basemap === option.id}
onClick={() => onBasemapChange(option.id)}
size="sm"
/>
))}
</div>
</div>
<div>
<div className="mb-2 text-[10px] font-semibold uppercase tracking-wide text-warm-400 dark:text-warm-500">
Data overlays
</div>
<div className="flex flex-wrap gap-1.5">
{OVERLAYS.map((overlay) => (
<div key={overlay.id} className="inline-flex items-center gap-0.5">
<PillToggle
label={overlay.label}
active={selectedOverlays.has(overlay.id)}
onClick={() => toggleOverlay(overlay.id)}
size="sm"
/>
<IconButton
onClick={() => setInfoOverlay(overlay)}
title={`About ${overlay.label}`}
ariaLabel={`About ${overlay.label}`}
>
<InfoIcon className="h-3.5 w-3.5" />
</IconButton>
</div>
))}
</div>
</div>
</div>
{infoOverlay && (
<InfoPopup title={infoOverlay.label} onClose={() => setInfoOverlay(null)}>
<p className="text-sm text-warm-700 dark:text-warm-300 leading-relaxed">
{infoOverlay.detail}
</p>
</InfoPopup>
)}
</div>
);
}

View file

@ -93,14 +93,14 @@ export function TravelTimeCard({
className={`space-y-2 px-2 py-2 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
>
{/* Header */}
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<ModeIcon className="w-4 h-4 text-teal-600 dark:text-teal-400" />
<span className="text-sm font-medium text-navy-950 dark:text-warm-100">
<div className="flex items-start justify-between gap-1">
<div className="flex min-w-0 items-start gap-2">
<ModeIcon className="w-4 h-4 shrink-0 text-teal-600 dark:text-teal-400" />
<span className="min-w-0 flex-1 break-words text-sm font-medium leading-snug text-navy-950 dark:text-warm-100">
{t('travel.travelTime', { mode: modes.label(mode) })}
</span>
</div>
<div className="flex items-center gap-2 md:gap-0.5">
<div className="flex shrink-0 items-center gap-2 md:gap-0.5">
<IconButton onClick={() => setShowInfo(true)} title={t('filters.aboutData')} size="md">
<InfoIcon className="w-5 h-5 md:w-3.5 md:h-3.5" />
</IconButton>
@ -133,8 +133,8 @@ export function TravelTimeCard({
{/* Transit-only toggles — shown when destination is set */}
{slug && mode === 'transit' && (
<div className="flex flex-wrap items-center gap-x-3 gap-y-1.5">
<div className="flex items-center gap-1.5">
<div className="flex flex-wrap items-center gap-x-4 gap-y-1.5">
<div className="flex items-center gap-0.5">
<PillToggle
label={t('travel.bestCase')}
active={useBest}
@ -145,7 +145,7 @@ export function TravelTimeCard({
<InfoIcon className="w-3 h-3" />
</IconButton>
</div>
<div className="flex items-center gap-1.5">
<div className="flex items-center gap-0.5">
<PillToggle
label={t('travel.noChange')}
active={noChange}
@ -156,7 +156,7 @@ export function TravelTimeCard({
<InfoIcon className="w-3 h-3" />
</IconButton>
</div>
<div className="flex items-center gap-1.5">
<div className="flex items-center gap-0.5">
<PillToggle
label={t('travel.noBuses')}
active={noBuses}

View file

@ -137,6 +137,7 @@ export function ElectionVoteShareFilterCard({
size="sm"
className="min-w-0 shrink"
hideIconOnMobile
wrap
/>
<FeatureActions
feature={selectedFeature}

View file

@ -37,8 +37,8 @@ export function EnumFeatureFilterCard({
data-filter-name={feature.name}
className={`space-y-0.5 px-2 py-1.5 rounded ${pinnedFeature === feature.name ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
>
<div className="flex items-center justify-between">
<FeatureLabel feature={feature} size="sm" />
<div className="flex items-start justify-between gap-1">
<FeatureLabel feature={feature} size="sm" className="min-w-0 shrink" wrap />
<FeatureActions
feature={feature}
isPinned={pinnedFeature === feature.name}

View file

@ -133,6 +133,7 @@ export function EthnicityFilterCard({
size="sm"
className="min-w-0 shrink"
hideIconOnMobile
wrap
/>
<FeatureActions
feature={selectedFeature}

View file

@ -80,7 +80,13 @@ export function NumericFeatureFilterCard({
className={`space-y-0.5 px-2 py-1.5 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
>
<div className="relative z-10 flex items-center justify-between gap-1">
<FeatureLabel feature={feature} size="sm" className="min-w-0 shrink" hideIconOnMobile />
<FeatureLabel
feature={feature}
size="sm"
className="min-w-0 shrink"
hideIconOnMobile
wrap
/>
<FeatureActions
feature={feature}
isPinned={isPinned}

View file

@ -137,7 +137,13 @@ export function PoiDistanceFilterCard({
}`}
>
<div className="relative z-10 flex items-center justify-between gap-1">
<FeatureLabel feature={poiMeta} size="sm" className="min-w-0 shrink" hideIconOnMobile />
<FeatureLabel
feature={poiMeta}
size="sm"
className="min-w-0 shrink"
hideIconOnMobile
wrap
/>
<FeatureActions
feature={selectedFeature}
actionName={poiFeature.name}

View file

@ -112,7 +112,13 @@ export function SchoolFilterCard({
className={`space-y-1.5 px-2 py-1.5 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
>
<div className="relative z-10 flex items-center justify-between gap-1">
<FeatureLabel feature={schoolMeta} size="sm" className="min-w-0 shrink" hideIconOnMobile />
<FeatureLabel
feature={schoolMeta}
size="sm"
className="min-w-0 shrink"
hideIconOnMobile
wrap
/>
<FeatureActions
feature={schoolMeta}
isPinned={isPinned}

View file

@ -133,6 +133,7 @@ export function SpecificCrimeFilterCard({
size="sm"
className="min-w-0 shrink"
hideIconOnMobile
wrap
/>
<FeatureActions
feature={selectedFeature}

View file

@ -14,6 +14,7 @@ import type { useTutorial } from '../../../hooks/useTutorial';
import type { TravelTimeEntry } from '../../../hooks/useTravelTime';
import type { getTutorialStyles } from '../../../lib/tutorial-styles';
import type { OverlayId } from '../../../lib/overlays';
import type { BasemapId } from '../../../lib/basemaps';
import type { SearchedLocation } from '../LocationSearch';
import { MapPinIcon } from '../../ui/icons/MapPinIcon';
import { EyeIcon } from '../../ui/icons/EyeIcon';
@ -39,6 +40,7 @@ interface DesktopMapPageProps {
mapData: MapData;
pois: POI[];
activeOverlays: Set<OverlayId>;
basemap: BasemapId;
mapViewFeature: string | null;
filterRange: [number, number] | null;
viewSource: 'drag' | 'eye' | null;
@ -91,6 +93,7 @@ export function DesktopMapPage({
mapData,
pois,
activeOverlays,
basemap,
mapViewFeature,
filterRange,
viewSource,
@ -184,6 +187,7 @@ export function DesktopMapPage({
usePostcodeView={mapData.usePostcodeView}
pois={pois}
activeOverlays={activeOverlays}
basemap={basemap}
onViewChange={mapData.handleViewChange}
viewFeature={mapViewFeature}
colorRange={mapData.colorRange}
@ -224,7 +228,9 @@ export function DesktopMapPage({
className={`flex items-center gap-2 rounded-lg bg-white px-3 py-2 shadow-lg dark:bg-warm-800 ${actualListingsEnabled ? 'text-red-600 hover:text-red-700 dark:text-red-400 dark:hover:text-red-300' : 'text-warm-500 hover:text-red-600 dark:text-warm-400 dark:hover:text-red-400'}`}
>
<HouseIcon className="h-5 w-5" />
<span className="text-sm font-medium">Listings</span>
<span className="text-sm font-medium">
Listings{actualListingsEnabled ? ` (${actualListings.length})` : ''}
</span>
</button>
)}
<button
@ -244,7 +250,7 @@ export function DesktopMapPage({
</button>
</div>
{overlayPaneOpen && (
<div className="absolute bottom-28 right-4 z-10 flex h-[220px] min-h-0 w-80 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
<div className="absolute bottom-28 right-4 z-10 flex h-[260px] min-h-0 w-80 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
{overlayPane}
</div>
)}

View file

@ -11,6 +11,7 @@ import type {
import type { useMapData } from '../../../hooks/useMapData';
import type { TravelTimeEntry } from '../../../hooks/useTravelTime';
import type { OverlayId } from '../../../lib/overlays';
import type { BasemapId } from '../../../lib/basemaps';
import type { SearchedLocation } from '../LocationSearch';
import MobileBottomSheet from '../MobileBottomSheet';
import { MapPinIcon } from '../../ui/icons/MapPinIcon';
@ -30,6 +31,7 @@ interface MobileMapPageProps {
mapData: MapData;
pois: POI[];
activeOverlays: Set<OverlayId>;
basemap: BasemapId;
mapViewFeature: string | null;
filterRange: [number, number] | null;
viewSource: 'drag' | 'eye' | null;
@ -79,6 +81,7 @@ export function MobileMapPage({
mapData,
pois,
activeOverlays,
basemap,
mapViewFeature,
filterRange,
viewSource,
@ -135,6 +138,7 @@ export function MobileMapPage({
usePostcodeView={mapData.usePostcodeView}
pois={pois}
activeOverlays={activeOverlays}
basemap={basemap}
onViewChange={mapData.handleViewChange}
viewFeature={mapViewFeature}
colorRange={mapData.colorRange}
@ -196,7 +200,7 @@ export function MobileMapPage({
</div>
{overlayPaneOpen && (
<div className="absolute top-24 right-3 left-3 z-20 flex h-[220px] min-h-0 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
<div className="absolute top-24 right-3 left-3 z-20 flex h-[260px] min-h-0 flex-col overflow-hidden rounded-lg border border-warm-200 bg-white shadow-xl dark:border-warm-700 dark:bg-warm-900">
{overlayPane}
</div>
)}

View file

@ -4,6 +4,7 @@ import type { FeatureMeta, ViewState } from '../../../types';
import type { useMapData } from '../../../hooks/useMapData';
import type { TravelTimeEntry } from '../../../hooks/useTravelTime';
import type { OverlayId } from '../../../lib/overlays';
import type { BasemapId } from '../../../lib/basemaps';
import { MapFallback } from './Fallbacks';
import { Map } from './lazyComponents';
@ -20,6 +21,7 @@ interface ScreenshotMapPageProps {
ogMode?: boolean;
travelTimeEntries: TravelTimeEntry[];
activeOverlays: Set<OverlayId>;
basemap: BasemapId;
}
export function ScreenshotMapPage({
@ -33,6 +35,7 @@ export function ScreenshotMapPage({
ogMode,
travelTimeEntries,
activeOverlays,
basemap,
}: ScreenshotMapPageProps) {
return (
<div className="h-full w-full">
@ -43,6 +46,7 @@ export function ScreenshotMapPage({
usePostcodeView={mapData.usePostcodeView}
pois={[]}
activeOverlays={activeOverlays}
basemap={basemap}
onViewChange={mapData.handleViewChange}
viewFeature={mapViewFeature}
colorRange={mapData.colorRange}

View file

@ -7,6 +7,7 @@ import type {
} from '../../../types';
import type { TravelTimeInitial } from '../../../hooks/useTravelTime';
import type { OverlayId } from '../../../lib/overlays';
import type { BasemapId } from '../../../lib/basemaps';
import type { Page } from '../../ui/Header';
import type { PointerEvent } from 'react';
@ -27,6 +28,7 @@ export interface MapPageProps {
initialViewState: ViewState;
initialPOICategories: Set<string>;
initialOverlays?: Set<OverlayId>;
initialBasemap?: BasemapId;
initialTab: 'properties' | 'area';
initialLoading: boolean;
theme: 'light' | 'dark';

View file

@ -13,6 +13,7 @@ interface FeatureLabelProps {
description?: string;
label?: string;
hideIconOnMobile?: boolean;
wrap?: boolean;
}
export function FeatureLabel({
@ -23,10 +24,12 @@ export function FeatureLabel({
description,
label,
hideIconOnMobile,
wrap = false,
}: FeatureLabelProps) {
const { t } = useTranslation();
const textClass = size === 'sm' ? 'text-sm' : 'text-xs';
const gapClass = size === 'sm' ? 'gap-2' : 'gap-1';
const alignmentClass = wrap ? 'items-start' : size === 'xs' ? 'items-center' : 'items-start';
const mobileHide = hideIconOnMobile ? 'hidden md:block ' : '';
const iconClass = `${mobileHide}w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0`;
const featureIcon = getFeatureIcon(feature.name, iconClass);
@ -38,7 +41,11 @@ export function FeatureLabel({
const nameContent = (
<>
<span
className={`${textClass} ${size === 'sm' ? 'font-medium text-navy-950 dark:text-warm-100' : 'text-warm-700 dark:text-warm-300 truncate'}`}
className={`${textClass} ${
size === 'sm'
? 'font-medium text-navy-950 dark:text-warm-100'
: 'text-warm-700 dark:text-warm-300'
} ${wrap ? 'min-w-0 flex-1 break-words leading-snug' : size === 'xs' ? 'truncate' : ''}`}
>
{translatedName}
</span>
@ -56,14 +63,14 @@ export function FeatureLabel({
);
return (
<div
className={`flex ${size === 'xs' ? 'items-center' : 'items-start'} ${gapClass} min-w-0 ${className}`}
>
<div className={`flex ${alignmentClass} ${gapClass} min-w-0 ${className}`}>
{featureIcon}
{GroupIcon && <GroupIcon className={iconClass} />}
{translatedDesc ? (
<div className="min-w-0">
<div className="flex items-center gap-1">{nameContent}</div>
<div className={`flex ${wrap ? 'items-start' : 'items-center'} gap-1`}>
{nameContent}
</div>
<span className="text-xs text-warm-400 dark:text-warm-500 block">{translatedDesc}</span>
</div>
) : (

View file

@ -46,6 +46,7 @@ interface UseDeckLayersProps {
currentLocation?: { lat: number; lng: number } | null;
bounds?: Bounds | null;
travelTimeEntries?: TravelTimeEntry[];
mapDataBeforeId: string;
}
/** Normalize a distribution count array to [0..1] ratios, padded to 10 values. */
@ -88,6 +89,7 @@ export function useDeckLayers({
currentLocation,
bounds: viewportBounds,
travelTimeEntries = [],
mapDataBeforeId,
}: UseDeckLayersProps) {
const [hoverPosition, setHoverPosition] = useState<{ x: number; y: number } | null>(null);
const [hoveredPostcode, setHoveredPostcode] = useState<string | null>(null);
@ -419,10 +421,10 @@ export function useDeckLayers({
highPrecision: true,
onClick: handleHexagonClick,
onHover: handleHexagonHover,
beforeId: 'landuse_park',
beforeId: mapDataBeforeId,
...pieProps,
});
}, [data, colorTrigger, handleHexagonClick, handleHexagonHover]);
}, [data, colorTrigger, handleHexagonClick, handleHexagonHover, mapDataBeforeId]);
const postcodeLayer = useMemo(() => {
const isEnum = enumCountRef.current > 0;
@ -578,9 +580,15 @@ export function useDeckLayers({
onClick: handlePostcodeClick,
onHover: handlePostcodeHoverCallback,
// @ts-expect-error beforeId is a MapboxOverlay interleave prop, not typed in LayerProps
beforeId: 'landuse_park',
beforeId: mapDataBeforeId,
});
}, [postcodeData, postcodeColorTrigger, handlePostcodeClick, handlePostcodeHoverCallback]);
}, [
postcodeData,
postcodeColorTrigger,
handlePostcodeClick,
handlePostcodeHoverCallback,
mapDataBeforeId,
]);
const labeledPostcodeData = useMemo(
() => postcodeData.filter((feature) => feature.properties.count > 0),

View file

@ -3,6 +3,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { useHexagonSelection } from './useHexagonSelection';
import type { FeatureMeta, HexagonStatsResponse, PostcodeGeometry } from '../types';
import type { TravelTimeEntry } from './useTravelTime';
vi.mock('../lib/pocketbase', () => ({
default: { authStore: { isValid: false, token: '' } },
@ -41,9 +42,24 @@ function jsonResponse(body: unknown): Response {
});
}
/** Test helper: awaits two already-resolved promises in sequence so queued microtasks can run. */
async function flushPromises() {
  for (let tick = 0; tick < 2; tick += 1) {
    await Promise.resolve();
  }
}
describe('useHexagonSelection', () => {
const requests: string[] = [];
const features: FeatureMeta[] = [{ name: 'Price', type: 'numeric', min: 0, max: 100 }];
const features: FeatureMeta[] = [
{ name: 'Price', type: 'numeric', min: 0, max: 100 },
{ name: 'Last known price', type: 'numeric', min: 0, max: 1_000_000 },
{ name: 'Estimated current price', type: 'numeric', min: 0, max: 1_000_000 },
{ name: 'Price per sqm', type: 'numeric', min: 0, max: 20_000 },
{ name: 'Est. price per sqm', type: 'numeric', min: 0, max: 20_000 },
{ name: 'Total floor area (sqm)', type: 'numeric', min: 0, max: 500 },
{ name: 'Number of bedrooms & living rooms', type: 'numeric', min: 0, max: 12 },
{ name: 'Construction year', type: 'numeric', min: 0, max: 2026 },
{ name: 'Date of last transaction', type: 'numeric', min: 0, max: 2026 },
];
beforeEach(() => {
requests.length = 0;
@ -64,6 +80,18 @@ describe('useHexagonSelection', () => {
return Promise.resolve(jsonResponse(stats(12)));
}
if (url.pathname === '/api/postcode-properties') {
return Promise.resolve(
jsonResponse({ properties: [], total: 0, offset: 0, truncated: false })
);
}
if (url.pathname === '/api/hexagon-properties') {
return Promise.resolve(
jsonResponse({ properties: [], total: 0, offset: 0, truncated: false })
);
}
return Promise.resolve(new Response(null, { status: 404 }));
})
);
@ -201,4 +229,203 @@ describe('useHexagonSelection', () => {
expect(requests.some((url) => url.startsWith('/api/postcode/'))).toBe(false);
expect(requests.some((url) => url.startsWith('/api/hexagon-stats'))).toBe(false);
});
// Verifies that the `areaStatsFields` option is forwarded as the `fields`
// query parameter on the hexagon-stats request.
it('passes area stat field projections to stats requests', async () => {
const { result } = renderHook(() =>
useHexagonSelection({
filters: {},
features,
hexagonData: [],
resolution: 9,
usePostcodeView: false,
travelTimeEntries: [],
areaStatsFields: ['Price'],
})
);
// Selecting a hexagon triggers the stats fetch.
act(() => {
result.current.handleHexagonClick('89195da49abffff');
});
// Wait until the stubbed stats response has been applied to hook state.
await waitFor(() => {
expect(result.current.areaStats?.count).toBe(12);
});
// `requests` records path + query of every stubbed fetch; inspect the stats call.
const statsRequest = requests.find((url) => url.startsWith('/api/hexagon-stats'));
expect(statsRequest).toBeDefined();
expect(new URL(statsRequest!, 'http://localhost').searchParams.get('fields')).toBe('Price');
});
// Verifies that changing only `areaStatsFields` triggers a refetch while the
// previously loaded stats stay in `areaStats` (no blank state during loading).
it('keeps existing area stats visible while area field projections refetch', async () => {
// Resolvers for in-flight hexagon-stats requests, so the test decides when each one completes.
const pendingStatsRequests: Array<{ resolve: (response: Response) => void }> = [];
// Replace fetch for this test: stats requests stay pending, everything else returns 404.
vi.stubGlobal(
'fetch',
vi.fn((input: string | URL | Request) => {
const url = new URL(String(input), 'http://localhost');
requests.push(`${url.pathname}${url.search}`);
if (url.pathname === '/api/hexagon-stats') {
return new Promise<Response>((resolve) => {
pendingStatsRequests.push({ resolve });
});
}
return Promise.resolve(new Response(null, { status: 404 }));
})
);
// Render with an empty field projection; `rerender` changes it later.
const { result, rerender } = renderHook(
({ areaStatsFields }: { areaStatsFields: string[] }) =>
useHexagonSelection({
filters: {},
features,
hexagonData: [],
resolution: 9,
usePostcodeView: false,
travelTimeEntries: [],
areaStatsFields,
}),
{ initialProps: { areaStatsFields: [] as string[] } }
);
act(() => {
result.current.handleHexagonClick('89195da49abffff');
});
await waitFor(() => {
expect(pendingStatsRequests).toHaveLength(1);
});
// Complete the initial stats request.
await act(async () => {
pendingStatsRequests[0].resolve(jsonResponse(stats(12)));
await flushPromises();
});
await waitFor(() => {
expect(result.current.areaStats?.count).toBe(12);
expect(result.current.loadingAreaStats).toBe(false);
});
// Change only the field projection; a second stats request should be issued.
act(() => {
rerender({ areaStatsFields: ['Price'] });
});
await waitFor(() => {
expect(pendingStatsRequests).toHaveLength(2);
});
// While the refetch is pending: loading flag is set, old stats are still exposed.
expect(result.current.loadingAreaStats).toBe(true);
expect(result.current.areaStats?.count).toBe(12);
const refetchRequest = requests.filter((url) => url.startsWith('/api/hexagon-stats'))[1];
expect(new URL(refetchRequest, 'http://localhost').searchParams.get('fields')).toBe('Price');
// Complete the refetch and confirm the loading flag clears.
await act(async () => {
pendingStatsRequests[1].resolve(jsonResponse(stats(12)));
await flushPromises();
});
await waitFor(() => {
expect(result.current.loadingAreaStats).toBe(false);
expect(result.current.areaStats?.count).toBe(12);
});
});
// Verifies that the postcode-properties request carries a `fields` parameter
// containing the property-card feature names, and nothing unrelated.
it('passes property card field projections to property requests', async () => {
const { result } = renderHook(() =>
useHexagonSelection({
filters: {},
features,
hexagonData: [],
resolution: 9,
usePostcodeView: true,
travelTimeEntries: [],
})
);
// Select a postcode through the location-search entry point.
act(() => {
result.current.handleLocationSearch('SW1A 1AA', postcodeGeometry, 51.505, -0.115);
});
await waitFor(() => {
expect(result.current.areaStats?.count).toBe(4);
});
// Opening the properties tab should trigger the property list fetch.
act(() => {
result.current.handlePropertiesTabClick();
});
await waitFor(() => {
expect(requests.some((url) => url.startsWith('/api/postcode-properties'))).toBe(true);
});
const propertiesRequest = requests.find((url) => url.startsWith('/api/postcode-properties'));
const fieldsParam = new URL(propertiesRequest!, 'http://localhost').searchParams.get('fields');
// Names present in the test `features` list are requested; a name outside it is not.
expect(fieldsParam).toContain('Last known price');
expect(fieldsParam).toContain('Date of last transaction');
expect(fieldsParam).not.toContain('Distance to nearest amenity');
});
// Verifies that switching the stats basis to "all properties"
// (`setAreaStatsUseFilters(false)`) re-requests the property list without
// the `filters` and `travel` query parameters.
it('refetches property requests when stats basis switches to all properties', async () => {
// Both a feature filter and a travel-time entry are active at the start.
const propertyFilters = { Price: [0, 50] as [number, number] };
const travelTimeEntries: TravelTimeEntry[] = [
{
mode: 'transit',
slug: 'kings-cross',
label: 'Kings Cross',
timeRange: [0, 30],
useBest: false,
},
];
const { result } = renderHook(() =>
useHexagonSelection({
filters: propertyFilters,
features,
hexagonData: [],
resolution: 9,
usePostcodeView: true,
travelTimeEntries,
})
);
act(() => {
result.current.handleLocationSearch('SW1A 1AA', postcodeGeometry, 51.505, -0.115);
});
// Wait for the filtered stats to land (the stub reports zero matches here).
await waitFor(() => {
expect(result.current.areaStats?.count).toBe(0);
});
act(() => {
result.current.handlePropertiesTabClick();
});
await waitFor(() => {
expect(requests.filter((url) => url.startsWith('/api/postcode-properties')).length).toBe(1);
});
// The first property request is made on the filtered basis.
const filteredPropertiesRequest = requests.find((url) =>
url.startsWith('/api/postcode-properties')
);
const filteredParams = new URL(filteredPropertiesRequest!, 'http://localhost').searchParams;
expect(filteredParams.has('filters')).toBe(true);
expect(filteredParams.has('travel')).toBe(true);
// Switch the stats basis to all properties.
act(() => {
result.current.setAreaStatsUseFilters(false);
});
await waitFor(() => {
expect(result.current.areaStats?.count).toBe(4);
});
await waitFor(() => {
expect(requests.filter((url) => url.startsWith('/api/postcode-properties')).length).toBe(2);
});
// The most recent property request must not carry filter or travel constraints.
const propertyRequests = requests.filter((url) => url.startsWith('/api/postcode-properties'));
const allPropertiesRequest = propertyRequests[propertyRequests.length - 1];
const allPropertiesParams = new URL(allPropertiesRequest, 'http://localhost').searchParams;
expect(allPropertiesParams.has('filters')).toBe(false);
expect(allPropertiesParams.has('travel')).toBe(false);
});
});

View file

@ -42,11 +42,23 @@ interface UseHexagonSelectionOptions {
resolution: number;
usePostcodeView: boolean;
travelTimeEntries: TravelTimeEntry[];
areaStatsFields?: string[];
shareCode?: string;
/** First transit destination — used to pick the best central_postcode for journey display. */
journeyDest?: JourneyDest | null;
}
// Feature names requested through the `fields` query parameter of the
// hexagon-properties and postcode-properties requests. Before sending, the
// list is narrowed to names present in the loaded `features` metadata and
// joined with ';;'.
const PROPERTY_PANE_FIELDS = [
'Last known price',
'Estimated current price',
'Price per sqm',
'Est. price per sqm',
'Total floor area (sqm)',
'Number of bedrooms & living rooms',
'Construction year',
'Date of last transaction',
];
export function useHexagonSelection({
filters,
features,
@ -54,6 +66,7 @@ export function useHexagonSelection({
resolution,
usePostcodeView,
travelTimeEntries,
areaStatsFields,
shareCode,
journeyDest,
}: UseHexagonSelectionOptions) {
@ -93,6 +106,11 @@ export function useHexagonSelection({
}, []);
const travelParam = useMemo(() => buildTravelParam(travelTimeEntries), [travelTimeEntries]);
const areaStatsFieldsKey = useMemo(() => areaStatsFields?.join(';;') ?? '', [areaStatsFields]);
const propertyPaneFieldsParam = useMemo(() => {
const availableFields = new Set(features.map((feature) => feature.name));
return PROPERTY_PANE_FIELDS.filter((field) => availableFields.has(field)).join(';;');
}, [features]);
const fetchHexagonStats = useCallback(
async (
@ -110,8 +128,9 @@ export function useHexagonSelection({
if (filterStr) params.append('filters', filterStr);
if (includeFilters && travelParam) params.set('travel', travelParam);
if (shareCode) params.set('share', shareCode);
if (fields) {
params.set('fields', fields.join(';;'));
const requestedFields = fields ?? areaStatsFields;
if (requestedFields) {
params.set('fields', requestedFields.join(';;'));
}
if (journeyDest) {
params.set('journey_mode', journeyDest.mode);
@ -121,27 +140,34 @@ export function useHexagonSelection({
assertOk(response, 'hexagon-stats');
return (await response.json()) as HexagonStatsResponse;
},
[filters, features, journeyDest, shareCode, travelParam]
[areaStatsFields, filters, features, journeyDest, shareCode, travelParam]
);
const fetchPostcodeStats = useCallback(
async (postcode: string, signal?: AbortSignal, includeFilters = true) => {
async (
postcode: string,
signal?: AbortSignal,
includeFilters = true,
fields?: string[]
) => {
const params = new URLSearchParams({ postcode });
const filterStr = includeFilters ? buildFilterString(filters, features) : '';
if (filterStr) params.append('filters', filterStr);
if (includeFilters && travelParam) params.set('travel', travelParam);
if (shareCode) params.set('share', shareCode);
const requestedFields = fields ?? areaStatsFields;
if (requestedFields) params.set('fields', requestedFields.join(';;'));
const response = await fetch(apiUrl('postcode-stats', params), authHeaders({ signal }));
assertOk(response, 'postcode-stats');
return (await response.json()) as HexagonStatsResponse;
},
[filters, features, shareCode, travelParam]
[areaStatsFields, filters, features, shareCode, travelParam]
);
const filterStr = useMemo(() => buildFilterString(filters, features), [filters, features]);
const hasStatsFilters = filterStr.length > 0 || travelParam.length > 0;
const journeyKey = journeyDest ? `${journeyDest.mode}:${journeyDest.slug}` : '';
const areaStatsQueryKey = useMemo(
const areaStatsDataKey = useMemo(
() =>
[
areaStatsUseFilters ? 'filtered' : 'all',
@ -152,6 +178,10 @@ export function useHexagonSelection({
].join('|'),
[areaStatsUseFilters, filterStr, journeyKey, shareCode, travelParam]
);
const areaStatsQueryKey = useMemo(
() => [areaStatsDataKey, areaStatsFieldsKey].join('|'),
[areaStatsDataKey, areaStatsFieldsKey]
);
const fetchUnfilteredAreaCount = useCallback(
async (selection: SelectedHexagon, requestId: number, signal?: AbortSignal) => {
@ -162,8 +192,8 @@ export function useHexagonSelection({
const stats =
selection.type === 'postcode'
? await fetchPostcodeStats(selection.id, signal, false)
: await fetchHexagonStats(selection.id, selection.resolution, signal, undefined, false);
? await fetchPostcodeStats(selection.id, signal, false, [])
: await fetchHexagonStats(selection.id, selection.resolution, signal, [], false);
if (isCurrentAreaRequest(requestId)) setUnfilteredAreaCount(stats.count);
},
[fetchHexagonStats, fetchPostcodeStats, hasStatsFilters, isCurrentAreaRequest]
@ -209,9 +239,10 @@ export function useHexagonSelection({
offset: offset.toString(),
});
const filterStr = buildFilterString(filters, features);
const filterStr = areaStatsUseFilters ? buildFilterString(filters, features) : '';
if (filterStr) params.append('filters', filterStr);
if (travelParam) params.set('travel', travelParam);
if (areaStatsUseFilters && travelParam) params.set('travel', travelParam);
params.set('fields', propertyPaneFieldsParam);
if (shareCode) params.set('share', shareCode);
const response = await fetch(apiUrl('hexagon-properties', params), authHeaders());
@ -235,8 +266,10 @@ export function useHexagonSelection({
[
filters,
features,
areaStatsUseFilters,
invalidatePropertyRequests,
isCurrentPropertyRequest,
propertyPaneFieldsParam,
shareCode,
travelParam,
]
@ -255,9 +288,10 @@ export function useHexagonSelection({
params.set('focus_address', focusAddress);
}
const filterStr = buildFilterString(filters, features);
const filterStr = areaStatsUseFilters ? buildFilterString(filters, features) : '';
if (filterStr) params.append('filters', filterStr);
if (travelParam) params.set('travel', travelParam);
if (areaStatsUseFilters && travelParam) params.set('travel', travelParam);
params.set('fields', propertyPaneFieldsParam);
if (shareCode) params.set('share', shareCode);
const response = await fetch(apiUrl('postcode-properties', params), authHeaders());
@ -281,8 +315,10 @@ export function useHexagonSelection({
[
filters,
features,
areaStatsUseFilters,
invalidatePropertyRequests,
isCurrentPropertyRequest,
propertyPaneFieldsParam,
shareCode,
travelParam,
]
@ -546,25 +582,34 @@ export function useHexagonSelection({
rightPaneTab,
]);
// Re-fetch stats when filters or travel constraints change while an area is selected
const prevAreaStatsQueryKey = useRef(areaStatsQueryKey);
// Re-fetch stats when the selected stats basis or requested field projection changes.
const prevAreaStatsQueryRef = useRef({
dataKey: areaStatsDataKey,
queryKey: areaStatsQueryKey,
});
useEffect(() => {
if (prevAreaStatsQueryKey.current === areaStatsQueryKey) return;
prevAreaStatsQueryKey.current = areaStatsQueryKey;
const previousQuery = prevAreaStatsQueryRef.current;
if (previousQuery.queryKey === areaStatsQueryKey) return;
prevAreaStatsQueryRef.current = {
dataKey: areaStatsDataKey,
queryKey: areaStatsQueryKey,
};
if (!selectedHexagon) return;
const fieldProjectionOnlyChanged = previousQuery.dataKey === areaStatsDataKey;
// Clear stale properties
setProperties([]);
setPropertiesTotal(0);
setPropertiesOffset(0);
invalidatePropertyRequests();
setAreaStats(null);
setUnfilteredAreaCount(null);
if (!fieldProjectionOnlyChanged) {
// Clear stale properties
setProperties([]);
setPropertiesTotal(0);
setPropertiesOffset(0);
invalidatePropertyRequests();
setAreaStats(null);
setUnfilteredAreaCount(null);
}
setLoadingAreaStats(true);
let cancelled = false;
const requestId = invalidateAreaRequests();
const fetchStats =
@ -580,11 +625,11 @@ export function useHexagonSelection({
fetchStats
.then((stats) => {
if (cancelled || !isCurrentAreaRequest(requestId)) return;
if (!isCurrentAreaRequest(requestId)) return;
setAreaStats(stats);
refreshUnfilteredAreaCount(selectedHexagon, stats.count, areaStatsUseFilters, requestId);
// Re-fetch properties if the properties tab is active and the filtered area still has matches.
if (areaStatsUseFilters && rightPaneTab === 'properties' && stats.count > 0) {
// Re-fetch properties if the properties tab is active and the selected basis has matches.
if (!fieldProjectionOnlyChanged && rightPaneTab === 'properties' && stats.count > 0) {
if (selectedHexagon.type === 'postcode') {
fetchPostcodeProperties(selectedHexagon.id, 0);
} else {
@ -593,17 +638,14 @@ export function useHexagonSelection({
}
})
.catch((error) => {
if (cancelled || !isCurrentAreaRequest(requestId)) return;
if (!isCurrentAreaRequest(requestId)) return;
logNonAbortError('Failed to refresh stats', error);
})
.finally(() => {
if (!cancelled && isCurrentAreaRequest(requestId)) setLoadingAreaStats(false);
if (isCurrentAreaRequest(requestId)) setLoadingAreaStats(false);
});
return () => {
cancelled = true;
};
}, [
areaStatsDataKey,
areaStatsQueryKey,
selectedHexagon,
fetchHexagonStats,

View file

@ -1,42 +1,211 @@
import { useCallback, useMemo, useRef, useState } from 'react';
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import type { Layer, PickingInfo } from '@deck.gl/core';
import { ScatterplotLayer, TextLayer } from '@deck.gl/layers';
import { PathLayer, ScatterplotLayer, TextLayer } from '@deck.gl/layers';
import Supercluster from 'supercluster';
import type { ActualListing } from '../types';
import { trackEvent } from '../lib/analytics';
// Price labels are not rendered below this zoom.
const PRICE_LABEL_MIN_ZOOM = 14;
// Address/bedroom detail labels are not rendered below this zoom.
const ADDRESS_LABEL_MIN_ZOOM = 16;
// Passed to Supercluster as its `radius` option.
const LISTING_CLUSTER_RADIUS = 18;
// Passed to Supercluster as `maxZoom`; also the upper clamp for the zoom used to query clusters.
const LISTING_CLUSTER_MAX_ZOOM = 24;
// Maximum number of cluster leaves fetched for the popup when a cluster is clicked.
const LISTING_CLUSTER_POPUP_LIMIT = 30;
// Maximum number of cluster leaves fanned out as individual pins around a selected cluster.
const LISTING_SPIDERFY_LIMIT = 12;
// Tile edge length used to derive the world size (TILE_SIZE * 2^zoom) for pixel-to-degree offsets.
const TILE_SIZE = 512;
export interface ListingPopupInfo {
/** Popup state describing a single listing (set when a listing pin is hovered). */
interface SingleListingPopupInfo {
mode: 'single';
// Pointer position copied from the deck.gl PickingInfo (`info.x` / `info.y`).
x: number;
y: number;
listing: ActualListing;
// When true, the popup is kept when the pointer leaves the marker.
locked?: boolean;
}
/** Popup state describing a cluster of listings. */
interface ListingClusterPopupInfo {
mode: 'cluster';
// Pointer position copied from the deck.gl PickingInfo (`info.x` / `info.y`).
x: number;
y: number;
// Number of listings in the cluster, as reported by the cluster index.
count: number;
// Empty while only hovering; filled with the cluster's leaves once the cluster is clicked.
listings: ActualListing[];
// Set to true on click so that hover-out does not dismiss the popup.
locked?: boolean;
}
/** Popup state exposed by the hook; discriminate on `mode` ('single' | 'cluster'). */
export type ListingPopupInfo = SingleListingPopupInfo | ListingClusterPopupInfo;
/** Inputs of `useListingLayers`. */
interface UseListingLayersProps {
// Listings to render; entries without finite lat/lon are left out of the cluster index.
listings: ActualListing[];
// Current map zoom; floored to pick the cluster level and compared against the label zoom thresholds.
zoom: number;
// Selects the dark-theme variants of the layer colours.
isDark: boolean;
}
/** A cluster marker derived from a Supercluster cluster feature. */
interface ListingClusterPoint {
// Cluster position (feature geometry coordinates [0] and [1]).
lng: number;
lat: number;
// Supercluster `point_count` for the cluster.
count: number;
// Supercluster `cluster_id`, used to look up the cluster's leaves.
clusterId: number;
}
/** One listing fanned out around a selected cluster. */
interface ExpandedListingMarker {
listing: ActualListing;
// Fanned-out marker position.
lng: number;
lat: number;
// Position of the selected cluster; the connector line runs from here to (lng, lat).
anchorLng: number;
anchorLat: number;
}
/**
 * Formats a price as a compact label: `£1.2M`, `£12M`, `£250k`, `£950`.
 *
 * - Millions use one decimal below 10M and no decimals from 10M upwards.
 * - Thousands are rounded to the nearest whole thousand.
 * - Values below 1,000 are printed as-is.
 *
 * The unit is chosen from the rounded value, so a price that rounds up across a
 * boundary is promoted: 999,600 renders as `£1.0M` (not `£1000k`) and
 * 9,990,000 renders as `£10M` (not `£10.0M`).
 */
function formatShortPrice(price: number): string {
  const roundedThousands = Math.round(price / 1_000);
  if (roundedThousands >= 1_000) {
    const millions = price / 1_000_000;
    const oneDecimal = millions.toFixed(1);
    // Drop the decimal once the one-decimal rendering itself reaches 10.
    const digits = Number(oneDecimal) >= 10 ? millions.toFixed(0) : oneDecimal;
    return `£${digits}M`;
  }
  if (price >= 1_000) return `£${roundedThousands}k`;
  return `£${price}`;
}
/**
 * Formats a cluster size for the marker label: plain number below 1,000,
 * otherwise thousands with a `k` suffix (`1.5k`, `12k`).
 *
 * One decimal is shown while the rounded value is below 10; once the
 * one-decimal rendering reaches 10 the decimal is dropped, so 9,990 renders as
 * `10k` rather than `10.0k`.
 */
function formatClusterCount(count: number): string {
  if (count >= 1_000) {
    const thousands = count / 1_000;
    const oneDecimal = thousands.toFixed(1);
    const digits = Number(oneDecimal) >= 10 ? thousands.toFixed(0) : oneDecimal;
    return `${digits}k`;
  }
  return String(count);
}
/**
 * Sort comparator for listings shown together.
 *
 * Primary key: `listing_date_iso`, descending (compared with `localeCompare`;
 * a missing date is treated as the empty string). Tie-break: `asking_price`,
 * descending (a missing price is treated as 0).
 */
function compareListingsForDisplay(left: ActualListing, right: ActualListing): number {
  const leftDate = left.listing_date_iso ?? '';
  const rightDate = right.listing_date_iso ?? '';
  const byDate = rightDate.localeCompare(leftDate);
  if (byDate !== 0) {
    return byDate;
  }

  const leftPrice = left.asking_price ?? 0;
  const rightPrice = right.asking_price ?? 0;
  return rightPrice - leftPrice;
}
/**
 * Returns the listings contained in a cluster, in display order.
 *
 * At most `limit` leaves are read from the index (starting at offset 0) and
 * then sorted with `compareListingsForDisplay`; the sort therefore orders only
 * the leaves that were fetched.
 */
function getClusterListings(
  index: Supercluster<ActualListing>,
  clusterId: number,
  limit: number
): ActualListing[] {
  const leaves = index.getLeaves(clusterId, limit, 0);
  const clusterListings: ActualListing[] = [];
  for (const leaf of leaves) {
    clusterListings.push(leaf.properties);
  }
  clusterListings.sort(compareListingsForDisplay);
  return clusterListings;
}
/**
 * Shifts a geographic position by a screen-space offset at the given zoom,
 * assuming a Web Mercator map with `TILE_SIZE`-pixel tiles.
 *
 * @param lng      Longitude of the starting point, in degrees.
 * @param lat      Latitude of the starting point, in degrees.
 * @param dxPixels Horizontal offset in pixels (positive = east).
 * @param dyPixels Vertical offset in pixels (positive = down the screen, i.e. south).
 * @param zoom     Map zoom level; the world is `TILE_SIZE * 2^zoom` pixels wide.
 * @returns `[lng, lat]` of the shifted position (linear approximation, intended for small offsets).
 */
function offsetLngLat(
  lng: number,
  lat: number,
  dxPixels: number,
  dyPixels: number,
  zoom: number
): [number, number] {
  const worldSize = TILE_SIZE * Math.pow(2, zoom);
  const lngPerPixel = 360 / worldSize;
  // Web Mercator stretches the vertical axis by 1 / cos(lat), so one pixel spans
  // cos(lat) times as many degrees of latitude as of longitude. Multiplying (not
  // dividing) keeps pixel offsets isotropic on screen.
  const latPerPixel = lngPerPixel * Math.cos((lat * Math.PI) / 180);
  return [lng + dxPixels * lngPerPixel, lat - dyPixels * latPerPixel];
}
/**
 * Computes where the `index`-th of `total` fanned-out markers sits around an anchor.
 *
 * Markers are spread evenly on a circle around (`lng`, `lat`), starting at an
 * angle of -90° and advancing by 1/total of a full turn per index. The circle
 * radius is 24 px for up to six markers and 32 px otherwise. With a single
 * marker (or fewer) the anchor itself is returned.
 */
function spiderfyPosition(
  lng: number,
  lat: number,
  index: number,
  total: number,
  zoom: number
): [number, number] {
  if (total <= 1) {
    return [lng, lat];
  }

  let ringRadiusPx: number;
  if (total <= 6) {
    ringRadiusPx = 24;
  } else {
    ringRadiusPx = 32;
  }

  const theta = -Math.PI / 2 + (index / total) * Math.PI * 2;
  const dxPixels = Math.cos(theta) * ringRadiusPx;
  const dyPixels = Math.sin(theta) * ringRadiusPx;
  return offsetLngLat(lng, lat, dxPixels, dyPixels, zoom);
}
export function useListingLayers({ listings, zoom, isDark }: UseListingLayersProps) {
const [popupInfo, setPopupInfo] = useState<ListingPopupInfo | null>(null);
const [selectedCluster, setSelectedCluster] = useState<ListingClusterPoint | null>(null);
const handleHover = useCallback((info: PickingInfo<ActualListing>) => {
if (info.object && info.x !== undefined && info.y !== undefined) {
setPopupInfo({ x: info.x, y: info.y, listing: info.object });
} else {
setPopupInfo(null);
useEffect(() => {
setSelectedCluster(null);
setPopupInfo(null);
}, [listings]);
const clusterIndex = useMemo(() => {
if (listings.length === 0) return null;
const index = new Supercluster<ActualListing>({
radius: LISTING_CLUSTER_RADIUS,
maxZoom: LISTING_CLUSTER_MAX_ZOOM,
});
const features: Supercluster.PointFeature<ActualListing>[] = listings
.filter((listing) => Number.isFinite(listing.lat) && Number.isFinite(listing.lon))
.map((listing) => ({
type: 'Feature',
geometry: { type: 'Point', coordinates: [listing.lon, listing.lat] },
properties: listing,
}));
index.load(features);
return index;
}, [listings]);
const clusterIndexRef = useRef(clusterIndex);
clusterIndexRef.current = clusterIndex;
const clusterZoom = Math.min(Math.floor(zoom), LISTING_CLUSTER_MAX_ZOOM);
const { visibleListings, clusters } = useMemo(() => {
if (!clusterIndex) {
return {
visibleListings: [] as ActualListing[],
clusters: [] as ListingClusterPoint[],
};
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const features = clusterIndex.getClusters([-180, -85, 180, 85], clusterZoom) as any[];
const individual: ActualListing[] = [];
const clusterPoints: ListingClusterPoint[] = [];
for (const feature of features) {
if (feature.properties.cluster) {
clusterPoints.push({
lng: feature.geometry.coordinates[0],
lat: feature.geometry.coordinates[1],
count: feature.properties.point_count,
clusterId: feature.properties.cluster_id,
});
} else {
individual.push(feature.properties as ActualListing);
}
}
return { visibleListings: individual, clusters: clusterPoints };
}, [clusterIndex, clusterZoom]);
const expandedListings = useMemo(() => {
if (!selectedCluster || !clusterIndex) return [];
const leaves = getClusterListings(
clusterIndex,
selectedCluster.clusterId,
LISTING_SPIDERFY_LIMIT
);
return leaves.map((listing, index) => {
const [lng, lat] = spiderfyPosition(
selectedCluster.lng,
selectedCluster.lat,
index,
leaves.length,
zoom
);
return {
listing,
lng,
lat,
anchorLng: selectedCluster.lng,
anchorLat: selectedCluster.lat,
};
});
}, [clusterIndex, selectedCluster, zoom]);
const clearUnlockedPopup = useCallback(() => {
setPopupInfo((current) => (current?.locked ? current : null));
}, []);
const handleHover = useCallback(
(info: PickingInfo<ActualListing>) => {
if (info.object && info.x !== undefined && info.y !== undefined) {
setPopupInfo({ mode: 'single', x: info.x, y: info.y, listing: info.object });
} else {
clearUnlockedPopup();
}
},
[clearUnlockedPopup]
);
const handleClick = useCallback((info: PickingInfo<ActualListing>) => {
const url = info.object?.listing_url;
if (!url) return;
@ -58,25 +227,115 @@ export function useListingLayers({ listings, zoom, isDark }: UseListingLayersPro
[]
);
const handleExpandedHover = useCallback(
(info: PickingInfo<ExpandedListingMarker>) => {
if (info.object && info.x !== undefined && info.y !== undefined) {
setPopupInfo({ mode: 'single', x: info.x, y: info.y, listing: info.object.listing });
} else {
clearUnlockedPopup();
}
},
[clearUnlockedPopup]
);
const handleExpandedClick = useCallback((info: PickingInfo<ExpandedListingMarker>) => {
const url = info.object?.listing.listing_url;
if (!url) return;
trackEvent('Actual Listing Click', { url, source: 'cluster_expanded' });
window.open(url, '_blank', 'noopener,noreferrer');
}, []);
const handleExpandedHoverRef = useRef(handleExpandedHover);
handleExpandedHoverRef.current = handleExpandedHover;
const stableExpandedHover = useCallback(
(info: PickingInfo<ExpandedListingMarker>) => handleExpandedHoverRef.current(info),
[]
);
const handleExpandedClickRef = useRef(handleExpandedClick);
handleExpandedClickRef.current = handleExpandedClick;
const stableExpandedClick = useCallback(
(info: PickingInfo<ExpandedListingMarker>) => handleExpandedClickRef.current(info),
[]
);
const handleClusterHover = useCallback(
(info: PickingInfo<ListingClusterPoint>) => {
if (info.object && info.x !== undefined && info.y !== undefined) {
const cluster = info.object;
setPopupInfo((current) =>
current?.locked
? current
: {
mode: 'cluster',
x: info.x,
y: info.y,
count: cluster.count,
listings: [],
}
);
} else {
clearUnlockedPopup();
}
},
[clearUnlockedPopup]
);
const handleClusterClick = useCallback((info: PickingInfo<ListingClusterPoint>) => {
if (!info.object || info.x === undefined || info.y === undefined) return;
const index = clusterIndexRef.current;
if (!index) return;
const cluster = info.object;
const clusterListings = getClusterListings(
index,
cluster.clusterId,
LISTING_CLUSTER_POPUP_LIMIT
);
setSelectedCluster(cluster);
setPopupInfo({
mode: 'cluster',
x: info.x,
y: info.y,
count: cluster.count,
listings: clusterListings,
locked: true,
});
trackEvent('Actual Listing Cluster Click', { count: cluster.count });
}, []);
const handleClusterHoverRef = useRef(handleClusterHover);
handleClusterHoverRef.current = handleClusterHover;
const stableClusterHover = useCallback(
(info: PickingInfo<ListingClusterPoint>) => handleClusterHoverRef.current(info),
[]
);
const handleClusterClickRef = useRef(handleClusterClick);
handleClusterClickRef.current = handleClusterClick;
const stableClusterClick = useCallback(
(info: PickingInfo<ListingClusterPoint>) => handleClusterClickRef.current(info),
[]
);
const pinShadowLayer = useMemo(
() =>
new ScatterplotLayer<ActualListing>({
id: 'actual-listing-shadow',
data: listings,
data: visibleListings,
getPosition: (d) => [d.lon, d.lat],
getRadius: 8,
radiusUnits: 'pixels',
getFillColor: isDark ? [0, 0, 0, 80] : [0, 0, 0, 40],
pickable: false,
}),
[listings, isDark]
[visibleListings, isDark]
);
const pinLayer = useMemo(
() =>
new ScatterplotLayer<ActualListing>({
id: 'actual-listing-pin',
data: listings,
data: visibleListings,
getPosition: (d) => [d.lon, d.lat],
getRadius: 7,
radiusUnits: 'pixels',
@ -91,12 +350,108 @@ export function useListingLayers({ listings, zoom, isDark }: UseListingLayersPro
onHover: stableHover,
onClick: stableClick,
}),
[listings, stableHover, stableClick]
[visibleListings, stableHover, stableClick]
);
const clusterShadowLayer = useMemo(
() =>
new ScatterplotLayer<ListingClusterPoint>({
id: 'actual-listing-cluster-shadow',
data: clusters,
getPosition: (d) => [d.lng, d.lat],
getRadius: (d) => Math.min(32, 13 + Math.sqrt(d.count) * 1.8),
radiusUnits: 'pixels',
getFillColor: isDark ? [0, 0, 0, 90] : [0, 0, 0, 45],
pickable: false,
}),
[clusters, isDark]
);
const clusterLayer = useMemo(
() =>
new ScatterplotLayer<ListingClusterPoint>({
id: 'actual-listing-cluster',
data: clusters,
getPosition: (d) => [d.lng, d.lat],
getRadius: (d) => Math.min(30, 12 + Math.sqrt(d.count) * 1.8),
radiusUnits: 'pixels',
getFillColor: isDark ? [185, 28, 28, 230] : [220, 38, 38, 230],
getLineColor: [255, 255, 255, isDark ? 90 : 180],
getLineWidth: 2,
lineWidthUnits: 'pixels',
stroked: true,
pickable: true,
autoHighlight: true,
highlightColor: [29, 228, 195, 220],
onHover: stableClusterHover,
onClick: stableClusterClick,
}),
[clusters, isDark, stableClusterHover, stableClusterClick]
);
const clusterTextLayer = useMemo(
() =>
new TextLayer<ListingClusterPoint>({
id: 'actual-listing-cluster-text',
data: clusters,
getPosition: (d) => [d.lng, d.lat],
getText: (d) => formatClusterCount(d.count),
getSize: 12,
getColor: [255, 255, 255, 255],
fontFamily: 'Inter, system-ui, sans-serif',
fontWeight: 800,
getTextAnchor: 'middle',
getAlignmentBaseline: 'center',
sizeUnits: 'pixels',
sizeMinPixels: 10,
sizeMaxPixels: 13,
pickable: false,
}),
[clusters]
);
const expandedConnectorLayer = useMemo(
() =>
new PathLayer<ExpandedListingMarker>({
id: 'actual-listing-expanded-lines',
data: expandedListings,
getPath: (d) => [
[d.anchorLng, d.anchorLat],
[d.lng, d.lat],
],
getColor: isDark ? [255, 255, 255, 80] : [80, 60, 50, 110],
getWidth: 1,
widthUnits: 'pixels',
pickable: false,
}),
[expandedListings, isDark]
);
const expandedPinLayer = useMemo(
() =>
new ScatterplotLayer<ExpandedListingMarker>({
id: 'actual-listing-expanded-pin',
data: expandedListings,
getPosition: (d) => [d.lng, d.lat],
getRadius: 6,
radiusUnits: 'pixels',
getFillColor: [231, 76, 60, 245],
getLineColor: [255, 255, 255, 255],
getLineWidth: 1.5,
lineWidthUnits: 'pixels',
stroked: true,
pickable: true,
autoHighlight: true,
highlightColor: [29, 228, 195, 220],
onHover: stableExpandedHover,
onClick: stableExpandedClick,
}),
[expandedListings, stableExpandedHover, stableExpandedClick]
);
const priceLabelLayer = useMemo(() => {
if (zoom < PRICE_LABEL_MIN_ZOOM) return null;
const labeled = listings.filter((l) => l.asking_price && l.asking_price > 0);
const labeled = visibleListings.filter((l) => l.asking_price && l.asking_price > 0);
return new TextLayer<ActualListing>({
id: 'actual-listing-price',
data: labeled,
@ -117,11 +472,11 @@ export function useListingLayers({ listings, zoom, isDark }: UseListingLayersPro
sizeMaxPixels: 14,
pickable: false,
});
}, [listings, zoom, isDark]);
}, [visibleListings, zoom, isDark]);
const detailLabelLayer = useMemo(() => {
if (zoom < ADDRESS_LABEL_MIN_ZOOM) return null;
const labeled = listings.filter((l) => l.address || l.bedrooms != null);
const labeled = visibleListings.filter((l) => l.address || l.bedrooms != null);
return new TextLayer<ActualListing>({
id: 'actual-listing-detail',
data: labeled,
@ -148,16 +503,39 @@ export function useListingLayers({ listings, zoom, isDark }: UseListingLayersPro
sizeMaxPixels: 12,
pickable: false,
});
}, [listings, zoom, isDark]);
}, [visibleListings, zoom, isDark]);
const listingLayers = useMemo(() => {
const layers: Layer[] = [pinShadowLayer, pinLayer];
const layers: Layer[] = [
clusterShadowLayer,
clusterLayer,
clusterTextLayer,
pinShadowLayer,
pinLayer,
];
if (expandedListings.length > 0) {
layers.push(expandedConnectorLayer, expandedPinLayer);
}
if (priceLabelLayer) layers.push(priceLabelLayer);
if (detailLabelLayer) layers.push(detailLabelLayer);
return layers;
}, [pinShadowLayer, pinLayer, priceLabelLayer, detailLabelLayer]);
}, [
clusterShadowLayer,
clusterLayer,
clusterTextLayer,
pinShadowLayer,
pinLayer,
expandedListings.length,
expandedConnectorLayer,
expandedPinLayer,
priceLabelLayer,
detailLabelLayer,
]);
const clearListingPopup = useCallback(() => setPopupInfo(null), []);
const clearListingPopup = useCallback(() => {
setPopupInfo(null);
setSelectedCluster(null);
}, []);
return { listingLayers, listingPopup: popupInfo, clearListingPopup };
}

View file

@ -20,6 +20,7 @@ function viewChange(bounds: Bounds): ViewChangeParams {
return {
resolution: 8,
bounds,
visibleBounds: bounds,
zoom: 10,
latitude: (bounds.south + bounds.north) / 2,
longitude: (bounds.west + bounds.east) / 2,

View file

@ -84,6 +84,7 @@ export function useMapData({
const [postcodeData, setPostcodeData] = useState<PostcodeFeature[]>([]);
const [resolution, setResolution] = useState<number>(8);
const [bounds, setBounds] = useState<Bounds | null>(null);
const [visibleBounds, setVisibleBounds] = useState<Bounds | null>(null);
const [loading, setLoading] = useState<boolean>(false);
const [zoom, setZoom] = useState<number>(10);
const [currentView, setCurrentView] = useState<{
@ -685,6 +686,7 @@ export function useMapData({
({
resolution: newRes,
bounds: newBounds,
visibleBounds: newVisibleBounds,
zoom: newZoom,
latitude,
longitude,
@ -697,6 +699,7 @@ export function useMapData({
setResolution(newRes);
setBounds(newBounds);
}
setVisibleBounds(newVisibleBounds);
setZoom(newZoom);
setCurrentView({ latitude, longitude, zoom: newZoom });
setCurrentVisibleView({
@ -729,6 +732,7 @@ export function useMapData({
postcodeData: effectivePostcodeData,
resolution,
bounds,
visibleBounds,
loading: isLoading,
zoom,
currentView,

View file

@ -2,6 +2,7 @@ import { useEffect, useRef } from 'react';
import type { FeatureMeta, FeatureFilters } from '../types';
import { stateToParams } from '../lib/url-state';
import type { OverlayId } from '../lib/overlays';
import type { BasemapId } from '../lib/basemaps';
import type { TravelTimeEntry } from './useTravelTime';
const URL_DEBOUNCE_MS = 300;
@ -14,7 +15,8 @@ export function useUrlSync(
rightPaneTab: 'properties' | 'area',
travelTimeEntries?: TravelTimeEntry[],
share?: string,
selectedOverlays?: Set<OverlayId>
selectedOverlays?: Set<OverlayId>,
basemap?: BasemapId
) {
const urlDebounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
@ -31,7 +33,8 @@ export function useUrlSync(
rightPaneTab,
travelTimeEntries,
share,
selectedOverlays
selectedOverlays,
basemap
);
const search = params.toString();
const newUrl = search ? `${window.location.pathname}?${search}` : window.location.pathname;
@ -50,5 +53,6 @@ export function useUrlSync(
travelTimeEntries,
share,
selectedOverlays,
basemap,
]);
}

View file

@ -38,7 +38,7 @@ export const details: Record<string, Record<string, string>> = {
'Street tree density percentile':
"Couverture arborée approximative autour du centroïde du code postal, dérivée de la carte Trees Outside Woodland 2025 de Forest Research. Les polygones de couvert arboré des arbres isolés et groupes d'arbres sont comptés dans un rayon de 50 m de chaque centroïde de code postal, puis convertis en percentile parmi les codes postaux anglais. Il s'agit d'une approximation fondée sur le centroïde du code postal, pas d'une mesure exacte du bien ou du segment de rue.",
'Within conservation area':
"Limites de zones de conservation de Historic England, rattachées au point représentatif du code postal. Le jeu de données national est indicatif plutôt que définitif ; les décisions sensibles aux limites doivent être vérifiées auprès de l'autorité locale de planification.",
"Limites de zones de conservation de Planning Data, rattachées au point représentatif du code postal. Le jeu de données national est en cours d'amélioration et peut contenir des doublons ou une couverture locale incomplète ; les décisions sensibles aux limites doivent être vérifiées auprès de l'autorité locale de planification.",
'Listed building':
"Points de bâtiments classés de la National Heritage List for England de Historic England, associés prudemment aux adresses des biens à partir du nom de l'entrée classée et de codes postaux proches candidats. À traiter comme un signal de présélection, pas comme une décision juridique : vérifiez tout bien précis dans la NHLE et auprès de l'autorité locale de planification.",
'Good+ primary schools within 2km':
@ -188,7 +188,7 @@ export const details: Record<string, Record<string, string>> = {
'Street tree density percentile':
'Ungefähre Baumkronenbedeckung rund um den Postleitzahlen-Zentroiden aus der Forest-Research-Karte Trees Outside Woodland 2025. Baumkronen-Polygone für Einzelbäume und Baumgruppen werden im Umkreis von 50 m um jeden Postleitzahlen-Zentroiden gezählt und dann in ein Perzentil über englische Postleitzahlen umgerechnet. Dies ist ein Näherungswert auf Basis des Postleitzahlen-Zentroids, keine exakte Messung für Immobilie oder Straßenabschnitt.',
'Within conservation area':
'Historic-England-Grenzen für Erhaltungsgebiete, dem repräsentativen Punkt der Postleitzahl zugeordnet. Der nationale Datensatz ist indikativ und nicht rechtsverbindlich; grenznahe Entscheidungen sollten bei der lokalen Planungsbehörde geprüft werden.',
'Planning-Data-Grenzen für Erhaltungsgebiete, dem repräsentativen Punkt der Postleitzahl zugeordnet. Der nationale Datensatz wird laufend verbessert und kann Duplikate oder unvollständige lokale Abdeckung enthalten; grenznahe Entscheidungen sollten bei der lokalen Planungsbehörde geprüft werden.',
'Listed building':
'Punktdaten zu denkmalgeschützten Gebäuden aus der National Heritage List for England von Historic England, vorsichtig mit Immobilienadressen abgeglichen anhand des Namens des Denkmaleintrags und nahegelegener Postleitzahlkandidaten. Behandle dies als Vorauswahl-Hinweis, nicht als rechtliche Feststellung: Prüfe jede konkrete Immobilie in der NHLE und bei der lokalen Planungsbehörde.',
'Good+ primary schools within 2km':
@ -338,7 +338,7 @@ export const details: Record<string, Record<string, string>> = {
'Street tree density percentile':
'基于 Forest Research 2025 年 Trees Outside Woodland 地图估算的邮编质心周边树冠覆盖率。系统会统计每个邮编质心 50 米范围内的孤立树木和树群树冠多边形,然后转换为英格兰邮编范围内的百分位。这是邮编质心近似指标,不是精确的房产或道路路段测量。',
'Within conservation area':
'Historic England 保护区边界,与邮编代表点匹配。全国数据集是指示性而非最终权威;涉及边界的决策应向地方规划部门核实。',
'Planning Data 保护区边界,与邮编代表点匹配。全国数据集仍在完善中,可能包含重复记录或地方覆盖不完整;涉及边界的决策应向地方规划部门核实。',
'Listed building':
'Historic England 英格兰国家遗产名录（NHLE）中的受保护建筑点位记录，会根据名录条目名称和附近候选邮编谨慎匹配到房产地址。请把它当作初筛信号，而不是法律认定：具体房产应在 NHLE 和地方规划部门核实。',
'Good+ primary schools within 2km':
@ -480,7 +480,7 @@ export const details: Record<string, Record<string, string>> = {
'Street tree density percentile':
'Forest Research के 2025 Trees Outside Woodland नक्शे से निकाला गया पोस्टकोड केंद्र के आसपास का अनुमानित वृक्ष आच्छादन. अकेले पेड़ों और पेड़ों के समूहों के वृक्ष-शिखर बहुभुजों को हर पोस्टकोड केंद्र से 50m के भीतर गिना जाता है, फिर इंग्लैंड के पोस्टकोडों के मुकाबले प्रतिशतक में बदला जाता है. यह पोस्टकोड-केंद्र पर आधारित अनुमानक है, किसी संपत्ति या सड़क-खंड की सटीक माप नहीं.',
'Within conservation area':
'Historic England संरक्षण क्षेत्र सीमाएं पोस्टकोड प्रतिनिधि बिंदु से मिलाई जाती हैं. राष्ट्रीय डेटासेट संकेतक है, अंतिम आधिकारिक नहीं; सीमा-संवेदनशील निर्णय स्थानीय योजना प्राधिकरण से जांचे जाने चाहिए.',
'Planning Data संरक्षण क्षेत्र सीमाएं पोस्टकोड प्रतिनिधि बिंदु से मिलाई जाती हैं. राष्ट्रीय डेटासेट अभी बेहतर किया जा रहा है और इसमें डुप्लीकेट या अधूरी स्थानीय कवरेज हो सकती है; सीमा-संवेदनशील निर्णय स्थानीय योजना प्राधिकरण से जांचे जाने चाहिए.',
'Listed building':
'Historic England की इंग्लैंड की राष्ट्रीय धरोहर सूची (NHLE) में सूचीबद्ध भवनों के बिंदु रिकॉर्ड, जिन्हें सूचीबद्ध प्रविष्टि के नाम और पास के संभावित पोस्टकोड के आधार पर संपत्ति पते से सावधानी से मिलाया गया है. इसे केवल प्रारंभिक जांच संकेत मानें, कानूनी निर्णय नहीं: किसी भी विशिष्ट संपत्ति को NHLE और स्थानीय योजना प्राधिकरण से सत्यापित करें.',
'Good+ primary schools within 2km':
@ -630,7 +630,7 @@ export const details: Record<string, Record<string, string>> = {
'Street tree density percentile':
'A Forest Research 2025-os Trees Outside Woodland térképéből származó hozzávetőleges lombkorona-fedettség az irányítószám-középpont körül. A magányos fák és facsoportok lombkorona-poligonjait minden irányítószám-középpont 50 méteres körzetében számoljuk, majd az angliai irányítószámok közötti percentilissé alakítjuk. Ez az irányítószám-középponton alapuló közelítő mutató, nem pontos ingatlan- vagy utcaszakasz-mérés.',
'Within conservation area':
'A Historic England műemléki területeinek határai az irányítószám reprezentatív pontjához rendelve. Az országos adatállomány tájékoztató jellegű, nem végleges; határérzékeny döntéseknél a helyi tervezési hatóság adatait kell ellenőrizni.',
'A Planning Data műemléki területeinek határai az irányítószám reprezentatív pontjához rendelve. Az országos adatállomány fejlesztés alatt áll, és tartalmazhat duplikátumokat vagy hiányos helyi lefedettséget; határérzékeny döntéseknél a helyi tervezési hatóság adatait kell ellenőrizni.',
'Listed building':
'A Historic England National Heritage List for England műemlékiépület-pontrekordjai, amelyeket óvatosan egyeztetünk ingatlancímekhez a műemléki bejegyzés neve és a közeli irányítószám-jelöltek alapján. Előszűrési jelzésként kezelendő, nem jogi megállapításként: minden konkrét ingatlant ellenőrizni kell az NHLE-ben és a helyi tervezési hatóságnál.',
'Good+ primary schools within 2km':

View file

@ -1139,8 +1139,8 @@ const de: Translations = {
dsTowOrigin: 'Forest Research / Defra NCEA',
dsTowUse:
'Baumkronen-Polygone für Einzelbäume, Baumgruppen und kleine Gehölze in England. Hier verwendet, um Baumdeckungs-Perzentile rund um Postleitzahlen-Zentroide zu schätzen.',
dsConservationAreasName: 'Historic England Conservation Areas (Denkmalschutzgebiete)',
dsConservationAreasOrigin: 'Historic England und lokale Planungsbehörden',
dsConservationAreasName: 'Planning Data Conservation Areas (Denkmalschutzgebiete)',
dsConservationAreasOrigin: 'Planning Data / lokale Planungsbehörden',
dsConservationAreasUse:
'Grenzen ausgewiesener Conservation Areas in England. Wird genutzt, um zu kennzeichnen, ob der repräsentative Punkt einer Postleitzahl innerhalb eines solchen Denkmalschutzgebiets liegt.',
dsListedBuildingsName: 'Historic England denkmalgeschützte Gebäude',

View file

@ -811,8 +811,8 @@ const en = {
rooms: 'Rooms:',
built: 'Built:',
formerCouncil: 'Ex-council:',
exCouncilBadge: 'Maybe ex-council house',
listedBuildingBadge: 'Maybe listed',
exCouncilBadge: 'Likely ex-council house',
listedBuildingBadge: 'Likely listed',
epcRating: 'EPC rating:',
epcPotential: 'EPC potential:',
renovations: 'Renovations',
@ -1113,8 +1113,8 @@ const en = {
dsTowOrigin: 'Forest Research / Defra NCEA',
dsTowUse:
'Tree canopy polygons for lone trees, groups of trees, and small woodlands in England. Used here to estimate tree coverage percentiles around postcode centroids.',
dsConservationAreasName: 'Historic England Conservation Areas',
dsConservationAreasOrigin: 'Historic England and local planning authorities',
dsConservationAreasName: 'Planning Data Conservation Areas',
dsConservationAreasOrigin: 'Planning Data / local planning authorities',
dsConservationAreasUse:
'Designated conservation area boundaries for England. Used to flag whether a postcode representative point falls within a conservation area.',
dsListedBuildingsName: 'Historic England Listed Buildings',

View file

@ -1148,8 +1148,8 @@ const fr: Translations = {
dsTowOrigin: 'Forest Research / Defra NCEA',
dsTowUse:
'Polygones de couvert arboré pour les arbres isolés, groupes d’arbres et petits bois en Angleterre. Utilisés ici pour estimer les percentiles de couvert arboré autour des centroïdes de codes postaux.',
dsConservationAreasName: 'Zones de conservation de Historic England',
dsConservationAreasOrigin: 'Historic England et autorités locales de planification',
dsConservationAreasName: 'Zones de conservation de Planning Data',
dsConservationAreasOrigin: 'Planning Data / autorités locales de planification',
dsConservationAreasUse:
'Limites des zones de conservation désignées en Angleterre. Utilisées pour indiquer si le point représentatif d’un code postal se trouve dans une zone de conservation.',
dsListedBuildingsName: 'Bâtiments classés Historic England',

View file

@ -1091,8 +1091,8 @@ const hi: Translations = {
dsTowOrigin: 'Forest Research / Defra NCEA',
dsTowUse:
'इंग्लैंड में अकेले पेड़ों, पेड़ों के समूहों और छोटे वन क्षेत्रों के वृक्ष आच्छादन बहुभुज. यहां पोस्टकोड केंद्रों के आसपास वृक्ष आच्छादन प्रतिशतक का अनुमान लगाने के लिए उपयोग किया गया है.',
dsConservationAreasName: 'Historic England संरक्षण क्षेत्र',
dsConservationAreasOrigin: 'Historic England और स्थानीय योजना प्राधिकरण',
dsConservationAreasName: 'Planning Data संरक्षण क्षेत्र',
dsConservationAreasOrigin: 'Planning Data / स्थानीय योजना प्राधिकरण',
dsConservationAreasUse:
'इंग्लैंड में नामित संरक्षण क्षेत्रों की सीमाएं. इसका उपयोग यह दिखाने के लिए किया जाता है कि पोस्टकोड का प्रतिनिधि बिंदु संरक्षण क्षेत्र में आता है या नहीं.',
dsListedBuildingsName: 'Historic England सूचीबद्ध भवन',

View file

@ -1134,8 +1134,8 @@ const hu: Translations = {
dsTowOrigin: 'Forest Research / Defra NCEA',
dsTowUse:
'Fák lombkorona-poligonjai magányos fákhoz, facsoportokhoz és kisebb erdőfoltokhoz Angliában. Itt az irányítószám-középpontok körüli lombkorona-fedettségi percentilisek becslésére használjuk.',
dsConservationAreasName: 'Historic England műemlékvédelmi területek',
dsConservationAreasOrigin: 'Historic England és helyi tervezési hatóságok',
dsConservationAreasName: 'Planning Data műemlékvédelmi területek',
dsConservationAreasOrigin: 'Planning Data / helyi tervezési hatóságok',
dsConservationAreasUse:
'Anglia kijelölt műemlékvédelmi területeinek határai. Annak jelzésére használjuk, hogy egy irányítószám reprezentatív pontja ilyen területre esik-e.',
dsListedBuildingsName: 'Historic England műemlék épületek',

View file

@ -1064,8 +1064,8 @@ const zh: Translations = {
dsTowOrigin: 'Forest Research / Defra NCEA',
dsTowUse:
'英格兰孤立树木、树群和小片林地的树冠多边形。此处用于估算邮编质心周围的树冠覆盖率百分位。',
dsConservationAreasName: 'Historic England 保护区',
dsConservationAreasOrigin: 'Historic England 和地方规划部门',
dsConservationAreasName: 'Planning Data 保护区',
dsConservationAreasOrigin: 'Planning Data / 地方规划部门',
dsConservationAreasUse: '英格兰指定保护区边界。用于标记邮编代表点是否位于保护区内。',
dsListedBuildingsName: 'Historic England 登录建筑',
dsListedBuildingsOrigin: 'Historic England 英格兰国家遗产名录',

View file

@ -0,0 +1,19 @@
/** Identifiers of every basemap the app knows about. */
export const BASEMAP_IDS = ['standard', 'satellite'] as const;

/** Union of the literal ids listed in {@link BASEMAP_IDS}. */
export type BasemapId = (typeof BASEMAP_IDS)[number];

/** A basemap id paired with its display label. */
export interface BasemapDefinition {
  id: BasemapId;
  label: string;
}

/** Label table for the available basemaps. */
export const BASEMAPS: BasemapDefinition[] = [
  { id: 'standard', label: 'Map' },
  { id: 'satellite', label: 'Satellite' },
];

// Lookup set so the type guard does not scan the tuple on every call.
const KNOWN_BASEMAP_IDS: ReadonlySet<string> = new Set<string>(BASEMAP_IDS);

/**
 * Type guard: narrows an arbitrary string to `BasemapId` when it is one of
 * the ids in `BASEMAP_IDS`.
 */
export function isBasemapId(value: string): value is BasemapId {
  return KNOWN_BASEMAP_IDS.has(value);
}

View file

@ -11,7 +11,7 @@ export const COLOR_RANGE_HIGH_PERCENTILE = 95;
export const MAP_BOUNDS: [number, number, number, number] = [-9.5, 49, 5, 57];
export const MAP_MIN_ZOOM = 5.5;
export const BUFFER_MULTIPLIER = 1.5;
export const BUFFER_MULTIPLIER = 1;
/** Demo free zone bounds (south, west, north, east) — must match server FREE_ZONE_BOUNDS */
export const FREE_ZONE_BOUNDS = { south: 51.44, west: -0.31, north: 51.59, east: 0.05 };

View file

@ -6,6 +6,7 @@ import {
DENSITY_GRADIENT,
ENUM_PALETTE,
FEATURE_GRADIENT,
BUFFER_MULTIPLIER,
MAP_BOUNDS,
POI_CATEGORY_LOGOS,
SMALLEST_VISIBLE_HEXAGON_RESOLUTION,
@ -15,6 +16,7 @@ import {
enumIndexToColor,
getBoundsFromViewState,
getBoundsWithBottomScreenInset,
getVisibleBoundsFromViewState,
getLatitudeAtVerticalPixelOffset,
getFeatureFillColor,
getMapCenterForTargetScreenPoint,
@ -31,17 +33,33 @@ describe('map utilities', () => {
expect(SMALLEST_VISIBLE_HEXAGON_RESOLUTION).toBe(9);
});
it('computes buffered bounds around a view state', () => {
const bounds = getBoundsFromViewState(
{ latitude: 51.5, longitude: -0.1, zoom: 12, pitch: 0 },
1200,
800
);
it('computes exact viewport bounds by default', () => {
const viewState = { latitude: 51.5, longitude: -0.1, zoom: 12, pitch: 0 };
const bounds = getBoundsFromViewState(viewState, 1200, 800);
const exactBounds = getBoundsFromViewState(viewState, 1200, 800, 1);
const bufferedBounds = getBoundsFromViewState(viewState, 1200, 800, 1.5);
expect(BUFFER_MULTIPLIER).toBe(1);
expect(bounds).toEqual(exactBounds);
expect(bounds.south).toBeLessThan(51.5);
expect(bounds.north).toBeGreaterThan(51.5);
expect(bounds.west).toBeLessThan(-0.1);
expect(bounds.east).toBeGreaterThan(-0.1);
expect(bufferedBounds.south).toBeLessThan(bounds.south);
expect(bufferedBounds.north).toBeGreaterThan(bounds.north);
expect(bufferedBounds.west).toBeLessThan(bounds.west);
expect(bufferedBounds.east).toBeGreaterThan(bounds.east);
});
it('excludes mobile bottom-sheet covered map area from visible bounds', () => {
const viewState = { latitude: 51.5, longitude: -0.1, zoom: 12, pitch: 0 };
const fullBounds = getVisibleBoundsFromViewState(viewState, 390, 800, 0);
const visibleBounds = getVisibleBoundsFromViewState(viewState, 390, 800, 352);
expect(visibleBounds.west).toBeCloseTo(fullBounds.west, 6);
expect(visibleBounds.east).toBeCloseTo(fullBounds.east, 6);
expect(visibleBounds.north).toBeCloseTo(fullBounds.north, 6);
expect(visibleBounds.south).toBeGreaterThan(fullBounds.south);
});
it('moves the map center so a target lands in the requested screen position', () => {

View file

@ -1,6 +1,7 @@
import type { ViewState, Bounds } from '../types';
import type { StyleSpecification } from 'maplibre-gl';
import { layers, namedFlavor } from '@protomaps/basemaps';
import type { BasemapId } from './basemaps';
import {
GLYPHS_URL,
FEATURE_GRADIENT,
@ -9,11 +10,19 @@ import {
TWEMOJI_BASE,
BUFFER_MULTIPLIER,
POI_CATEGORY_LOGOS,
MAP_MIN_ZOOM,
type GradientStop,
} from './consts';
const ROAD_OPACITY = 0.4;
const TILE_SIZE = 512;
const MAX_MERCATOR_LATITUDE = 85;
const SATELLITE_MAX_ZOOM = 13;
const SATELLITE_ATTRIBUTION =
'Sentinel-2 cloudless - https://s2maps.eu by EOX IT Services GmbH (Contains modified Copernicus Sentinel data 2024)';
/**
 * Maps a basemap to an id string: `'roads_runway'` for the satellite basemap,
 * `'landuse_park'` for anything else.
 *
 * NOTE(review): presumably this is the id of the style layer that data layers
 * get inserted before — confirm against the caller.
 */
export function getMapDataBeforeId(basemap: BasemapId): string {
  if (basemap === 'satellite') {
    return 'roads_runway';
  }
  return 'landuse_park';
}
function clampLatitude(latitude: number): number {
return Math.max(-MAX_MERCATOR_LATITUDE, Math.min(MAX_MERCATOR_LATITUDE, latitude));
@ -66,10 +75,52 @@ export function getMapCenterForTargetScreenPoint(
};
}
export function getMapStyle(theme: 'light' | 'dark'): StyleSpecification {
/**
 * Decides whether a basemap layer is kept on top of the satellite imagery.
 *
 * Every `symbol` layer is kept; `line` layers are kept only when their id
 * starts with `roads_`, `boundaries` or `water_`. All other layer types are
 * rejected.
 */
function isSatelliteReferenceLayer(layer: ReturnType<typeof layers>[number]): boolean {
  switch (layer.type) {
    case 'symbol':
      return true;
    case 'line':
      return ['roads_', 'boundaries', 'water_'].some((prefix) => layer.id.startsWith(prefix));
    default:
      return false;
  }
}
/**
 * Restyles a basemap layer for the satellite basemap.
 *
 * - `symbol` layers: text colour, halo colour/width/blur and icon opacity are
 *   overridden.
 * - `line` layers: colour and opacity are overridden; ids starting with
 *   `boundaries` take precedence over ids containing `casing`, and everything
 *   else gets the default line styling.
 * - any other layer type is returned unchanged.
 *
 * The input layer is not mutated; restyled layers are shallow copies.
 */
function satelliteReferenceLayer(layer: ReturnType<typeof layers>[number]) {
  if (layer.type === 'symbol') {
    const paint = {
      ...layer.paint,
      'text-color': '#f8fafc',
      'text-halo-color': '#111827',
      'text-halo-width': 1.6,
      'text-halo-blur': 0.3,
      'icon-opacity': 0.9,
    };
    return { ...layer, paint };
  }

  if (layer.type !== 'line') {
    return layer;
  }

  // Default styling for plain (non-boundary, non-casing) lines.
  let lineColor = '#f9fafb';
  let lineOpacity = 0.78;
  if (layer.id.startsWith('boundaries')) {
    lineColor = '#f8fafc';
    lineOpacity = 0.45;
  } else if (layer.id.includes('casing')) {
    lineColor = '#111827';
    lineOpacity = 0.62;
  }

  return {
    ...layer,
    paint: {
      ...layer.paint,
      'line-color': lineColor,
      'line-opacity': lineOpacity,
    },
  };
}
export function getMapStyle(theme: 'light' | 'dark', basemap: BasemapId): StyleSpecification {
const flavor = namedFlavor(theme);
// Use absolute URL for tiles - required by MapLibre
const tileUrl = `${window.location.origin}/api/tiles/{z}/{x}/{y}`;
const satelliteTileUrl = `${window.location.origin}/api/tiles/satellite/{z}/{x}/{y}`;
const baseLayers = layers('protomaps', flavor, { lang: 'en' });
const isDark = theme === 'dark';
@ -105,6 +156,50 @@ export function getMapStyle(theme: 'light' | 'dark'): StyleSpecification {
return layer;
});
if (basemap === 'satellite') {
return {
version: 8,
sprite: `${window.location.origin}/assets/sprites/${theme}`,
glyphs: GLYPHS_URL,
sources: {
satellite: {
type: 'raster',
tiles: [satelliteTileUrl],
tileSize: 256,
minzoom: MAP_MIN_ZOOM,
maxzoom: SATELLITE_MAX_ZOOM,
attribution: SATELLITE_ATTRIBUTION,
},
protomaps: {
type: 'vector',
tiles: [tileUrl],
maxzoom: 15,
},
},
layers: [
{
id: 'satellite-background',
type: 'background',
paint: {
'background-color': isDark ? '#111827' : '#d4cec3',
},
},
{
id: 'satellite-raster',
type: 'raster',
source: 'satellite',
paint: {
'raster-fade-duration': 120,
'raster-brightness-min': isDark ? 0.08 : 0,
'raster-brightness-max': isDark ? 0.86 : 1,
'raster-contrast': isDark ? 0.08 : 0.03,
},
},
...modifiedLayers.filter(isSatelliteReferenceLayer).map(satelliteReferenceLayer),
],
} as StyleSpecification;
}
return {
version: 8,
sprite: `${window.location.origin}/assets/sprites/${theme}`,
@ -209,15 +304,16 @@ export function zoomToResolution(zoom: number): number {
export function getBoundsFromViewState(
viewState: ViewState,
width: number,
height: number
height: number,
bufferMultiplier: number = BUFFER_MULTIPLIER
): Bounds {
const { longitude, latitude, zoom } = viewState;
const clampedLat = clampLatitude(latitude);
const scale = Math.pow(2, zoom);
const worldSize = TILE_SIZE * scale;
const bufferedWidth = width * BUFFER_MULTIPLIER;
const bufferedHeight = height * BUFFER_MULTIPLIER;
const bufferedWidth = width * bufferMultiplier;
const bufferedHeight = height * bufferMultiplier;
const degreesPerPixelLng = 360 / worldSize;
const halfWidthDeg = (bufferedWidth / 2) * degreesPerPixelLng;
@ -235,6 +331,58 @@ export function getBoundsFromViewState(
return { south, west, north, east };
}
/**
 * Computes the geographic bounds covered by a rectangle of the map viewport.
 *
 * @param viewState Map centre (`longitude`, `latitude`) and `zoom`.
 * @param width Viewport width in pixels.
 * @param height Viewport height in pixels.
 * @param rect Rectangle edges in screen pixels; missing edges default to the
 *   full viewport (`left`/`top` = 0, `right` = `width`, `bottom` = `height`).
 *   Swapped edges are tolerated because each axis is min/max-normalised.
 * @returns Bounds with longitudes clamped to [-180, 180] and latitudes clamped
 *   to ±`MAX_MERCATOR_LATITUDE`.
 */
export function getBoundsFromScreenRect(
  viewState: ViewState,
  width: number,
  height: number,
  rect: { left?: number; top?: number; right?: number; bottom?: number } = {}
): Bounds {
  const worldSize = TILE_SIZE * Math.pow(2, viewState.zoom);
  const centerX = longitudeToWorldX(viewState.longitude, worldSize);
  const centerY = latitudeToWorldY(clampLatitude(viewState.latitude), worldSize);

  const xEdgeA = rect.left ?? 0;
  const xEdgeB = rect.right ?? width;
  const yEdgeA = rect.top ?? 0;
  const yEdgeB = rect.bottom ?? height;

  const clampTo = (value: number, limit: number) => Math.max(-limit, Math.min(limit, value));

  // Screen pixel -> world pixel -> degrees, relative to the viewport centre.
  function toLongitude(screenX: number): number {
    const worldX = centerX + screenX - width / 2;
    return clampTo((worldX / worldSize) * 360 - 180, 180);
  }

  function toLatitude(screenY: number): number {
    const worldY = centerY + screenY - height / 2;
    return clampTo(worldYToLatitude(worldY, worldSize), MAX_MERCATOR_LATITUDE);
  }

  const lngAtLeft = toLongitude(Math.min(xEdgeA, xEdgeB));
  const lngAtRight = toLongitude(Math.max(xEdgeA, xEdgeB));
  const latAtTop = toLatitude(Math.min(yEdgeA, yEdgeB));
  const latAtBottom = toLatitude(Math.max(yEdgeA, yEdgeB));

  return {
    south: Math.min(latAtTop, latAtBottom),
    west: Math.min(lngAtLeft, lngAtRight),
    north: Math.max(latAtTop, latAtBottom),
    east: Math.max(lngAtLeft, lngAtRight),
  };
}
/**
 * Bounds of the viewport area left uncovered by a bottom inset.
 *
 * @param viewState Map centre and zoom.
 * @param width Viewport width in pixels.
 * @param height Viewport height in pixels.
 * @param bottomScreenInset Pixels covered at the bottom of the viewport;
 *   clamped to the range [0, `height`]. Defaults to 0.
 * @returns Bounds of the screen rectangle from the top of the viewport down to
 *   `height - inset`, as computed by `getBoundsFromScreenRect`.
 */
export function getVisibleBoundsFromViewState(
  viewState: ViewState,
  width: number,
  height: number,
  bottomScreenInset: number = 0
): Bounds {
  const coveredPixels = Math.max(0, Math.min(height, bottomScreenInset));
  return getBoundsFromScreenRect(viewState, width, height, {
    bottom: height - coveredPixels,
  });
}
export function getLatitudeAtVerticalPixelOffset(
latitude: number,
zoom: number,

View file

@ -6,6 +6,7 @@ export interface OverlayDefinition {
id: OverlayId;
label: string;
description: string;
detail: string;
}
export const OVERLAYS: OverlayDefinition[] = [
@ -13,16 +14,22 @@ export const OVERLAYS: OverlayDefinition[] = [
id: 'noise',
label: 'Noise',
description: 'High-resolution Defra Lden noise raster',
detail:
'Defra Strategic Noise Mapping Round 4 (2022), combining road, rail, and airport sources. Values are the EU-standard Lden metric (day-evening-night 24-hour weighted average), modelled on a 10m grid at 4m above ground. Brighter areas indicate higher modelled noise. Licensed under the Open Government Licence v3.0.',
},
{
id: 'crime-hotspots',
label: 'Crime hotspots',
description: 'Approximate police.uk street-crime heatmap',
detail:
'Client-side heatmap of street-level crimes published by police.uk over the most recent months. Police.uk coordinates are anonymised snap-to-grid points, not exact offence locations, so the heatmap should be read as an approximation of relative density rather than a precise map of incidents.',
},
{
id: 'trees-outside-woodlands',
label: 'Trees',
description: 'Trees Outside Woodland canopy polygons',
detail:
'Forest Research Trees Outside Woodland (TOW) v1 canopy polygons covering lone trees and groups of trees outside mapped woodland blocks. Useful for spotting tree-lined streets and green pockets that broader land-use layers miss. Polygon opacity scales with canopy area.',
},
];

View file

@ -1,11 +1,14 @@
import { describe, expect, it } from 'vitest';
import type { FeatureMeta } from '../types';
import type { FeatureFilters, FeatureMeta } from '../types';
import {
POI_COUNT_2KM_FILTER_NAME,
POI_DISTANCE_FILTER_NAME,
TRANSPORT_DISTANCE_FILTER_NAME,
clampPoiFilterRange,
createPoiFilterKey,
createPoiDistanceFilterKey,
getActiveAmenityFilterFeatureNames,
getPoiFilterFeatureOptions,
getPoiFilterName,
} from './poi-distance-filter';
@ -60,6 +63,20 @@ describe('poi-distance-filter', () => {
expect(getPoiFilterName('Number of amenities (Bus stop) within 2km')).toBeNull();
});
it('extracts only active non-transport amenity backend feature names', () => {
const cafeDistance = 'Distance to nearest amenity (Cafe) (km)';
const parkCount = 'Number of amenities (Park) within 2km';
const busStopDistance = 'Distance to nearest amenity (Bus stop) (km)';
const filters: FeatureFilters = {
[createPoiDistanceFilterKey(cafeDistance, 0)]: [0, 1],
[createPoiFilterKey(POI_COUNT_2KM_FILTER_NAME, parkCount, 1)]: [2, 10],
[createPoiFilterKey(TRANSPORT_DISTANCE_FILTER_NAME, busStopDistance, 2)]: [0, 0.5],
Price: [0, 500000],
};
expect([...getActiveAmenityFilterFeatureNames(filters)]).toEqual([cafeDistance, parkCount]);
});
it('clamps fixed amenity distance scales to the 0-5km slider bounds', () => {
const feature = numeric('Distance to nearest amenity (Cafe) (km)', {
absolute: true,

View file

@ -203,6 +203,20 @@ export function getPoiDistanceFeatureName(name: string): string | null {
return parsePoiFilterKey(name);
}
/**
 * Collects the feature names referenced by the POI filter keys in `filters`.
 *
 * A key contributes only when `getPoiFilterName` recognises it, its filter
 * name is not `TRANSPORT_DISTANCE_FILTER_NAME`, and
 * `getPoiDistanceFeatureName` yields a feature name for it.
 *
 * @returns A set of feature names, in the order the keys are visited.
 */
export function getActiveAmenityFilterFeatureNames(filters: FeatureFilters): Set<string> {
  const featureNames = new Set<string>();

  Object.keys(filters).forEach((key) => {
    const kind = getPoiFilterName(key);
    if (kind && kind !== TRANSPORT_DISTANCE_FILTER_NAME) {
      const featureName = getPoiDistanceFeatureName(key);
      if (featureName) {
        featureNames.add(featureName);
      }
    }
  });

  return featureNames;
}
export function replacePoiFilterKeySelection(key: string, featureName: string): string {
const filterName =
getPoiFilterName(key) ??

View file

@ -173,6 +173,30 @@ describe('url-state', () => {
expect(state.overlays).toEqual(new Set(['noise', 'crime-hotspots']));
});
it('round-trips satellite basemap selection', () => {
const params = stateToParams(
null,
{},
[],
new Set(),
'area',
undefined,
undefined,
undefined,
'satellite'
);
expect(params.get('basemap')).toBe('satellite');
window.history.replaceState({}, '', `/?${params.toString()}`);
const state = parseUrlState();
expect(state.basemap).toBe('satellite');
window.history.replaceState({}, '', '/?basemap=unknown');
expect(parseUrlState().basemap).toBe('standard');
});
it('round-trips repeated school filters with dedicated URL params', () => {
const schoolOne = createSchoolFilterKey('primary', 'good', 2, 1);
const schoolTwo = createSchoolFilterKey('secondary', 'outstanding', 5, 2);

View file

@ -50,6 +50,7 @@ import {
} from './poi-distance-filter';
import { dedupeTravelTimeEntries } from './travel-params';
import { isOverlayId, type OverlayId } from './overlays';
import { isBasemapId, type BasemapId } from './basemaps';
const POI_NONE_PARAM = '__none';
@ -58,6 +59,7 @@ export interface UrlState {
filters: FeatureFilters;
poiCategories: Set<string>;
overlays: Set<OverlayId>;
basemap: BasemapId;
tab: 'properties' | 'area';
travelTime?: TravelTimeInitial;
postcode?: string;
@ -213,6 +215,7 @@ export function parseUrlState(): UrlState {
filters: parseFilters(params),
poiCategories: new Set(),
overlays: new Set(),
basemap: 'standard',
tab: 'area',
};
@ -253,6 +256,11 @@ export function parseUrlState(): UrlState {
result.overlays = new Set(overlayParams.filter(isOverlayId));
}
const basemap = params.get('basemap');
if (basemap && isBasemapId(basemap)) {
result.basemap = basemap;
}
// Tab: full name
const tab = params.get('tab');
if (tab === 'properties' || tab === 'area') {
@ -320,7 +328,8 @@ export function stateToParams(
rightPaneTab: 'properties' | 'area',
travelTimeEntries?: TravelTimeEntry[],
share?: string,
selectedOverlays?: Set<OverlayId>
selectedOverlays?: Set<OverlayId>,
basemap?: BasemapId
): URLSearchParams {
const params = new URLSearchParams();
@ -409,6 +418,10 @@ export function stateToParams(
}
}
if (basemap && basemap !== 'standard') {
params.set('basemap', basemap);
}
// Travel time: repeated `tt` params
if (travelTimeEntries) {
for (const entry of dedupeTravelTimeEntries(travelTimeEntries)) {

View file

@ -85,6 +85,7 @@ export interface MapFlyToOptions {
export interface ViewChangeParams {
resolution: number;
bounds: Bounds;
visibleBounds: Bounds;
zoom: number;
latitude: number;
longitude: number;

View file

@ -1,6 +1,6 @@
"""Download Historic England conservation area polygons.
"""Download Planning Data conservation area polygons.
Source: Historic England Conservation Areas
Source: https://www.planning.data.gov.uk/dataset/conservation-area
License: Open Government Licence v3.0
"""
@ -9,38 +9,60 @@ from pathlib import Path
import httpx
import pyogrio
from shapely import from_wkb
URL = (
"https://opendata-historicengland.hub.arcgis.com/api/download/v1/items/"
"446bc9bf8b5b440386d0c504caa3dac5/geoPackage?layers=0"
)
URL = "https://files.planning.data.gov.uk/dataset/conservation-area.geojson"
def _geometry_column(metadata: dict, column_names: list[str]) -> str:
geometry_name = metadata.get("geometry_name")
if geometry_name:
return str(geometry_name)
for name in ("wkb_geometry", "geometry", "geom"):
if name in column_names:
return name
return column_names[-1]
def _validate_conservation_areas(path: Path) -> int:
    """Sanity-check a downloaded conservation-areas file.

    Args:
        path: File to inspect with pyogrio.

    Returns:
        The feature count reported by ``pyogrio.read_info``.

    Raises:
        ValueError: if the reported feature count is not positive, or if none
            of the geometries is a non-empty Polygon/MultiPolygon.
    """
    feature_count = pyogrio.read_info(path).get("features", 0)
    if feature_count <= 0:
        raise ValueError("Downloaded conservation areas file contains no features")

    # Read geometry only (no attribute columns) and decode the WKB payloads.
    metadata, table = pyogrio.read_arrow(path, columns=[], read_geometry=True)
    geometry_name = _geometry_column(metadata, table.column_names)
    shapes = from_wkb(table[geometry_name].combine_chunks().to_pylist())

    polygon_total = 0
    for shape in shapes:
        if shape is None or shape.is_empty:
            continue
        if shape.geom_type in {"Polygon", "MultiPolygon"}:
            polygon_total += 1

    if polygon_total <= 0:
        raise ValueError("Downloaded conservation areas file contains no polygons")
    return int(feature_count)
def main() -> None:
parser = argparse.ArgumentParser(
description="Download Historic England conservation area polygons"
description="Download Planning Data conservation area polygons"
)
parser.add_argument(
"--output", type=Path, required=True, help="Output GeoPackage file path"
"--output", type=Path, required=True, help="Output GeoJSON file path"
)
args = parser.parse_args()
args.output.parent.mkdir(parents=True, exist_ok=True)
tmp_path = args.output.with_name(f"{args.output.stem}.tmp{args.output.suffix}")
print("Downloading Historic England conservation areas...")
print("Downloading Planning Data conservation areas...")
with httpx.stream("GET", URL, follow_redirects=True, timeout=300) as response:
response.raise_for_status()
with tmp_path.open("wb") as fh:
for chunk in response.iter_bytes():
fh.write(chunk)
info = pyogrio.read_info(tmp_path)
features = info.get("features", 0)
geometry_type = info.get("geometry_type")
if features <= 0:
raise ValueError("Downloaded conservation areas file contains no features")
if "Polygon" not in str(geometry_type):
raise ValueError(f"Expected polygon geometry, got {geometry_type!r}")
features = _validate_conservation_areas(tmp_path)
tmp_path.replace(args.output)
size_mb = args.output.stat().st_size / (1024 * 1024)

View file

@ -8,15 +8,61 @@ License: INSPIRE End User Licence
"""
import argparse
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from html.parser import HTMLParser
from pathlib import Path
import time
import zipfile
from urllib.parse import urljoin, urlparse
import httpx
from tqdm import tqdm
BASE = "https://use-land-property-data.service.gov.uk"
INDEX_URL = f"{BASE}/datasets/inspire/download"
BASE_URL = "https://use-land-property-data.service.gov.uk"
INDEX_URL = f"{BASE_URL}/datasets/inspire/download"
HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; perfect-postcode-data-pipeline/1.0)"
}
CHUNK_SIZE = 1024 * 1024
MAX_ATTEMPTS = 5
BACKOFF_BASE = 2.0
class ZipLinkParser(HTMLParser):
"""Collect links to Land Registry INSPIRE ZIP downloads."""
def __init__(self, base_url: str) -> None:
super().__init__()
self.base_url = base_url
self.base_netloc = urlparse(base_url).netloc
self.urls: set[str] = set()
def handle_starttag(
self, tag: str, attrs: list[tuple[str, str | None]]
) -> None:
if tag != "a":
return
href = dict(attrs).get("href")
if not href:
return
url = urljoin(self.base_url, href)
parsed = urlparse(url)
if (
parsed.scheme in {"http", "https"}
and parsed.netloc == self.base_netloc
and parsed.path.startswith("/datasets/inspire/download/")
and parsed.path.endswith(".zip")
):
self.urls.add(parsed._replace(query="", fragment="").geturl())
def parse_zip_urls(html: str, base_url: str = BASE_URL) -> list[str]:
    """Extract the ZIP download URLs from the INSPIRE download page.

    Args:
        html: Markup of the download page.
        base_url: URL that relative links are resolved against.

    Returns:
        The de-duplicated URLs collected by ``ZipLinkParser``, sorted.
    """
    collector = ZipLinkParser(base_url)
    collector.feed(html)
    ordered = list(collector.urls)
    ordered.sort()
    return ordered
def get_zip_urls() -> list[str]:
@ -25,28 +71,76 @@ def get_zip_urls() -> list[str]:
with httpx.Client(
follow_redirects=True,
timeout=httpx.Timeout(30.0, read=60),
headers={"User-Agent": "Mozilla/5.0", "Accept": "text/html"},
headers={**HEADERS, "Accept": "text/html"},
) as client:
resp = client.get(INDEX_URL)
resp.raise_for_status()
html = resp.text
pattern = r'href="(/datasets/inspire/download/[^"]+\.zip)"'
paths = sorted(set(re.findall(pattern, html)))
return [f"{BASE}{p}" for p in paths]
urls = parse_zip_urls(html)
if not urls:
raise RuntimeError(f"No INSPIRE ZIP links found at {INDEX_URL}")
return urls
def download_one(url: str, output_dir: Path, client: httpx.Client) -> str:
def _is_valid_zip(path: Path) -> bool:
return path.exists() and zipfile.is_zipfile(path)
def _stream_download(url: str, output_path: Path, *, timeout: float) -> None:
    """Stream ``url`` to ``output_path`` in CHUNK_SIZE pieces.

    ``timeout`` is the read timeout in seconds; the remaining httpx timeouts
    are fixed at 30 seconds. A non-success status raises via
    ``raise_for_status`` before the output file is opened.
    """
    request_timeout = httpx.Timeout(30.0, read=timeout)
    with httpx.stream(
        "GET",
        url,
        follow_redirects=True,
        timeout=request_timeout,
        headers=HEADERS,
    ) as response:
        response.raise_for_status()
        with output_path.open("wb") as sink:
            sink.writelines(response.iter_bytes(chunk_size=CHUNK_SIZE))
def download_one(
    url: str,
    output_dir: Path,
    *,
    force: bool = False,
    timeout: float = 600,
) -> str:
    """Download a single INSPIRE ZIP into ``output_dir``.

    Args:
        url: Download URL whose path must end in ``.zip``.
        output_dir: Destination directory; created when missing.
        force: Re-download even when a valid ZIP is already present.
        timeout: Per-file read timeout in seconds.

    Returns:
        The file name, or ``"<name> (skipped, valid ZIP exists)"`` when an
        existing valid ZIP was kept.

    Raises:
        ValueError: ``url`` does not point at a ``.zip`` file.
        RuntimeError: every one of MAX_ATTEMPTS attempts failed.
    """
    name = Path(urlparse(url).path).name
    if not name.endswith(".zip"):
        raise ValueError(f"Expected a ZIP download URL, got {url}")

    output_dir.mkdir(parents=True, exist_ok=True)
    dest = output_dir / name
    if not force and _is_valid_zip(dest):
        return f"{name} (skipped, valid ZIP exists)"

    # Download to a sibling temp file so a partial body never replaces dest.
    tmp = dest.with_suffix(dest.suffix + ".tmp")
    last_exc: Exception | None = None
    try:
        for attempt in range(1, MAX_ATTEMPTS + 1):
            tmp.unlink(missing_ok=True)
            try:
                _stream_download(url, tmp, timeout=timeout)
                if not _is_valid_zip(tmp):
                    raise RuntimeError(
                        f"{name} did not download as a valid ZIP"
                    )
                tmp.replace(dest)
                return name
            except (httpx.HTTPError, OSError, RuntimeError) as exc:
                # RuntimeError covers the invalid-ZIP case above so that a
                # bad body is retried with backoff like any transport error.
                last_exc = exc
                if attempt < MAX_ATTEMPTS:
                    time.sleep(BACKOFF_BASE ** (attempt - 1))
    finally:
        tmp.unlink(missing_ok=True)

    raise RuntimeError(
        f"{name} failed after {MAX_ATTEMPTS} attempts"
    ) from last_exc
def main() -> None:
@ -65,32 +159,61 @@ def main() -> None:
default=8,
help="Number of parallel downloads (default: 8)",
)
parser.add_argument(
"--force",
action="store_true",
help="Re-download files even when a valid ZIP already exists",
)
parser.add_argument(
"--timeout",
type=float,
default=600,
help="Per-file read timeout in seconds (default: 600)",
)
args = parser.parse_args()
if args.workers < 1:
raise SystemExit("--workers must be at least 1")
args.output.mkdir(parents=True, exist_ok=True)
print("Fetching download index...")
urls = get_zip_urls()
print(f"Found {len(urls)} files to download")
with (
httpx.Client(
follow_redirects=True,
timeout=httpx.Timeout(30.0, read=120),
headers={"User-Agent": "Mozilla/5.0"},
) as client,
tqdm(total=len(urls), unit="file") as pbar,
):
failures: list[tuple[str, Exception]] = []
with tqdm(total=len(urls), unit="file") as pbar:
with ThreadPoolExecutor(max_workers=args.workers) as pool:
futures = {
pool.submit(download_one, url, args.output, client): url for url in urls
pool.submit(
download_one,
url,
args.output,
force=args.force,
timeout=args.timeout,
): url
for url in urls
}
for future in as_completed(futures):
result = future.result()
pbar.set_postfix_str(result[:40])
try:
result = future.result()
pbar.set_postfix_str(result[:40])
except Exception as exc: # noqa: BLE001
failures.append((futures[future], exc))
pbar.set_postfix_str("FAILED")
pbar.update(1)
print(f"Done. {len(urls)} files in {args.output}")
succeeded = len(urls) - len(failures)
print(f"Done. {succeeded}/{len(urls)} files in {args.output}")
if failures:
print(f"{len(failures)} file(s) failed:")
for url, exc in failures:
name = Path(urlparse(url).path).name
print(f" - {name}: {exc}")
raise SystemExit(
f"{len(failures)} INSPIRE download(s) failed; "
"re-run to retry only the missing files"
)
if __name__ == "__main__":

View file

@ -5,6 +5,7 @@ License: Open Government Licence v3.0
"""
import argparse
import time
from pathlib import Path
import httpx
@ -14,6 +15,8 @@ URL = (
"https://opendata-historicengland.hub.arcgis.com/api/download/v1/items/"
"767f279327a24845bf47dfe5eae9862b/geoPackage?layers=0"
)
POLL_INTERVAL_S = 5
POLL_TIMEOUT_S = 600
def main() -> None:
@ -28,11 +31,24 @@ def main() -> None:
tmp_path = args.output.with_name(f"{args.output.stem}.tmp{args.output.suffix}")
print("Downloading Historic England listed-building points...")
with httpx.stream("GET", URL, follow_redirects=True, timeout=300) as response:
response.raise_for_status()
with tmp_path.open("wb") as fh:
for chunk in response.iter_bytes():
fh.write(chunk)
deadline = time.monotonic() + POLL_TIMEOUT_S
with httpx.Client(follow_redirects=True, timeout=300) as client:
while True:
with client.stream("GET", URL) as response:
if response.status_code == 202:
response.read()
if time.monotonic() > deadline:
raise TimeoutError(
f"Export did not finish within {POLL_TIMEOUT_S}s: "
f"{response.text}"
)
time.sleep(POLL_INTERVAL_S)
continue
response.raise_for_status()
with tmp_path.open("wb") as fh:
for chunk in response.iter_bytes():
fh.write(chunk)
break
info = pyogrio.read_info(tmp_path)
features = info.get("features", 0)

View file

@ -0,0 +1,432 @@
"""Download Sentinel-2 cloudless satellite tiles into a local PMTiles archive."""
from __future__ import annotations
import argparse
import email.utils
import http.client
import math
import sqlite3
import subprocess
import tempfile
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
from concurrent.futures import FIRST_COMPLETED, Future, ThreadPoolExecutor, wait
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from pipeline.download.tiles import ensure_pmtiles_cli
from pipeline.local_temp import local_tmp_dir
# Tile URL template; _fetch_tile fills {z}, {x} and {y} via str.format.
DEFAULT_TILE_URL = (
"https://tiles.maps.eox.at/wmts/1.0.0/s2cloudless_3857/default/"
"GoogleMapsCompatible/{z}/{y}/{x}.jpg"
)
# Default download extent as (west, south, east, north) in degrees.
DEFAULT_BBOX = (-10.5, 49.0, 5.0, 61.0)
# Default inclusive zoom range for --min-zoom / --max-zoom.
DEFAULT_MIN_ZOOM = 5
DEFAULT_MAX_ZOOM = 13
# Default for --retry-cooldown: the delay used after an EOX 403 response.
DEFAULT_RETRY_COOLDOWN = 15.0
# User-Agent header sent with every tile request.
USER_AGENT = "perfect-postcode-satellite-tiles/1.0"
# HTTP statuses that _http_retry_delay retries with exponential backoff.
RETRYABLE_HTTP_STATUS = {408, 429, 500, 502, 503, 504}
# Written to the MBTiles metadata table as the "attribution" entry.
ATTRIBUTION = (
"Sentinel-2 cloudless - https://s2maps.eu by EOX IT Services GmbH "
"(Contains modified Copernicus Sentinel data 2024)"
)
@dataclass(frozen=True)
class Tile:
    """Immutable, hashable address of one map tile.

    ``y`` counts rows from the north edge (as produced by _lonlat_to_tile);
    _create_mbtiles flips it to the TMS row when writing to MBTiles.
    """

    # Zoom level of the tile.
    zoom: int
    # Column index at this zoom level.
    x: int
    # Row index at this zoom level, counted from the top.
    y: int
class _DownloadThrottle:
def __init__(self, min_request_interval: float) -> None:
self._min_request_interval = max(0.0, min_request_interval)
self._next_request_at = 0.0
self._lock = threading.Lock()
def wait(self) -> None:
while True:
with self._lock:
now = time.monotonic()
wait_for = self._next_request_at - now
if wait_for <= 0:
if self._min_request_interval:
self._next_request_at = now + self._min_request_interval
return
time.sleep(min(wait_for, 1.0))
def defer(self, delay: float) -> bool:
if delay <= 0:
return False
target = time.monotonic() + delay
with self._lock:
should_announce = target > self._next_request_at + 1.0
self._next_request_at = max(self._next_request_at, target)
return should_announce
def _lonlat_to_tile(lon: float, lat: float, zoom: int) -> tuple[int, int]:
lat = max(min(lat, 85.05112878), -85.05112878)
n = 1 << zoom
x = int(math.floor((lon + 180.0) / 360.0 * n))
y = int(
math.floor((1.0 - math.asinh(math.tan(math.radians(lat))) / math.pi) / 2.0 * n)
)
return min(max(x, 0), n - 1), min(max(y, 0), n - 1)
def _tile_ranges(
    bbox: tuple[float, float, float, float], zoom: int
) -> tuple[range, range]:
    """Return the inclusive (x_range, y_range) of tiles covering ``bbox``.

    ``bbox`` is (west, south, east, north). The north-west corner yields the
    smallest indices and the south-east corner the largest.
    """
    west, south, east, north = bbox
    first_col, first_row = _lonlat_to_tile(west, north, zoom)
    last_col, last_row = _lonlat_to_tile(east, south, zoom)
    columns = range(first_col, last_col + 1)
    rows = range(first_row, last_row + 1)
    return columns, rows
def _iter_tiles(
    bbox: tuple[float, float, float, float], min_zoom: int, max_zoom: int
):
    """Yield every Tile covering ``bbox`` for each zoom in the inclusive range.

    Order is zoom ascending, then column, then row.
    """
    for level in range(min_zoom, max_zoom + 1):
        columns, rows = _tile_ranges(bbox, level)
        yield from (
            Tile(zoom=level, x=column, y=row)
            for column in columns
            for row in rows
        )
def _tile_count(
    bbox: tuple[float, float, float, float], min_zoom: int, max_zoom: int
) -> int:
    """Return how many tiles _iter_tiles would yield for the same arguments."""
    per_zoom_ranges = (
        _tile_ranges(bbox, level) for level in range(min_zoom, max_zoom + 1)
    )
    return sum(len(columns) * len(rows) for columns, rows in per_zoom_ranges)
def _is_eox_tile_url(url: str) -> bool:
host = urllib.parse.urlparse(url).hostname or ""
return host == "tiles.maps.eox.at" or host.endswith(".tiles.maps.eox.at")
def _retry_after_seconds(headers) -> float | None:
raw = None
if headers is not None:
raw = headers.get("retry-after") or headers.get("Retry-After")
if not raw:
return None
try:
return max(0.0, float(raw))
except ValueError:
pass
try:
retry_at = email.utils.parsedate_to_datetime(raw)
except (TypeError, ValueError):
return None
if retry_at.tzinfo is None:
retry_at = retry_at.replace(tzinfo=timezone.utc)
return max(0.0, (retry_at - datetime.now(timezone.utc)).total_seconds())
def _http_retry_delay(
    err: urllib.error.HTTPError,
    url: str,
    attempt: int,
    retry_cooldown: float,
) -> float | None:
    """Decide how long to wait before retrying after an HTTP error.

    Returns None when the error should not be retried. Precedence: 204/404
    are never retried; a parseable Retry-After header wins; a 403 from an EOX
    host uses ``retry_cooldown``; statuses in RETRYABLE_HTTP_STATUS use an
    exponential backoff capped at 2 seconds.
    """
    status = err.code
    if status in {204, 404}:
        return None

    server_hint = _retry_after_seconds(err.headers)
    if server_hint is not None:
        return server_hint

    if status == 403 and _is_eox_tile_url(url):
        return retry_cooldown

    if status not in RETRYABLE_HTTP_STATUS:
        return None
    return min(2.0, 0.25 * (2**attempt))
def _fetch_tile(
    tile: Tile,
    source_url: str,
    timeout: float,
    retries: int,
    throttle: _DownloadThrottle,
    retry_cooldown: float,
) -> tuple[Tile, bytes | None]:
    """Download one tile, retrying transient failures.

    Returns ``(tile, data)``; ``data`` is None when the server answers 204/404
    or sends an empty body. Raises RuntimeError when an HTTP error is not
    retryable or when all ``retries + 1`` attempts fail.
    """
    url = source_url.format(z=tile.zoom, x=tile.x, y=tile.y)
    failure: Exception | None = None

    for attempt in range(retries + 1):
        try:
            # Honour the shared gate before every attempt, including retries.
            throttle.wait()
            request = urllib.request.Request(
                url, headers={"User-Agent": USER_AGENT}
            )
            with urllib.request.urlopen(request, timeout=timeout) as response:
                mime_type = response.headers.get("content-type", "")
                payload = response.read()
                if not payload:
                    return tile, None
                if not mime_type.lower().startswith("image/"):
                    # Caught below as a RuntimeError and retried with backoff.
                    raise RuntimeError(
                        f"Unexpected content type for {url}: {mime_type or 'unknown'}"
                    )
                return tile, payload
        except urllib.error.HTTPError as err:
            if err.code in {204, 404}:
                return tile, None
            retry_delay = _http_retry_delay(err, url, attempt, retry_cooldown)
            if retry_delay is None:
                raise RuntimeError(
                    f"Failed to download satellite tile {url}: {err}"
                ) from err
            failure = err
        except (
            TimeoutError,
            urllib.error.URLError,
            ConnectionError,
            http.client.HTTPException,
            RuntimeError,
        ) as err:
            failure = err
            retry_delay = min(2.0, 0.25 * (2**attempt))

        if attempt < retries:
            # defer() pauses every worker; only long pauses are announced.
            if throttle.defer(retry_delay) and retry_delay >= 5.0:
                print(
                    f"Satellite tile source returned {failure}; "
                    f"pausing downloads for {retry_delay:.0f}s before retrying",
                    flush=True,
                )

    assert failure is not None
    raise RuntimeError(f"Failed to download satellite tile {url}: {failure}") from failure
def _create_mbtiles(
    mbtiles_path: Path,
    bbox: tuple[float, float, float, float],
    min_zoom: int,
    max_zoom: int,
    source_url: str,
    max_workers: int,
    timeout: float,
    retries: int,
    retry_cooldown: float,
    min_request_interval: float,
) -> int:
    """Download all tiles for ``bbox`` into a fresh MBTiles SQLite file.

    Any existing file at ``mbtiles_path`` is removed first. Tiles are fetched
    by a thread pool with at most ``max_workers * 4`` requests queued at once
    and stored with TMS row numbering.

    Returns:
        The number of tiles stored (tiles with no data are skipped).

    Raises:
        RuntimeError: propagated from _fetch_tile when a tile cannot be
            downloaded; queued fetches that have not started are cancelled.
    """
    if mbtiles_path.exists():
        mbtiles_path.unlink()

    total = _tile_count(bbox, min_zoom, max_zoom)
    inserted = 0
    completed = 0
    tiles = iter(_iter_tiles(bbox, min_zoom, max_zoom))
    pending: set[Future[tuple[Tile, bytes | None]]] = set()
    queue_size = max_workers * 4
    throttle = _DownloadThrottle(min_request_interval=min_request_interval)

    def submit_next(executor: ThreadPoolExecutor) -> bool:
        """Queue the next tile fetch; return False when no tiles remain."""
        try:
            tile = next(tiles)
        except StopIteration:
            return False
        pending.add(
            executor.submit(
                _fetch_tile,
                tile,
                source_url,
                timeout,
                retries,
                throttle,
                retry_cooldown,
            )
        )
        return True

    conn = sqlite3.connect(mbtiles_path)
    try:
        # Schema setup lives inside the try so the connection is closed even
        # when one of these statements fails.
        conn.execute("PRAGMA journal_mode = WAL")
        conn.execute("PRAGMA synchronous = NORMAL")
        conn.execute("CREATE TABLE metadata (name TEXT, value TEXT)")
        conn.execute(
            "CREATE TABLE tiles (zoom_level INTEGER, tile_column INTEGER, "
            "tile_row INTEGER, tile_data BLOB)"
        )
        conn.execute(
            "CREATE UNIQUE INDEX tile_index ON tiles (zoom_level, tile_column, tile_row)"
        )
        conn.executemany(
            "INSERT INTO metadata (name, value) VALUES (?, ?)",
            [
                ("name", "Sentinel-2 cloudless satellite basemap"),
                ("type", "baselayer"),
                ("version", "1"),
                ("description", "Sentinel-2 cloudless Web Mercator satellite imagery"),
                ("format", "jpg"),
                ("attribution", ATTRIBUTION),
                ("bounds", ",".join(f"{value:.6f}" for value in bbox)),
                ("minzoom", str(min_zoom)),
                ("maxzoom", str(max_zoom)),
            ],
        )

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            try:
                for _ in range(queue_size):
                    if not submit_next(executor):
                        break

                while pending:
                    done, pending = wait(pending, return_when=FIRST_COMPLETED)
                    for future in done:
                        tile, tile_data = future.result()
                        completed += 1
                        if tile_data is not None:
                            # MBTiles rows count from the south edge (TMS).
                            tms_y = (1 << tile.zoom) - 1 - tile.y
                            conn.execute(
                                "INSERT OR REPLACE INTO tiles VALUES (?, ?, ?, ?)",
                                (tile.zoom, tile.x, tms_y, tile_data),
                            )
                            inserted += 1
                        submit_next(executor)

                        if completed % 1000 == 0 or completed == total:
                            conn.commit()
                            print(
                                f"Downloaded {completed:,}/{total:,} satellite tiles "
                                f"({inserted:,} stored)",
                                flush=True,
                            )
            except BaseException:
                # Drop queued fetches so the failure surfaces without first
                # downloading tiles that will be discarded.
                for future in pending:
                    future.cancel()
                raise
    finally:
        try:
            conn.commit()
        finally:
            conn.close()

    return inserted
def build_satellite_tiles(
    output_path: Path,
    pmtiles_bin: Path,
    pmtiles_version: str,
    bbox: tuple[float, float, float, float],
    min_zoom: int,
    max_zoom: int,
    source_url: str,
    max_workers: int,
    timeout: float,
    retries: int,
    retry_cooldown: float,
    min_request_interval: float,
) -> None:
    """Download satellite tiles and convert them to a PMTiles archive.

    Tiles are first written to a temporary MBTiles file, then converted with
    the pmtiles CLI (``convert ... --force``) into ``output_path``.

    Raises:
        ValueError: the zoom range or bbox ordering is invalid.
        RuntimeError: the download stored no tiles.
        subprocess.CalledProcessError: the pmtiles conversion failed.
    """
    if min_zoom > max_zoom:
        raise ValueError("--min-zoom must be <= --max-zoom")
    if len(bbox) != 4 or bbox[0] >= bbox[2] or bbox[1] >= bbox[3]:
        raise ValueError("--bbox must be west,south,east,north")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    ensure_pmtiles_cli(pmtiles_bin, pmtiles_version)

    with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as scratch_dir:
        mbtiles_path = Path(scratch_dir) / "satellite.mbtiles"
        stored_tiles = _create_mbtiles(
            mbtiles_path=mbtiles_path,
            bbox=bbox,
            min_zoom=min_zoom,
            max_zoom=max_zoom,
            source_url=source_url,
            max_workers=max_workers,
            timeout=timeout,
            retries=retries,
            retry_cooldown=retry_cooldown,
            min_request_interval=min_request_interval,
        )
        if stored_tiles == 0:
            raise RuntimeError("Satellite tile download produced no tiles")

        convert_command = [
            str(pmtiles_bin),
            "convert",
            str(mbtiles_path),
            str(output_path),
            "--force",
        ]
        subprocess.run(convert_command, check=True)

    size_mb = output_path.stat().st_size / (1024 * 1024)
    print(f"Wrote {output_path} ({size_mb:.1f} MB)", flush=True)
def _parse_bbox(raw: str) -> tuple[float, float, float, float]:
parts = [float(part.strip()) for part in raw.split(",")]
if len(parts) != 4:
raise argparse.ArgumentTypeError("bbox must contain four comma-separated numbers")
return parts[0], parts[1], parts[2], parts[3]
def main() -> None:
    """Command-line entry point: parse options and build the PMTiles archive.

    Worker count, retries, cooldown and request interval are clamped to their
    minimum sensible values before being passed on.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--output", type=Path, required=True)
    cli.add_argument(
        "--pmtiles-bin", type=Path, default=Path("property-data/pmtiles")
    )
    cli.add_argument("--pmtiles-version", default="1.22.3")
    cli.add_argument("--bbox", type=_parse_bbox, default=DEFAULT_BBOX)
    cli.add_argument("--min-zoom", type=int, default=DEFAULT_MIN_ZOOM)
    cli.add_argument("--max-zoom", type=int, default=DEFAULT_MAX_ZOOM)
    cli.add_argument("--source-url", default=DEFAULT_TILE_URL)
    cli.add_argument("--max-workers", type=int, default=8)
    cli.add_argument("--timeout", type=float, default=20.0)
    cli.add_argument("--retries", type=int, default=3)
    cli.add_argument(
        "--retry-cooldown",
        type=float,
        default=DEFAULT_RETRY_COOLDOWN,
        help="Seconds to pause all workers after an EOX rate-limit response",
    )
    cli.add_argument(
        "--min-request-interval",
        type=float,
        default=0.0,
        help="Minimum seconds between tile requests across all workers",
    )
    options = cli.parse_args()

    worker_count = max(1, options.max_workers)
    retry_count = max(0, options.retries)
    cooldown = max(0.0, options.retry_cooldown)
    request_interval = max(0.0, options.min_request_interval)

    build_satellite_tiles(
        output_path=options.output,
        pmtiles_bin=options.pmtiles_bin,
        pmtiles_version=options.pmtiles_version,
        bbox=options.bbox,
        min_zoom=options.min_zoom,
        max_zoom=options.max_zoom,
        source_url=options.source_url,
        max_workers=worker_count,
        timeout=options.timeout,
        retries=retry_count,
        retry_cooldown=cooldown,
        min_request_interval=request_interval,
    )
if __name__ == "__main__":
main()

View file

@ -0,0 +1,61 @@
from zipfile import ZipFile
from pipeline.download import inspire
def _write_zip(path):
with ZipFile(path, "w") as archive:
archive.writestr("example.gml", "<gml />")
def test_parse_zip_urls_finds_relative_and_absolute_links():
    """Relative, absolute and query-suffixed links collapse to sorted unique URLs;
    links for another dataset or another host are ignored."""
    page = """
    <a href="/datasets/inspire/download/Adur_District_Council.zip">Download</a>
    <a href="https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip">Duplicate</a>
    <a href="/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip?x=1">Query suffix</a>
    <a href="/datasets/llc/download/Adur_District_Council.zip">Wrong dataset</a>
    <a href="https://example.com/datasets/inspire/download/Fake.zip">Wrong host</a>
    """
    expected = [
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip",
        "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Barnsley_Metropolitan_Borough_Council.zip",
    ]

    assert inspire.parse_zip_urls(page) == expected
def test_download_one_skips_existing_valid_zip(monkeypatch, tmp_path):
    """A valid ZIP already on disk is kept and no download is attempted."""
    _write_zip(tmp_path / "Adur_District_Council.zip")

    def must_not_download(*args, **kwargs):
        raise AssertionError("download should not run")

    monkeypatch.setattr(inspire, "_stream_download", must_not_download)

    url = "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip"
    outcome = inspire.download_one(url, tmp_path)

    assert outcome == "Adur_District_Council.zip (skipped, valid ZIP exists)"
def test_download_one_replaces_invalid_existing_file(monkeypatch, tmp_path):
    """A non-ZIP file at the destination is re-downloaded and the temp file removed."""
    target = tmp_path / "Adur_District_Council.zip"
    target.write_text("not a zip")

    def write_valid_zip(url, output_path, *, timeout):
        _write_zip(output_path)

    monkeypatch.setattr(inspire, "_stream_download", write_valid_zip)

    url = "https://use-land-property-data.service.gov.uk/datasets/inspire/download/Adur_District_Council.zip"
    outcome = inspire.download_one(url, tmp_path)

    assert outcome == "Adur_District_Council.zip"
    assert inspire._is_valid_zip(target)
    leftover_tmp = tmp_path / "Adur_District_Council.zip.tmp"
    assert not leftover_tmp.exists()

View file

@ -0,0 +1,97 @@
import urllib.error
import pytest
from pipeline.download import satellite_tiles
class _Response:
headers = {"content-type": "image/jpeg"}
def __init__(self, data: bytes = b"jpeg") -> None:
self._data = data
def read(self) -> bytes:
return self._data
def __enter__(self):
return self
def __exit__(self, exc_type, exc, traceback):
return False
class _Throttle:
def __init__(self) -> None:
self.deferred: list[float] = []
self.waits = 0
def wait(self) -> None:
self.waits += 1
def defer(self, delay: float) -> bool:
self.deferred.append(delay)
return False
def _http_error(url: str, code: int) -> urllib.error.HTTPError:
return urllib.error.HTTPError(url, code, "Forbidden", {}, None)
def test_fetch_tile_retries_eox_403_with_shared_cooldown(monkeypatch):
    """A 403 from the EOX host is retried after deferring by the cooldown."""
    tile = satellite_tiles.Tile(zoom=9, x=248, y=172)
    expected_url = (
        "https://tiles.maps.eox.at/wmts/1.0.0/s2cloudless_3857/default/"
        "GoogleMapsCompatible/9/172/248.jpg"
    )
    requested: list[str] = []

    def fake_urlopen(req, timeout):
        requested.append(req.full_url)
        if len(requested) > 1:
            return _Response()
        # First request is rejected; the retry succeeds.
        raise _http_error(req.full_url, 403)

    monkeypatch.setattr(satellite_tiles.urllib.request, "urlopen", fake_urlopen)
    throttle = _Throttle()

    fetched_tile, data = satellite_tiles._fetch_tile(
        tile,
        satellite_tiles.DEFAULT_TILE_URL,
        timeout=1.0,
        retries=1,
        throttle=throttle,
        retry_cooldown=15.0,
    )

    assert fetched_tile == tile
    assert data == b"jpeg"
    assert requested == [expected_url, expected_url]
    assert throttle.deferred == [15.0]
    assert throttle.waits == 2
def test_fetch_tile_does_not_retry_non_eox_403(monkeypatch):
    """A 403 from any other host fails immediately, without deferring."""
    tile = satellite_tiles.Tile(zoom=9, x=248, y=172)
    requested: list[str] = []

    def always_forbidden(req, timeout):
        requested.append(req.full_url)
        raise _http_error(req.full_url, 403)

    monkeypatch.setattr(
        satellite_tiles.urllib.request, "urlopen", always_forbidden
    )
    throttle = _Throttle()

    with pytest.raises(RuntimeError, match="HTTP Error 403"):
        satellite_tiles._fetch_tile(
            tile,
            "https://example.com/{z}/{x}/{y}.jpg",
            timeout=1.0,
            retries=1,
            throttle=throttle,
            retry_cooldown=15.0,
        )

    assert requested == ["https://example.com/9/248/172.jpg"]
    assert throttle.deferred == []

View file

@ -1,960 +0,0 @@
import argparse
import re
import tempfile
from pathlib import Path
import polars as pl
from thefuzz import fuzz
from tqdm import tqdm
from pipeline.local_temp import local_tmp_dir
from pipeline.transform.join_epc_pp import _scan_epc_certificates
from pipeline.utils.fuzzy_join import normalize_address_key, normalize_postcode_key
from pipeline.utils.postcode_mapping import build_postcode_mapping
# NOTE(review): not referenced in the visible part of this file; presumably a
# lower bound on plausible floor areas — confirm against its use site.
MIN_FLOOR_AREA_M2 = 10.0
# Thresholds applied by _best_property_candidate: the top candidate's total
# score must reach the WITH_NUMBERS value when the listing address contains a
# digit (WITHOUT_NUMBERS otherwise), and must lead the runner-up by at least
# the margin.
PROPERTY_MATCH_MIN_SCORE_WITH_NUMBERS = 82.0
PROPERTY_MATCH_MIN_SCORE_WITHOUT_NUMBERS = 96.0
PROPERTY_MATCH_MIN_MARGIN = 4.0
# NOTE(review): the EPC thresholds mirror the property ones; their use is not
# visible in this part of the file — confirm.
EPC_MATCH_MIN_SCORE_WITH_NUMBERS = 82.0
EPC_MATCH_MIN_SCORE_WITHOUT_NUMBERS = 96.0
EPC_MATCH_MIN_MARGIN = 4.0
# NOTE(review): use not visible here; presumably a version tag for the output.
ENRICHMENT_VERSION = 1
# Matches each run of digits; used to compare the numbers in two addresses.
_NUMBER_RE = re.compile(r"\d+")
# Columns the listings parquet must contain; _read_listings raises ValueError
# naming any that are missing.
LISTING_REQUIRED_COLUMNS = [
"Bedrooms",
"Bathrooms",
"Number of bedrooms & living rooms",
"lon",
"lat",
"Postcode",
"Address per Property Register",
"Leasehold/Freehold",
"Property type",
"Property sub-type",
"Price qualifier",
"Total floor area (sqm)",
"Listing URL",
"Listing features",
"Listing date",
"Listing status",
"Asking price",
"Asking price per sqm",
]
# Columns read from the properties parquet when present (see
# _load_property_candidates) and later renamed with the "_property_" prefix
# by _property_match_frame.
PROPERTY_CANDIDATE_COLUMNS = [
"Address per Property Register",
"Postcode",
"Leasehold/Freehold",
"Last known price",
"Date of last transaction",
"Address per EPC",
"Current energy rating",
"Potential energy rating",
"Total floor area (sqm)",
"Number of bedrooms & living rooms",
"Interior height (m)",
"Construction year",
"Former council house",
"Is construction date approximate",
"Listed building",
"Estimated monthly rent",
"Street tree density percentile",
"Property type",
"Price per sqm",
"Estimated current price",
"Est. price per sqm",
]
# NOTE(review): use not visible in this part of the file; presumably the
# columns copied onto a listing from its matched historical property — confirm.
PROPERTY_ENRICHMENT_COLUMNS = [
"Address per EPC",
"Current energy rating",
"Potential energy rating",
"Interior height (m)",
"Construction year",
"Former council house",
"Is construction date approximate",
"Listed building",
"Estimated monthly rent",
"Street tree density percentile",
"Date of last transaction",
]
# NOTE(review): use not visible in this part of the file; presumably the
# columns copied onto a listing from its matched EPC certificate — confirm.
EPC_ENRICHMENT_COLUMNS = [
"Address per EPC",
"Current energy rating",
"Potential energy rating",
"Total floor area (sqm)",
"Number of bedrooms & living rooms",
"Interior height (m)",
"Construction year",
"Former council house",
]
# Accepted energy-rating letters; _load_epc_candidates nulls any other value.
EPC_RATING_VALUES = ["A", "B", "C", "D", "E", "F", "G"]
# NOTE(review): use not visible in this part of the file; presumably the
# allowed values of "Leasehold/Freehold" — confirm.
TENURE_VALUES = ["Freehold", "Leasehold"]
# NOTE(review): use not visible in this part of the file; presumably the
# canonical "Property type" categories — confirm.
PROPERTY_TYPE_VALUES = [
"Detached",
"Semi-Detached",
"Terraced",
"Flats/Maisonettes",
"Other",
]
# Polars dtype per output column. _ensure_prefixed_columns uses it to type the
# null placeholder it adds for a missing column, falling back to pl.Utf8.
COLUMN_DTYPES = {
"Address per EPC": pl.Utf8,
"Current energy rating": pl.Utf8,
"Potential energy rating": pl.Utf8,
"Total floor area (sqm)": pl.Float64,
"Number of bedrooms & living rooms": pl.Int32,
"Interior height (m)": pl.Float64,
"Construction year": pl.UInt16,
"Former council house": pl.Utf8,
"Is construction date approximate": pl.UInt8,
"Listed building": pl.Utf8,
"Estimated monthly rent": pl.Float32,
"Street tree density percentile": pl.Float32,
"Date of last transaction": pl.Datetime("us"),
"Property type": pl.Utf8,
"Leasehold/Freehold": pl.Utf8,
}
def _canonical_postcode_expr(column: str) -> pl.Expr:
    """Build an expression that normalises ``column`` to ``"OUTWARD INWARD"`` form.

    The value is upper-cased and stripped of everything except A-Z and 0-9.
    When the compact form matches the postcode pattern, a single space is
    inserted before the final digit-letter-letter group; otherwise the result
    is null.
    """
    uppercased = pl.col(column).cast(pl.Utf8).str.to_uppercase()
    compact = uppercased.str.replace_all(r"[^A-Z0-9]+", "").str.strip_chars()
    looks_like_postcode = compact.str.contains(
        r"^[A-Z]{1,2}\d[A-Z\d]?\d[A-Z]{2}$"
    )
    spaced = compact.str.replace(r"^(.+)([0-9][A-Z]{2})$", "${1} ${2}")
    return pl.when(looks_like_postcode).then(spaced).otherwise(None)
def _clean_string_expr(column: str) -> pl.Expr:
    """Build an expression that trims ``column`` and maps empty strings to null."""
    trimmed = pl.col(column).cast(pl.Utf8).str.strip_chars()
    is_blank = trimmed == ""
    return pl.when(is_blank).then(None).otherwise(trimmed)
def _coalesce_non_empty(*columns: str) -> pl.Expr:
    """Build an expression yielding the first non-blank value among ``columns``.

    Each column is cast to string; values that are empty after trimming count
    as null, but the untrimmed text is what gets returned.
    """
    candidates = []
    for column in columns:
        as_text = pl.col(column).cast(pl.Utf8)
        is_blank = as_text.str.strip_chars() == ""
        candidates.append(pl.when(is_blank).then(None).otherwise(as_text))
    return pl.coalesce(candidates)
def _valid_number_expr(column: str) -> pl.Expr:
    """Build an expression that keeps finite values of ``column`` and nulls the rest."""
    value = pl.col(column)
    return pl.when(value.is_finite()).then(value).otherwise(None)
def _read_listings(listings_path: Path, arcgis_path: Path) -> pl.DataFrame:
    """Load listings and attach row ids, canonical postcodes and match keys.

    Raises ValueError when the parquet lacks any LISTING_REQUIRED_COLUMNS.
    "Postcode" is replaced by the mapped ``new_postcode`` from
    build_postcode_mapping when one exists, else the canonical original, else
    the raw value; ``_listing_match_postcode`` is then derived from it.
    """
    available = set(pl.scan_parquet(listings_path).collect_schema().names())
    missing = sorted(set(LISTING_REQUIRED_COLUMNS) - available)
    if missing:
        raise ValueError(f"{listings_path} is missing listing columns: {missing}")

    original_postcode = _canonical_postcode_expr("Postcode").alias(
        "_original_postcode"
    )
    address_key = normalize_address_key(
        pl.col("Address per Property Register")
    ).alias("_listing_match_address")
    raw_postcode_key = normalize_postcode_key(pl.col("Postcode")).alias(
        "_listing_match_postcode"
    )
    listings = (
        pl.scan_parquet(listings_path)
        .with_row_index("_listing_idx")
        .with_columns(original_postcode, address_key, raw_postcode_key)
        .collect(engine="streaming")
    )

    postcode_mapping = build_postcode_mapping(arcgis_path)
    remapped = listings.join(
        postcode_mapping,
        left_on="_original_postcode",
        right_on="old_postcode",
        how="left",
    )
    remapped = remapped.with_columns(
        pl.coalesce("new_postcode", "_original_postcode", "Postcode").alias(
            "Postcode"
        ),
    ).drop("new_postcode", strict=False)

    # Recompute the key now that "Postcode" holds the remapped value.
    return remapped.with_columns(
        normalize_postcode_key(pl.col("Postcode")).alias("_listing_match_postcode"),
    )
def _load_property_candidates(
    properties_path: Path, listing_postcodes: list[str]
) -> pl.DataFrame:
    """Load historical properties whose postcode key is in ``listing_postcodes``.

    Only PROPERTY_CANDIDATE_COLUMNS present in the parquet are read. Rows get
    normalised register/EPC address keys (the EPC key is null when the column
    is absent), rows with neither key are dropped, and a ``_property_row``
    index is added. Raises ValueError when a mandatory column is missing.
    """
    schema = pl.scan_parquet(properties_path).collect_schema()
    columns = [
        column for column in PROPERTY_CANDIDATE_COLUMNS if column in schema.names()
    ]
    mandatory = {
        "Address per Property Register",
        "Postcode",
        "Property type",
        "Total floor area (sqm)",
    }
    missing = sorted(mandatory - set(columns))
    if missing:
        raise ValueError(f"{properties_path} is missing property columns: {missing}")

    postcode_key = normalize_postcode_key(pl.col("Postcode")).alias(
        "_match_postcode"
    )
    register_key = normalize_address_key(
        pl.col("Address per Property Register")
    ).alias("_match_register_address")
    if "Address per EPC" in columns:
        epc_key = normalize_address_key(pl.col("Address per EPC")).alias(
            "_match_epc_address"
        )
    else:
        epc_key = pl.lit(None, dtype=pl.Utf8).alias("_match_epc_address")
    has_any_address = (
        pl.col("_match_register_address").is_not_null()
        | pl.col("_match_epc_address").is_not_null()
    )

    lazy = pl.scan_parquet(properties_path).select(columns)
    lazy = lazy.with_columns(postcode_key)
    lazy = lazy.filter(pl.col("_match_postcode").is_in(listing_postcodes))
    lazy = lazy.with_columns(register_key, epc_key)
    lazy = lazy.filter(has_any_address)
    lazy = lazy.with_row_index("_property_row")
    return lazy.collect(engine="streaming")
def _property_candidates_by_postcode(
    candidates: pl.DataFrame,
) -> dict[str, list[dict]]:
    """Group candidate rows (as dicts) by ``_match_postcode``.

    Rows with a null or empty postcode key are left out.
    """
    grouped: dict[str, list[dict]] = {}
    for record in candidates.iter_rows(named=True):
        key = record.get("_match_postcode")
        if not key:
            continue
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(record)
    return grouped
def _numbers_compatible(left: str | None, right: str | None) -> bool:
    """Return True when the digit runs of one address are a subset of the other's.

    False when either address is missing/empty, or when exactly one of them
    contains numbers. Two addresses without any numbers are compatible.
    """
    if not left or not right:
        return False

    first = set(_NUMBER_RE.findall(left))
    second = set(_NUMBER_RE.findall(right))
    if len(first) <= len(second):
        smaller, larger = first, second
    else:
        smaller, larger = second, first

    if larger and not smaller:
        return False
    return smaller <= larger
def _has_number(address: str | None) -> bool:
    """Return True when ``address`` is non-empty and contains at least one digit."""
    if not address:
        return False
    return _NUMBER_RE.search(address) is not None
def _ratio_bonus(
left: float | int | None, right: float | int | None, pct: float, cap: float
) -> float:
if left is None or right is None:
return 0.0
try:
left_f = float(left)
right_f = float(right)
except (TypeError, ValueError):
return 0.0
if left_f <= 0 or right_f <= 0:
return 0.0
rel = abs(left_f - right_f) / max(left_f, right_f)
if rel > pct:
return 0.0
return cap * (1.0 - rel / pct)
def _rooms_bonus(left: int | None, right: int | None) -> float:
if left is None or right is None:
return 0.0
try:
diff = abs(int(left) - int(right))
except (TypeError, ValueError):
return 0.0
if diff == 0:
return 4.0
if diff == 1:
return 2.0
return 0.0
def _enum_bonus(
left: str | None, right: str | None, *, exact: float, mismatch: float
) -> float:
if not left or not right:
return 0.0
return exact if left == right else mismatch
def _address_score(query: str, candidate: str | None) -> int:
    """Return the higher of the token-set and token-sort fuzzy ratios.

    A missing or empty ``candidate`` scores 0.
    """
    if not candidate:
        return 0
    set_score = fuzz.token_set_ratio(query, candidate)
    sort_score = fuzz.token_sort_ratio(query, candidate)
    return max(set_score, sort_score)
def _best_property_candidate(listing: dict, candidates: list[dict]) -> dict | None:
    """Pick the single historical property that best matches ``listing``.

    Each candidate's score is its best fuzzy address score (register or EPC
    address) plus bonuses/penalties for property type, tenure, floor area,
    room count and price. When the listing address contains numbers,
    candidates whose numbers are incompatible with it are skipped.

    Returns a match record for the top candidate, or None when there is no
    usable candidate, the top score is under the threshold, or its lead over
    the runner-up is under PROPERTY_MATCH_MIN_MARGIN.
    """
    query = listing.get("_listing_match_address")
    if not query:
        return None

    listing_has_numbers = _has_number(query)
    ranked: list[tuple[float, int, dict, str]] = []

    for candidate in candidates:
        register_address = candidate.get("_match_register_address")
        epc_address = candidate.get("_match_epc_address")

        if listing_has_numbers:
            numbers_agree = _numbers_compatible(
                query, register_address
            ) or _numbers_compatible(query, epc_address)
            if not numbers_agree:
                continue

        register_score = _address_score(query, register_address)
        epc_score = _address_score(query, epc_address)
        base_score = max(register_score, epc_score)
        if base_score == 0:
            continue

        # Prefer the estimated current price; fall back to the last sale price.
        reference_price = candidate.get("Estimated current price") or candidate.get(
            "Last known price"
        )
        adjustments = (
            _enum_bonus(
                listing.get("Property type"),
                candidate.get("Property type"),
                exact=7.0,
                mismatch=-8.0,
            ),
            _enum_bonus(
                listing.get("Leasehold/Freehold"),
                candidate.get("Leasehold/Freehold"),
                exact=3.0,
                mismatch=-3.0,
            ),
            _ratio_bonus(
                listing.get("Total floor area (sqm)"),
                candidate.get("Total floor area (sqm)"),
                pct=0.15,
                cap=8.0,
            ),
            _rooms_bonus(
                listing.get("Number of bedrooms & living rooms"),
                candidate.get("Number of bedrooms & living rooms"),
            ),
            _ratio_bonus(
                listing.get("Asking price"),
                reference_price,
                pct=0.25,
                cap=3.0,
            ),
        )
        score = float(base_score)
        for adjustment in adjustments:
            score += adjustment

        if register_score >= epc_score:
            matched_field = "Address per Property Register"
        else:
            matched_field = "Address per EPC"
        ranked.append((score, base_score, candidate, matched_field))

    if not ranked:
        return None

    ranked.sort(key=lambda entry: entry[0], reverse=True)
    best_score, best_base, best_candidate, best_field = ranked[0]
    # With a single candidate the margin is the score itself.
    if len(ranked) > 1:
        margin = best_score - ranked[1][0]
    else:
        margin = best_score

    if listing_has_numbers:
        threshold = PROPERTY_MATCH_MIN_SCORE_WITH_NUMBERS
    else:
        threshold = PROPERTY_MATCH_MIN_SCORE_WITHOUT_NUMBERS
    if best_score < threshold or margin < PROPERTY_MATCH_MIN_MARGIN:
        return None

    return {
        "_listing_idx": listing["_listing_idx"],
        "_property_row": best_candidate["_property_row"],
        "Historical property match score": round(best_score, 1),
        "Historical property address score": best_base,
        "Historical property match margin": round(margin, 1),
        "Historical property match field": best_field,
        "Historical property match status": "matched",
    }
def _match_properties(listings: pl.DataFrame, candidates: pl.DataFrame) -> pl.DataFrame:
    """Match every listing against the candidates sharing its postcode key.

    Returns one row per matched listing with the fixed schema below; the
    frame is empty (but typed) when there are no candidates or no matches.
    """
    schema = {
        "_listing_idx": pl.UInt32,
        "_property_row": pl.UInt32,
        "Historical property match score": pl.Float32,
        "Historical property address score": pl.Int32,
        "Historical property match margin": pl.Float32,
        "Historical property match field": pl.Utf8,
        "Historical property match status": pl.Utf8,
    }
    if candidates.is_empty():
        return pl.DataFrame(schema=schema)

    buckets = _property_candidates_by_postcode(candidates)
    progress = tqdm(
        listings.iter_rows(named=True),
        total=listings.height,
        desc="Matching historical properties",
    )
    found = []
    for listing in progress:
        postcode = listing.get("_listing_match_postcode")
        if not postcode:
            continue
        best = _best_property_candidate(listing, buckets.get(postcode, []))
        if best is None:
            continue
        found.append(best)

    if found:
        return pl.DataFrame(found, schema=schema)
    return pl.DataFrame(schema=schema)
def _prefix_columns(df: pl.DataFrame, columns: list[str], prefix: str) -> pl.DataFrame:
    """Rename each of ``columns`` present in ``df`` to ``prefix + name``."""
    renames = {}
    for column in columns:
        if column in df.columns:
            renames[column] = f"{prefix}{column}"
    return df.rename(renames)
def _ensure_prefixed_columns(
    df: pl.DataFrame, columns: list[str], prefix: str
) -> pl.DataFrame:
    """Add a typed all-null column for every ``prefix + name`` missing from ``df``.

    The dtype comes from COLUMN_DTYPES (pl.Utf8 when unlisted). ``df`` is
    returned unchanged when nothing is missing.
    """
    placeholders = []
    for column in columns:
        prefixed = f"{prefix}{column}"
        if prefixed in df.columns:
            continue
        dtype = COLUMN_DTYPES.get(column, pl.Utf8)
        placeholders.append(pl.lit(None, dtype=dtype).alias(prefixed))

    if placeholders:
        return df.with_columns(placeholders)
    return df
def _property_match_frame(
    matches: pl.DataFrame, candidates: pl.DataFrame
) -> pl.DataFrame:
    """Attach each match's candidate columns, renamed with a ``_property_`` prefix.

    An empty ``matches`` frame is returned as-is.
    """
    if matches.is_empty():
        return matches

    carried = [
        column
        for column in PROPERTY_CANDIDATE_COLUMNS
        if column in candidates.columns
    ]
    lookup = candidates.select(["_property_row", *carried])
    matched = matches.join(lookup, on="_property_row", how="left")

    to_prefix = [
        column for column in PROPERTY_CANDIDATE_COLUMNS if column in matched.columns
    ]
    return _prefix_columns(matched, to_prefix, "_property_")
def _canonical_epc_property_type_expr() -> pl.Expr:
    """Expression mapping EPC type/built-form onto canonical property types.

    For an EPC "House" with a usable ``built_form`` the built form is used;
    otherwise the EPC property type itself; null when there is no EPC type.
    The chosen value is then passed through the replacement table below
    (values not in the table are left untouched).
    """
    built_form = pl.col("built_form")
    epc_type = pl.col("epc_property_type")

    built_form_unusable = built_form.is_null() | built_form.is_in(
        ["NO DATA!", "Not Recorded"]
    )
    epc_known = epc_type.is_not_null()
    epc_is_house = epc_type == "House"

    raw_type = (
        pl.when(epc_known & epc_is_house & ~built_form_unusable)
        .then(built_form)
        .when(epc_known)
        .then(epc_type)
        .otherwise(None)
    )
    canonical_names = {
        "Flat": "Flats/Maisonettes",
        "Maisonette": "Flats/Maisonettes",
        "End-Terrace": "Terraced",
        "Mid-Terrace": "Terraced",
        "Enclosed End-Terrace": "Terraced",
        "Enclosed Mid-Terrace": "Terraced",
        "Bungalow": "Other",
        "Park home": "Other",
        "House": "Other",
    }
    return raw_type.replace(canonical_names)
def _construction_year_expr(column: str = "construction_age_band") -> pl.Expr:
    """Expression extracting a year from an age-band column.

    The value is cast to text, the "England and Wales: " and " onwards"
    fragments are removed, and the first run of four digits is cast to
    ``UInt16`` (non-strict, so unparseable values become null).
    """
    band_text = pl.col(column).cast(pl.Utf8)
    trimmed = band_text.str.replace("England and Wales: ", "").str.replace(
        " onwards", ""
    )
    first_year = trimmed.str.extract(r"(\d{4})", 1)
    return first_year.cast(pl.UInt16, strict=False)
def _fractional_year_expr(column: str) -> pl.Expr:
    """Expression turning a temporal column into ``year + (month - 1) / 12``."""
    moment = pl.col(column).dt
    whole_year = moment.year().cast(pl.Float32)
    months_elapsed = moment.month().cast(pl.Float32) - 1.0
    return whole_year + months_elapsed / 12.0
def _load_epc_candidates(
    epc_path: Path, listing_postcodes: list[str], temp_dir: Path
) -> pl.DataFrame:
    """Load the EPC certificates that could match the given listing postcodes.

    Certificates from ``_scan_epc_certificates`` get normalized address and
    postcode match keys, are restricted to ``listing_postcodes``, and are
    reduced to the first row per (address, postcode) after sorting by
    ``inspection_date`` descending. Any certificate in scope whose tenure
    mentions "social" marks that address as a former council house.

    Returns:
        A collected frame with ``_epc_row``, the two match keys, the canonical
        property type and the ``EPC_ENRICHMENT_COLUMNS``.
    """
    certificates = _scan_epc_certificates(epc_path, temp_dir).with_columns(
        normalize_address_key(pl.col("epc_address")).alias("_epc_match_address"),
        normalize_postcode_key(pl.col("epc_postcode")).alias("_epc_match_postcode"),
    )
    in_scope = certificates.filter(
        pl.col("_epc_match_postcode").is_in(listing_postcodes)
    )

    def _valid_rating(source: str) -> pl.Expr:
        # Keep only recognised rating values; anything else becomes null.
        rating = pl.col(source)
        return pl.when(rating.is_in(EPC_RATING_VALUES)).then(rating).otherwise(None)

    latest = (
        in_scope.sort("inspection_date", descending=True)
        .group_by("_epc_match_address", "_epc_match_postcode")
        .first()
        .with_columns(
            _canonical_epc_property_type_expr().alias("_epc_canonical_property_type"),
            _construction_year_expr().alias("Construction year"),
            _valid_rating("current_energy_rating").alias("Current energy rating"),
            _valid_rating("potential_energy_rating").alias("Potential energy rating"),
            pl.col("total_floor_area").alias("Total floor area (sqm)"),
            pl.col("number_habitable_rooms").alias("Number of bedrooms & living rooms"),
            pl.col("floor_height").alias("Interior height (m)"),
            pl.col("epc_address").alias("Address per EPC"),
        )
        .drop("tenure", strict=False)
    )

    # Social tenure is looked up across every certificate in scope, not only
    # the one kept per address above.
    ever_social = (
        in_scope.filter(pl.col("tenure").str.to_lowercase().str.contains("social"))
        .select("_epc_match_address", "_epc_match_postcode")
        .unique()
        .with_columns(pl.lit("Yes").alias("Former council house"))
    )

    flagged = latest.join(
        ever_social,
        on=["_epc_match_address", "_epc_match_postcode"],
        how="left",
    ).with_columns(pl.col("Former council house").fill_null("No"))

    return (
        flagged.filter(pl.col("_epc_match_address").is_not_null())
        .with_row_index("_epc_row")
        .select(
            "_epc_row",
            "_epc_match_address",
            "_epc_match_postcode",
            "_epc_canonical_property_type",
            *EPC_ENRICHMENT_COLUMNS,
        )
        .collect(engine="streaming")
    )
def _epc_candidates_by_postcode(candidates: pl.DataFrame) -> dict[str, list[dict]]:
    """Group candidate rows (as dicts) by ``_epc_match_postcode``.

    Rows whose postcode key is missing or empty are left out.
    """
    grouped: dict[str, list[dict]] = {}
    for record in candidates.iter_rows(named=True):
        key = record.get("_epc_match_postcode")
        if not key:
            continue
        if key in grouped:
            grouped[key].append(record)
        else:
            grouped[key] = [record]
    return grouped
def _best_epc_candidate(listing: dict, candidates: list[dict]) -> dict | None:
query = listing.get("_listing_match_address")
if not query:
return None
listing_has_numbers = _has_number(query)
scored: list[tuple[float, int, dict]] = []
for candidate in candidates:
address = candidate.get("_epc_match_address")
if listing_has_numbers and not _numbers_compatible(query, address):
continue
base_score = _address_score(query, address)
if base_score == 0:
continue
score = float(base_score)
score += _enum_bonus(
listing.get("Property type"),
candidate.get("_epc_canonical_property_type"),
exact=6.0,
mismatch=-6.0,
)
score += _ratio_bonus(
listing.get("Total floor area (sqm)"),
candidate.get("Total floor area (sqm)"),
pct=0.12,
cap=8.0,
)
score += _rooms_bonus(
listing.get("Number of bedrooms & living rooms"),
candidate.get("Number of bedrooms & living rooms"),
)
scored.append((score, base_score, candidate))
if not scored:
return None
scored.sort(key=lambda item: item[0], reverse=True)
top = scored[0]
runner_up = scored[1][0] if len(scored) > 1 else None
margin = top[0] - runner_up if runner_up is not None else top[0]
threshold = (
EPC_MATCH_MIN_SCORE_WITH_NUMBERS
if listing_has_numbers
else EPC_MATCH_MIN_SCORE_WITHOUT_NUMBERS
)
if top[0] < threshold or margin < EPC_MATCH_MIN_MARGIN:
return None
return {
"_listing_idx": listing["_listing_idx"],
"_epc_row": top[2]["_epc_row"],
"EPC match score": round(top[0], 1),
"EPC address score": top[1],
"EPC match margin": round(margin, 1),
"EPC match status": "matched",
}
def _match_epc(listings: pl.DataFrame, candidates: pl.DataFrame) -> pl.DataFrame:
    """Pick the best EPC certificate candidate for every listing.

    Candidates are grouped by postcode; each listing with a non-empty
    ``_listing_match_postcode`` is scored against its bucket through
    ``_best_epc_candidate``.

    Returns:
        One row per accepted match in the fixed schema below; an empty frame
        with that same schema when there are no candidates or no matches.
    """
    match_schema = {
        "_listing_idx": pl.UInt32,
        "_epc_row": pl.UInt32,
        "EPC match score": pl.Float32,
        "EPC address score": pl.Int32,
        "EPC match margin": pl.Float32,
        "EPC match status": pl.Utf8,
    }
    if candidates.is_empty():
        return pl.DataFrame(schema=match_schema)

    by_postcode = _epc_candidates_by_postcode(candidates)
    progress = tqdm(
        listings.iter_rows(named=True),
        total=listings.height,
        desc="Matching EPC certificates",
    )
    accepted = []
    for row in progress:
        key = row.get("_listing_match_postcode")
        if key:
            best = _best_epc_candidate(row, by_postcode.get(key, []))
            if best is not None:
                accepted.append(best)

    if accepted:
        return pl.DataFrame(accepted, schema=match_schema)
    return pl.DataFrame(schema=match_schema)
def _epc_match_frame(matches: pl.DataFrame, candidates: pl.DataFrame) -> pl.DataFrame:
    """Attach EPC enrichment columns to the match rows.

    ``EPC_ENRICHMENT_COLUMNS`` are left-joined from ``candidates`` by
    ``_epc_row`` and renamed with an ``_epc_`` prefix. An empty ``matches``
    frame is returned as-is.
    """
    if matches.is_empty():
        return matches

    lookup = candidates.select("_epc_row", *EPC_ENRICHMENT_COLUMNS)
    joined = matches.join(lookup, on="_epc_row", how="left")
    to_prefix = [name for name in EPC_ENRICHMENT_COLUMNS if name in joined.columns]
    return _prefix_columns(joined, to_prefix, "_epc_")
def _join_postcode_features(
    listings: pl.DataFrame, postcode_features_path: Path
) -> pl.DataFrame:
    """Left-join the postcode feature parquet onto listings by ``Postcode``.

    Feature columns whose names clash with listing columns receive a
    ``_postcode`` suffix.
    """
    features = pl.scan_parquet(postcode_features_path).collect(engine="streaming")
    return listings.join(features, on="Postcode", how="left", suffix="_postcode")
def _coalesce_feature_columns(df: pl.DataFrame) -> pl.DataFrame:
    """Collapse listing, ``_epc_`` and ``_property_`` values into final columns.

    Listing values are preferred where they are valid; otherwise the EPC match
    is used, then the historical property match. Price columns are copied from
    ``Asking price``, and the per-sqm price columns are derived afterwards from
    the coalesced floor area.
    """
    listing_rooms = pl.col("Number of bedrooms & living rooms")
    asking_price = pl.col("Asking price")

    version = pl.lit(ENRICHMENT_VERSION, dtype=pl.UInt16).alias(
        "Actual listing enrichment version"
    )
    address_per_epc = _coalesce_non_empty(
        "_epc_Address per EPC",
        "_property_Address per EPC",
    ).alias("Address per EPC")
    property_type = (
        pl.when(pl.col("Property type").is_in(PROPERTY_TYPE_VALUES))
        .then(pl.col("Property type"))
        .otherwise(pl.col("_property_Property type"))
        .alias("Property type")
    )
    tenure = (
        pl.when(pl.col("Leasehold/Freehold").is_in(TENURE_VALUES))
        .then(pl.col("Leasehold/Freehold"))
        .otherwise(pl.col("_property_Leasehold/Freehold"))
        .alias("Leasehold/Freehold")
    )
    floor_area = pl.coalesce(
        _valid_number_expr("Total floor area (sqm)"),
        _valid_number_expr("_epc_Total floor area (sqm)"),
        _valid_number_expr("_property_Total floor area (sqm)"),
    ).alias("Total floor area (sqm)")
    matched_rooms = pl.coalesce(
        pl.col("_epc_Number of bedrooms & living rooms"),
        pl.col("_property_Number of bedrooms & living rooms"),
    )
    rooms = (
        pl.when(listing_rooms > 0)
        .then(listing_rooms)
        .otherwise(matched_rooms)
        .cast(pl.Int32, strict=False)
        .alias("Number of bedrooms & living rooms")
    )
    current_rating = _coalesce_non_empty(
        "_epc_Current energy rating",
        "_property_Current energy rating",
    ).alias("Current energy rating")
    potential_rating = _coalesce_non_empty(
        "_epc_Potential energy rating",
        "_property_Potential energy rating",
    ).alias("Potential energy rating")
    interior_height = pl.coalesce(
        _valid_number_expr("_epc_Interior height (m)"),
        _valid_number_expr("_property_Interior height (m)"),
    ).alias("Interior height (m)")
    construction_year = (
        pl.coalesce(
            pl.col("_epc_Construction year"),
            pl.col("_property_Construction year"),
        )
        .cast(pl.UInt16, strict=False)
        .alias("Construction year")
    )
    former_council = (
        _coalesce_non_empty(
            "_epc_Former council house",
            "_property_Former council house",
        )
        .fill_null("No")
        .alias("Former council house")
    )

    # Expression order is kept as-is: newly created columns are appended in
    # this order, so it determines the output column layout.
    first_pass: list[pl.Expr] = [
        version,
        address_per_epc,
        property_type,
        tenure,
        floor_area,
        rooms,
        asking_price.alias("Estimated current price"),
        asking_price.alias("Last known price"),
        current_rating,
        potential_rating,
        interior_height,
        construction_year,
        former_council,
        pl.col("_property_Is construction date approximate").alias(
            "Is construction date approximate"
        ),
        pl.col("_property_Listed building").fill_null("No").alias("Listed building"),
        pl.col("_property_Estimated monthly rent").alias("Estimated monthly rent"),
        pl.col("_property_Street tree density percentile").alias(
            "Street tree density percentile"
        ),
        _fractional_year_expr("_property_Date of last transaction").alias(
            "Date of last transaction"
        ),
    ]
    coalesced = df.with_columns(first_pass)

    # Second pass: needs the coalesced floor area produced above.
    area = pl.col("Total floor area (sqm)")
    can_price = asking_price.is_not_null() & area.is_not_null() & (area > 0)
    per_sqm = (
        pl.when(can_price)
        .then((asking_price / area).round(0))
        .otherwise(None)
        .cast(pl.Int32, strict=False)
        .alias("Asking price per sqm")
    )
    priced = coalesced.with_columns(per_sqm)
    return priced.with_columns(
        pl.col("Asking price per sqm").alias("Est. price per sqm"),
        pl.col("Asking price per sqm").alias("Price per sqm"),
    )
def _drop_internal_columns(df: pl.DataFrame) -> pl.DataFrame:
    """Remove matching/bookkeeping columns before the frame is written out.

    A column is dropped when it starts with ``_property_`` or ``_epc_`` or is
    one of the explicitly named helper columns below.
    """
    prefixes = ("_property_", "_epc_")
    exact_names = {
        "_listing_idx",
        "_listing_match_address",
        "_listing_match_postcode",
        "_original_postcode",
        "_property_row",
        "_epc_row",
        "lat_postcode",
        "lon_postcode",
    }

    def _is_internal(name: str) -> bool:
        return name in exact_names or name.startswith(prefixes)

    doomed = [name for name in df.columns if _is_internal(name)]
    return df.drop(doomed, strict=False)
def build_enriched_actual_listings(
    listings_path: Path,
    properties_path: Path,
    postcode_features_path: Path,
    arcgis_path: Path,
    output_path: Path,
    *,
    epc_path: Path | None = None,
) -> pl.DataFrame:
    """Build and write the pre-enriched listings parquet.

    Listings are read, joined with postcode features, matched against
    historical properties and (optionally) EPC certificates, coalesced into
    final feature columns, stripped of internal columns and written to
    ``output_path``.

    Args:
        listings_path: Scraped listings parquet.
        properties_path: Historical properties parquet.
        postcode_features_path: Postcode feature parquet.
        arcgis_path: Postcode parquet passed to ``_read_listings``.
        output_path: Destination parquet; parent directories are created.
        epc_path: EPC certificates source; ``None`` skips direct EPC matching.

    Returns:
        The enriched frame that was written.

    The match frames are always left-joined, even when empty, so the output
    schema (match score / margin / status columns) does not depend on whether
    any match happened to be found or whether EPC matching was enabled.
    """
    print(f"Loading listings from {listings_path}...")
    listings = _read_listings(listings_path, arcgis_path)
    listing_postcodes = (
        listings.select("_listing_match_postcode")
        .drop_nulls()
        .unique()
        .to_series()
        .to_list()
    )
    print(f"Listings: {listings.height}; unique postcodes: {len(listing_postcodes)}")

    print(f"Loading property candidates from {properties_path}...")
    property_candidates = _load_property_candidates(properties_path, listing_postcodes)
    print(f"Property candidates: {property_candidates.height}")
    property_matches = _match_properties(listings, property_candidates)
    print(f"Historical property matches: {property_matches.height}")
    property_match_frame = _property_match_frame(property_matches, property_candidates)

    enriched = _join_postcode_features(listings, postcode_features_path)
    # An empty match frame still carries the full match schema, so joining it
    # yields typed all-null match columns instead of omitting them.
    enriched = enriched.join(property_match_frame, on="_listing_idx", how="left")

    if epc_path is not None:
        with tempfile.TemporaryDirectory(
            prefix="actual_listing_epc_", dir=local_tmp_dir()
        ) as tmpdir:
            print(f"Loading EPC candidates from {epc_path}...")
            epc_candidates = _load_epc_candidates(
                epc_path, listing_postcodes, Path(tmpdir)
            )
            print(f"EPC candidates: {epc_candidates.height}")
            epc_matches = _match_epc(listings, epc_candidates)
            print(f"EPC matches: {epc_matches.height}")
            epc_match_frame = _epc_match_frame(epc_matches, epc_candidates)
    else:
        # No EPC source: _match_epc returns its schema-only empty frame when
        # given no candidates, which keeps the EPC match columns present.
        epc_match_frame = _match_epc(listings, pl.DataFrame())
    enriched = enriched.join(epc_match_frame, on="_listing_idx", how="left")

    # Enrichment columns that no join supplied are added as typed nulls so the
    # coalescing step can reference them unconditionally.
    enriched = _ensure_prefixed_columns(
        enriched, PROPERTY_CANDIDATE_COLUMNS, "_property_"
    )
    enriched = _ensure_prefixed_columns(enriched, EPC_ENRICHMENT_COLUMNS, "_epc_")
    enriched = _coalesce_feature_columns(enriched)
    enriched = _drop_internal_columns(enriched)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    enriched.write_parquet(output_path)
    size_mb = output_path.stat().st_size / (1024 * 1024)
    print(
        f"Wrote {enriched.height} enriched listings to {output_path} ({size_mb:.1f} MB)"
    )
    return enriched
def main() -> None:
    """Command-line entry point: parse paths and build the enriched parquet.

    Direct EPC matching is skipped when ``--no-epc`` is given or when the
    ``--epc`` path does not exist (a notice is printed in the latter case).
    """
    parser = argparse.ArgumentParser(
        description="Build a pre-enriched actual-listings parquet for the server"
    )
    # Declared in this order so the generated --help listing is unchanged.
    options = [
        (
            "--listings",
            {
                "type": Path,
                "default": Path("finder/data/online_listings_buy.parquet"),
                "help": "Input scraped listings parquet",
            },
        ),
        (
            "--properties",
            {
                "type": Path,
                "default": Path("property-data/properties.parquet"),
                "help": "Historical properties parquet",
            },
        ),
        (
            "--postcode-features",
            {
                "type": Path,
                "default": Path("property-data/postcode.parquet"),
                "help": "Postcode feature parquet",
            },
        ),
        (
            "--arcgis",
            {
                "type": Path,
                "default": Path("property-data/arcgis_data.parquet"),
                "help": "ArcGIS/NSPL postcode parquet used for terminated-postcode remapping",
            },
        ),
        (
            "--epc",
            {
                "type": Path,
                "default": Path("manual-data/domestic-csv.zip"),
                "help": "Optional EPC certificates CSV/zip for direct listing-to-EPC fuzzy matching",
            },
        ),
        (
            "--no-epc",
            {
                "action": "store_true",
                "help": "Skip direct EPC matching even when --epc exists",
            },
        ),
        (
            "--output",
            {
                "type": Path,
                "default": Path("finder/data/online_listings_buy_enriched.parquet"),
                "help": "Output enriched listings parquet",
            },
        ),
    ]
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    epc_source = args.epc
    if args.no_epc:
        epc_source = None
    elif epc_source is not None and not epc_source.exists():
        print(
            f"EPC source not found at {epc_source}; continuing without direct EPC matching"
        )
        epc_source = None

    build_enriched_actual_listings(
        listings_path=args.listings,
        properties_path=args.properties,
        postcode_features_path=args.postcode_features,
        arcgis_path=args.arcgis,
        epc_path=epc_source,
        output_path=args.output,
    )


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load diff

View file

@ -53,7 +53,7 @@ Build an STRtree spatial index over the INSPIRE candidate polygons. Convert all
For each INSPIRE parcel that contains at least one UPRN, run a majority vote: whichever postcode has the most UPRNs inside that parcel wins the parcel. Accumulate winning parcels per postcode, union them, and clip to the OA boundary. The result is `claimed[postcode] = polygon_within_oa`.
Then resolve overlaps: INSPIRE parcels can overlap geographically (digitization overlaps), so two postcodes might claim the same square meters. Walk through the claimed dict in insertion order (the postcode with the most parcel wins gets priority by virtue of appearing first), subtracting the running union from each subsequent postcode's geometry.
For INSPIRE parcels with no contained UPRN, assign the clipped parcel to the nearest UPRN's postcode using the parcel's representative point. These nearest-postcode claims run after contained-UPRN claims, so explicit address-in-parcel evidence keeps priority. Then resolve overlaps: INSPIRE parcels can overlap geographically (digitization overlaps), so two postcodes might claim the same square meters. Walk through claims in priority order, subtracting the running union from each subsequent postcode's geometry.
#### Stage B: Voronoi distribution of remaining area
@ -67,7 +67,7 @@ The Voronoi computation (`voronoi.py`):
5. For each real point's Voronoi cell, constructs the polygon from the Voronoi vertices, clips to the boundary, groups by postcode
6. Unions per-postcode fragments
The effect: every unclaimed patch of OA gets assigned to the nearest postcode by straight-line distance (Voronoi tessellation is exactly the set of all points nearest to each generator).
The effect: every non-parcel patch of OA gets assigned to the nearest postcode by straight-line distance (Voronoi tessellation is exactly the set of all points nearest to each generator).
#### Stage C: Combine
@ -77,7 +77,7 @@ The output of `process_oa` is `list[(postcode, polygon)]` — the per-OA fragmen
### Phase 4: Merging and writing
**Fragment merging** (`output.py:merge_fragments`): Groups all fragments by postcode, unions them. If the result is a MultiPolygon (meaning the postcode has disconnected pieces — either from spanning OAs with a gap, or algorithm artifacts), applies a 1m buffer-then-unbuffer to close tiny gaps from floating-point mismatches at OA boundary edges. If still a MultiPolygon after that, keeps only the largest polygon — postcodes are contiguous delivery routes, so detached fragments are artifacts.
**Fragment merging** (`output.py:merge_fragments`): Groups all fragments by postcode, unions them. If the result is a MultiPolygon (meaning the postcode has disconnected pieces — either from spanning OAs with a gap, or algorithm artifacts), applies a 5m buffer-then-unbuffer to close tiny gaps from floating-point mismatches at OA boundary edges. If still a MultiPolygon after that, keeps only the largest polygon — postcodes are contiguous delivery routes, so detached fragments are artifacts.
**GeoJSON output** (`output.py:write_district_geojson`): Groups postcodes by district (the outward code, e.g. `SW1A` from `SW1A 1AA`). For each district, converts every postcode polygon from BNG to WGS84 using pyproj, simplifies with 1m tolerance (Douglas-Peucker), rounds coordinates to 6 decimal places (~0.1m precision), and writes a single `{district}.geojson` FeatureCollection. Each Feature has `postcodes` (formatted like `"SW1A 1AA"`) and `mapit_code` (no space: `"SW1A1AA"`) in its properties.

View file

@ -8,9 +8,10 @@ Algorithm per OA:
1. Single-postcode OA → entire OA polygon assigned to that postcode
2. Multi-postcode OA:
a. Assign INSPIRE parcels to postcodes via UPRN point-in-polygon majority vote
b. Union INSPIRE parcels per postcode, clip to OA "claimed" area
c. Distribute remaining (unclaimed) OA area via Voronoi of UPRN points
d. Final polygon = claimed + Voronoi share
b. Assign INSPIRE parcels with no contained UPRN to the nearest UPRN postcode
c. Union parcel claims per postcode, clip to OA → "claimed" area
d. Distribute remaining non-parcel OA area via Voronoi of UPRN points
e. Final polygon = parcel claims + Voronoi share
Memory-efficient design (<12GB total):
- INSPIRE polygons stored as raw coordinate bytes in parquet; Shapely objects built

View file

@ -1,12 +1,15 @@
from collections import Counter, defaultdict
import numpy as np
from scipy.spatial import cKDTree
from shapely import STRtree, make_valid
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union
from .voronoi import compute_voronoi_regions
# Area threshold shared by the geometry helpers: polygonal results with an
# area at or below this value are discarded as slivers, and Voronoi fill is
# only run when the remaining area exceeds it.
# NOTE(review): units follow the input CRS (presumably square metres) — confirm.
MIN_GEOM_AREA = 0.01
def process_oa(
oa_geom: Polygon | MultiPolygon,
@ -19,76 +22,31 @@ def process_oa(
if len(unique_pcs) == 1:
return [(next(iter(unique_pcs)), oa_geom)]
# Try INSPIRE-based assignment
claimed: dict[str, Polygon | MultiPolygon] = {}
if len(points) == 0:
return []
valid_oa = _clean_polygonal(oa_geom)
if valid_oa is None:
return []
if inspire_candidates:
cand_tree = STRtree(inspire_candidates)
from shapely import points as shp_points
uprn_pts = shp_points(points)
pt_idx, cand_idx = cand_tree.query(uprn_pts, predicate="intersects")
# Majority vote per candidate polygon
cand_postcodes: dict[int, list[str]] = defaultdict(list)
for pi, ci in zip(pt_idx, cand_idx):
cand_postcodes[ci].append(postcodes[pi])
pc_inspire_polys: dict[str, list[Polygon]] = defaultdict(list)
for ci, pc_list in cand_postcodes.items():
winner = Counter(pc_list).most_common(1)[0][0]
pc_inspire_polys[winner].append(inspire_candidates[ci])
for pc, polys in pc_inspire_polys.items():
merged = unary_union(polys)
if not merged.is_valid:
merged = make_valid(merged)
valid_oa = oa_geom if oa_geom.is_valid else make_valid(oa_geom)
clipped = merged.intersection(valid_oa)
if not clipped.is_empty:
if not clipped.is_valid:
clipped = make_valid(clipped)
clipped = _extract_polygonal(clipped)
if clipped is not None:
claimed[pc] = clipped
# Resolve overlaps: INSPIRE parcels can overlap geographically, so two
# postcodes may claim the same area. Give contested area to whichever
# postcode claimed it first (most UPRNs → first in insertion order).
if len(claimed) > 1:
resolved: dict[str, Polygon | MultiPolygon] = {}
used = None
for pc, geom in claimed.items():
if used is not None:
if not geom.is_valid:
geom = make_valid(geom)
if not used.is_valid:
used = make_valid(used)
geom = geom.difference(used)
if geom.is_empty:
continue
geom = _extract_polygonal(geom)
if geom is None:
continue
resolved[pc] = geom
used = geom if used is None else unary_union([used, geom])
claimed = resolved
claimed = _claim_inspire_parcels(valid_oa, points, postcodes, inspire_candidates)
else:
claimed = {}
# Compute remaining area
if claimed:
all_claimed = unary_union(list(claimed.values()))
if not all_claimed.is_valid:
all_claimed = make_valid(all_claimed)
valid_oa = oa_geom if oa_geom.is_valid else make_valid(oa_geom)
remaining = valid_oa.difference(all_claimed)
if not remaining.is_valid:
remaining = make_valid(remaining)
all_claimed = _clean_polygonal(all_claimed)
remaining = (
valid_oa.difference(all_claimed) if all_claimed is not None else valid_oa
)
remaining = _clean_polygonal(remaining)
else:
remaining = oa_geom if oa_geom.is_valid else make_valid(oa_geom)
remaining = valid_oa
# Distribute remaining area via Voronoi
if not remaining.is_empty and remaining.area > 0.01:
# Distribute non-parcel land via Voronoi
if remaining is not None and not remaining.is_empty and remaining.area > MIN_GEOM_AREA:
voronoi_result = compute_voronoi_regions(points, postcodes, remaining)
else:
voronoi_result = {}
@ -102,17 +60,167 @@ def process_oa(
fragments = []
for pc, parts in result.items():
merged = unary_union(parts)
if not merged.is_empty:
if not merged.is_valid:
merged = make_valid(merged)
merged = _extract_polygonal(merged)
if merged is not None:
fragments.append((pc, merged))
merged = _clean_polygonal(unary_union(parts))
if merged is not None:
fragments.append((pc, merged))
return fragments
def _claim_inspire_parcels(
    valid_oa: Polygon | MultiPolygon,
    points: np.ndarray,
    postcodes: list[str],
    inspire_candidates: list[Polygon],
) -> dict[str, Polygon | MultiPolygon]:
    """Assign INSPIRE parcels to postcodes before Voronoi fills non-parcel land.

    Parcels are first clipped to the OA. Parcels with UPRNs within them go to
    the postcode holding the most of those UPRNs. The remaining parcels (minus
    anything already claimed) are split into polygon parts and each part goes
    to the postcode of the UPRN nearest its representative point. Overlaps
    are resolved in priority order, contained-UPRN claims first.
    """
    parcels = _prepare_inspire_parcels(valid_oa, inspire_candidates)
    if not parcels:
        return {}

    parcel_index = STRtree(parcels)
    from shapely import points as shp_points

    # Pass 1: parcels that have UPRNs within them. A majority vote settles
    # parcels shared by several postcodes (e.g. flats, overlapping parcel data).
    hit_points, hit_parcels = parcel_index.query(
        shp_points(points), predicate="within"
    )
    ballots_by_parcel: dict[int, list[str]] = defaultdict(list)
    for point_i, parcel_i in zip(hit_points, hit_parcels):
        ballots_by_parcel[parcel_i].append(postcodes[point_i])

    occupied_parts: dict[str, list] = defaultdict(list)
    support: Counter[str] = Counter()
    for parcel_i, ballots in ballots_by_parcel.items():
        winner, n_votes = Counter(ballots).most_common(1)[0]
        occupied_parts[winner].append(parcels[parcel_i])
        support[winner] += n_votes

    def _occupied_priority(claim):
        # Most supporting UPRNs first, then larger area, then postcode name.
        pc, geom = claim
        return (-support[pc], -geom.area, pc)

    occupied_claims = sorted(
        _merge_parts_by_postcode(occupied_parts).items(), key=_occupied_priority
    )

    # Pass 2: parcels with no UPRN within them. Each leftover polygon part is
    # given to the nearest UPRN's postcode, so Voronoi only has to fill land
    # that is not covered by any parcel.
    occupied_union = _union_claims(occupied_claims)
    uprn_tree = cKDTree(points.astype(np.float64, copy=False))
    vacant_parts: dict[str, list] = defaultdict(list)
    for parcel_i, parcel in enumerate(parcels):
        if parcel_i in ballots_by_parcel:
            continue
        if occupied_union is None:
            leftover = parcel
        else:
            leftover = parcel.difference(occupied_union)
        for piece in _polygon_parts(leftover):
            piece = _clean_polygonal(piece)
            if piece is not None:
                owner = _nearest_postcode(piece, uprn_tree, postcodes)
                vacant_parts[owner].append(piece)

    def _vacant_priority(claim):
        # Larger area first, then postcode name.
        pc, geom = claim
        return (-geom.area, pc)

    vacant_claims = sorted(
        _merge_parts_by_postcode(vacant_parts).items(), key=_vacant_priority
    )

    return _resolve_ordered_claims(occupied_claims + vacant_claims)
def _prepare_inspire_parcels(
    valid_oa: Polygon | MultiPolygon,
    inspire_candidates: list[Polygon],
) -> list[Polygon | MultiPolygon]:
    """Clean candidate parcels and clip them to the OA.

    Candidates that are unusable after cleaning, do not intersect the OA, or
    leave no usable polygonal area once clipped are dropped.
    """
    kept: list[Polygon | MultiPolygon] = []
    for raw in inspire_candidates:
        cleaned = _clean_polygonal(raw)
        if cleaned is None or not cleaned.intersects(valid_oa):
            continue
        inside = _clean_polygonal(cleaned.intersection(valid_oa))
        if inside is not None:
            kept.append(inside)
    return kept
def _nearest_postcode(
    geom: Polygon | MultiPolygon,
    tree: cKDTree,
    postcodes: list[str],
) -> str:
    """Return the postcode whose tree point is nearest ``geom``'s representative point.

    ``postcodes`` is indexed by the position the tree reports for the nearest
    point.
    """
    anchor = geom.representative_point()
    nearest = tree.query([anchor.x, anchor.y])[1]
    return postcodes[nearest]
def _polygon_parts(geom) -> list[Polygon]:
    """Split a geometry into its polygons after cleaning.

    Returns an empty list when cleaning leaves nothing usable.
    """
    cleaned = _clean_polygonal(geom)
    if cleaned is None:
        return []
    return [cleaned] if cleaned.geom_type == "Polygon" else list(cleaned.geoms)
def _merge_parts_by_postcode(
    parts_by_postcode: dict[str, list],
) -> dict[str, Polygon | MultiPolygon]:
    """Union each postcode's parts into one cleaned geometry.

    Postcodes whose union is unusable after cleaning are omitted.
    """
    unions = (
        (pc, _clean_polygonal(unary_union(parts)))
        for pc, parts in parts_by_postcode.items()
    )
    return {pc: geom for pc, geom in unions if geom is not None}
def _union_claims(
    claims: list[tuple[str, Polygon | MultiPolygon]],
) -> Polygon | MultiPolygon | None:
    """Union the geometries of all claims; ``None`` when there are no claims."""
    geoms = [geom for _, geom in claims]
    return _clean_polygonal(unary_union(geoms)) if geoms else None
def _resolve_ordered_claims(
    claims: list[tuple[str, Polygon | MultiPolygon]],
) -> dict[str, Polygon | MultiPolygon]:
    """Resolve overlapping parcel claims in priority order.

    Claims are processed in the given order; each keeps only the area not
    already taken by earlier claims. Claims reduced to nothing usable are
    dropped, and a postcode appearing several times has its pieces merged.
    """
    kept_by_pc: dict[str, list] = defaultdict(list)
    taken = None
    for pc, claim in claims:
        piece = _clean_polygonal(claim)
        if piece is not None and taken is not None:
            piece = _clean_polygonal(piece.difference(taken))
        if piece is None:
            continue
        kept_by_pc[pc].append(piece)
        taken = _clean_polygonal(
            piece if taken is None else unary_union([taken, piece])
        )
    return _merge_parts_by_postcode(kept_by_pc)
def _clean_polygonal(geom) -> Polygon | MultiPolygon | None:
    """Return a valid, polygon-only version of ``geom``, or ``None``.

    ``None`` is returned for missing or empty input, when no polygonal part
    survives extraction, or when the area is at or below ``MIN_GEOM_AREA``.
    """
    if geom is None or geom.is_empty:
        return None
    repaired = geom if geom.is_valid else make_valid(geom)
    polygonal = _extract_polygonal(repaired)
    if polygonal is None or polygonal.is_empty:
        return None
    return None if polygonal.area <= MIN_GEOM_AREA else polygonal
def _extract_polygonal(geom) -> Polygon | MultiPolygon | None:
"""Extract only Polygon/MultiPolygon parts from a geometry.

View file

@ -7,6 +7,7 @@ import numpy as np
import polars as pl
import pytest
from shapely.geometry import MultiPolygon, Polygon, box
from shapely.ops import unary_union
from .oa_boundaries import parse_gpkg_geometry
from .greenspace import subtract_greenspace
@ -215,6 +216,20 @@ class TestVoronoiCollinear:
assert ratio > 0.3, f"Area split too unfair: {area_a:.0f} vs {area_b:.0f}"
class TestVoronoiCoverage:
    """Voronoi fallback should cover large OAs even when UPRNs are clustered."""

    def test_clustered_points_cover_large_boundary(self):
        # Two UPRNs bunched at one end of a long, thin boundary.
        oa = box(0, 0, 5000, 100)
        uprns = np.array([[10, 50], [20, 50]])

        regions = compute_voronoi_regions(uprns, ["A", "B"], oa)

        union = unary_union(list(regions.values()))
        assert union.area == pytest.approx(oa.area)
        assert oa.difference(union).area < 0.01
class TestEqualSplitFallback:
"""_equal_split_fallback must give every postcode some area."""
@ -306,6 +321,186 @@ class TestProcessOAGeometryTypes:
)
class TestProcessOAInspireParcelAssignment:
"""INSPIRE parcels without UPRNs should still shape postcode boundaries."""
def test_unoccupied_inspire_parcel_goes_to_nearest_postcode(self):
"""A parcel with no contained UPRN should not be split by Voronoi."""
oa_geom = box(0, 0, 100, 100)
parcel = box(20, 40, 65, 60) # crosses the x=50 Voronoi split
points = np.array(
[
[10, 50], # postcode A
[90, 50], # postcode B
]
)
postcodes = ["A", "B"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[parcel])
frag_dict = dict(fragments)
assert "A" in frag_dict and "B" in frag_dict
assert parcel.difference(frag_dict["A"]).area < 0.01
assert frag_dict["B"].intersection(parcel).area < 0.01
def test_contained_uprn_claim_wins_over_overlapping_nearest_parcel(self):
"""Contained-UPRN parcel claims should keep priority over nearest claims."""
oa_geom = box(0, 0, 100, 100)
contained_a = box(0, 0, 60, 100)
unoccupied_nearer_b = box(50, 0, 80, 100)
points = np.array(
[
[20, 50], # postcode A, inside contained_a
[90, 50], # postcode B, outside unoccupied_nearer_b
]
)
postcodes = ["A", "B"]
fragments = process_oa(
oa_geom,
points,
postcodes,
inspire_candidates=[contained_a, unoccupied_nearer_b],
)
frag_dict = dict(fragments)
assert "A" in frag_dict and "B" in frag_dict
assert contained_a.difference(frag_dict["A"]).area < 0.01
assert frag_dict["A"].intersection(frag_dict["B"]).area < 0.01
assert frag_dict["B"].intersection(box(60, 0, 80, 100)).area > 0
def test_nearest_uses_assignable_fragment_after_contained_subtraction(self):
"""Nearest assignment should use the part left after priority subtraction."""
oa_geom = box(0, 0, 100, 100)
contained_a = box(0, 0, 60, 100)
unoccupied = box(25, 0, 80, 100)
points = np.array(
[
[20, 50], # postcode A, inside contained_a
[90, 50], # postcode B, nearest to unoccupied remainder
]
)
postcodes = ["A", "B"]
fragments = process_oa(
oa_geom,
points,
postcodes,
inspire_candidates=[contained_a, unoccupied],
)
frag_dict = dict(fragments)
assert contained_a.difference(frag_dict["A"]).area < 0.01
assert box(60, 0, 80, 100).difference(frag_dict["B"]).area < 0.01
def test_boundary_uprn_does_not_claim_adjacent_parcel(self):
"""A UPRN on a parcel edge should not count inside both parcels."""
oa_geom = box(0, 0, 100, 100)
left = box(0, 0, 50, 100)
right = box(50, 0, 100, 100)
points = np.array(
[
[50, 50], # postcode A, exactly on shared parcel boundary
[75, 50], # postcode B, strictly inside right parcel
]
)
postcodes = ["A", "B"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
frag_dict = dict(fragments)
assert "A" in frag_dict and "B" in frag_dict
assert right.difference(frag_dict["B"]).area < 0.01
def test_disconnected_nearest_fragments_can_go_to_different_postcodes(self):
"""A split unoccupied parcel should be assigned component by component."""
oa_geom = box(0, 0, 100, 100)
contained_b = box(40, 0, 60, 100)
unoccupied = box(0, 40, 100, 60)
points = np.array(
[
[10, 20], # postcode A, nearest to left split fragment
[50, 20], # postcode B, inside contained_b but outside unoccupied
[90, 20], # postcode C, nearest to right split fragment
]
)
postcodes = ["A", "B", "C"]
fragments = process_oa(
oa_geom,
points,
postcodes,
inspire_candidates=[contained_b, unoccupied],
)
frag_dict = dict(fragments)
assert box(0, 40, 40, 60).difference(frag_dict["A"]).area < 0.01
assert box(60, 40, 100, 60).difference(frag_dict["C"]).area < 0.01
def test_overlapping_nearest_parcels_do_not_overlap_in_output(self):
"""Two unoccupied nearest-assigned parcels should be resolved cleanly."""
oa_geom = box(0, 0, 100, 100)
left = box(0, 0, 70, 100)
right = box(30, 0, 100, 100)
points = np.array(
[
[10, 50], # postcode A, nearest to left parcel
[90, 50], # postcode B, nearest to right parcel
]
)
postcodes = ["A", "B"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
frag_dict = dict(fragments)
assert "A" in frag_dict and "B" in frag_dict
assert frag_dict["A"].intersection(frag_dict["B"]).area < 0.01
def test_mixed_inspire_and_voronoi_covers_oa_without_overlap(self):
"""Parcel claims plus Voronoi fallback should cover the whole OA."""
oa_geom = box(0, 0, 100, 100)
contained_a = box(0, 0, 30, 100)
unoccupied = box(70, 0, 90, 100)
points = np.array(
[
[10, 50],
[90, 50],
]
)
postcodes = ["A", "B"]
fragments = process_oa(
oa_geom,
points,
postcodes,
inspire_candidates=[contained_a, unoccupied],
)
geoms = [geom for _, geom in fragments]
covered = unary_union(geoms)
overlap = sum(geom.area for geom in geoms) - covered.area
assert covered.area == pytest.approx(oa_geom.area)
assert oa_geom.difference(covered).area < 0.01
assert overlap < 0.01
def test_inspire_parcel_straddling_oa_is_clipped(self):
    """No output fragment may extend beyond the OA, even for an overhanging parcel."""
    output_area = box(0, 0, 100, 100)
    # Extends 40 units past the OA's eastern edge (x=100).
    overhanging_parcel = box(80, 0, 140, 100)
    result = process_oa(
        output_area,
        np.array([[10, 50], [90, 50]]),
        ["A", "B"],
        inspire_candidates=[overhanging_parcel],
    )
    for _postcode, fragment in result:
        leaked = fragment.difference(output_area)
        assert leaked.area < 0.01
# ---------------------------------------------------------------------------
# _extract_polygonal helper
# ---------------------------------------------------------------------------

View file

@ -52,9 +52,17 @@ def compute_voronoi_regions(
if len(unique_pts) == 1:
return {unique_pcs[0]: boundary}
if not boundary.is_valid:
boundary = make_valid(boundary)
pts = np.array(unique_pts)
min_e, min_n = pts.min(axis=0)
max_e, max_n = pts.max(axis=0)
pts_min_e, pts_min_n = pts.min(axis=0)
pts_max_e, pts_max_n = pts.max(axis=0)
boundary_min_e, boundary_min_n, boundary_max_e, boundary_max_n = boundary.bounds
min_e = min(pts_min_e, boundary_min_e)
min_n = min(pts_min_n, boundary_min_n)
max_e = max(pts_max_e, boundary_max_e)
max_n = max(pts_max_n, boundary_max_n)
span = max(max_e - min_e, max_n - min_n, 100)
dummy = np.array(
@ -79,9 +87,6 @@ def compute_voronoi_regions(
n_real = len(pts)
pc_polys: dict[str, list[Polygon]] = defaultdict(list)
if not boundary.is_valid:
boundary = make_valid(boundary)
for i in range(n_real):
region_idx = vor.point_region[i]
region = vor.regions[region_idx]

View file

@ -1,143 +0,0 @@
from pathlib import Path
import polars as pl
from pipeline.transform.enrich_actual_listings import build_enriched_actual_listings
def test_build_enriched_actual_listings_joins_postcode_and_property_features(
    tmp_path: Path,
) -> None:
    """End-to-end check of ``build_enriched_actual_listings`` on one listing.

    The listing is filed under ``AA1 1AB`` (which carries a ``doterm`` value in
    the ArcGIS fixture) while the property and postcode fixtures use
    ``AA1 1AA``; the assertions expect the enriched row to come back under
    ``AA1 1AA`` with the property and postcode features attached.
    """
    listings_path = tmp_path / "listings.parquet"
    properties_path = tmp_path / "properties.parquet"
    postcode_path = tmp_path / "postcode.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    output_path = tmp_path / "online_listings_buy_enriched.parquet"

    def write_typed(path: Path, columns: dict) -> None:
        # ``columns`` maps name -> (dtype, values); split it into data + schema.
        pl.DataFrame(
            {name: values for name, (_dtype, values) in columns.items()},
            schema={name: dtype for name, (dtype, _values) in columns.items()},
        ).write_parquet(path)

    write_typed(
        listings_path,
        {
            "Bedrooms": (pl.Int32, [2]),
            "Bathrooms": (pl.Int32, [1]),
            "Number of bedrooms & living rooms": (pl.Int32, [3]),
            "lon": (pl.Float64, [-0.1]),
            "lat": (pl.Float64, [51.5]),
            "Postcode": (pl.Utf8, ["AA1 1AB"]),
            "Address per Property Register": (pl.Utf8, ["1 High Street"]),
            "Leasehold/Freehold": (pl.Utf8, [None]),
            "Property type": (pl.Utf8, ["Terraced"]),
            "Property sub-type": (pl.Utf8, ["Terraced"]),
            "Price qualifier": (pl.Utf8, [""]),
            "Total floor area (sqm)": (pl.Float64, [None]),
            "Listing URL": (pl.Utf8, ["https://example.test/listing"]),
            "Listing features": (pl.List(pl.Utf8), [["Garden"]]),
            "Listing date": (pl.Datetime("us"), [None]),
            "Listing status": (pl.Utf8, ["For sale"]),
            "Asking price": (pl.Int64, [300_000]),
            "Asking price per sqm": (pl.Int32, [None]),
        },
    )
    write_typed(
        properties_path,
        {
            "Address per Property Register": (pl.Utf8, ["1 HIGH STREET"]),
            "Postcode": (pl.Utf8, ["AA1 1AA"]),
            "Leasehold/Freehold": (pl.Utf8, ["Freehold"]),
            "Address per EPC": (pl.Utf8, ["1 High Street"]),
            "Current energy rating": (pl.Utf8, ["C"]),
            "Potential energy rating": (pl.Utf8, ["B"]),
            "Total floor area (sqm)": (pl.Float64, [80.0]),
            "Number of bedrooms & living rooms": (pl.Int32, [4]),
            "Interior height (m)": (pl.Float64, [2.4]),
            "Construction year": (pl.UInt16, [1935]),
            "Former council house": (pl.Utf8, ["No"]),
            "Listed building": (pl.Utf8, ["No"]),
            "Estimated monthly rent": (pl.Float32, [1200.0]),
            "Street tree density percentile": (pl.Float32, [75.0]),
            "Property type": (pl.Utf8, ["Terraced"]),
            "Estimated current price": (pl.Float64, [310_000.0]),
        },
    )
    # Postcode-level features are written without an explicit schema.
    pl.DataFrame(
        {
            "Postcode": ["AA1 1AA"],
            "Income Score": [82.5],
            "Within conservation area": ["Yes"],
        }
    ).write_parquet(postcode_path)
    write_typed(
        arcgis_path,
        {
            "pcds": (pl.Utf8, ["AA1 1AA", "AA1 1AB"]),
            "ctry25cd": (pl.Utf8, ["E92000001", "E92000001"]),
            "doterm": (pl.Utf8, [None, "202401"]),
            "east1m": (pl.Float64, [100.0, 105.0]),
            "north1m": (pl.Float64, [100.0, 105.0]),
        },
    )

    enriched = build_enriched_actual_listings(
        listings_path=listings_path,
        properties_path=properties_path,
        postcode_features_path=postcode_path,
        arcgis_path=arcgis_path,
        output_path=output_path,
        epc_path=None,
    )
    first_row = enriched.row(0, named=True)

    assert output_path.exists()
    expected_values = {
        "Postcode": "AA1 1AA",
        "Historical property match status": "matched",
        "Income Score": 82.5,
        "Within conservation area": "Yes",
        "Leasehold/Freehold": "Freehold",
        "Total floor area (sqm)": 80.0,
        "Asking price per sqm": 3750,  # 300_000 asking price over 80 sqm
        "Estimated current price": 300_000,
        "Current energy rating": "C",
    }
    for column, expected in expected_values.items():
        assert first_row[column] == expected

View file

@ -2,16 +2,23 @@ import polars as pl
import pyarrow as pa
import pytest
from shapely import box, to_wkb
from shapely.geometry import Point
from pipeline.transform.merge import (
_AREA_COLUMNS,
CONSERVATION_AREA_FEATURE,
LISTED_BUILDING_FEATURE,
TREE_DENSITY_FEATURE,
_is_unpublished_conservation_area_record,
_LISTING_OVERLAY_SOURCES,
_build_unmatched_listing_seed_rows,
_canonical_postcode_expr,
_finalize_listings,
_integrate_listings,
_match_direct_epc,
_is_dynamic_poi_metric_column,
_less_deprived_percentile_expr,
_load_conservation_area_geometries,
_load_listings_for_merge,
_matched_listed_building_flags,
_postcode_conservation_area_flags,
_postcode_listed_building_candidates,
@ -85,31 +92,28 @@ def test_postcode_conservation_area_flags_marks_point_membership() -> None:
]
def test_unpublished_conservation_area_records_are_identified() -> None:
    """Only the "No data available for publication by HE" name is flagged."""
    placeholder_name = "No data available for publication by HE"
    assert _is_unpublished_conservation_area_record(placeholder_name)
    # A real area name and a missing name must both be treated as published.
    for other_name in ("Bloomsbury", None):
        assert not _is_unpublished_conservation_area_record(other_name)
def test_load_conservation_area_geometries_skips_unpublished_placeholders(
def test_load_conservation_area_geometries_uses_current_planning_data_records(
monkeypatch: pytest.MonkeyPatch,
tmp_path,
) -> None:
real_area = box(0, 0, 1, 1)
placeholder_area = box(-100, -100, 100, 100)
ended_area = box(2, 2, 3, 3)
other_dataset_area = box(4, 4, 5, 5)
point = Point(0.5, 0.5)
def fake_read_arrow(path, columns):
assert path == tmp_path / "conservation_areas.gpkg"
assert columns == ["NAME"]
def fake_read_arrow(path):
assert path == tmp_path / "conservation_areas.geojson"
table = pa.table(
{
"NAME": [
"Central Village",
"No data available for publication by HE",
"dataset": [
"conservation-area",
"conservation-area",
"listed-building",
"conservation-area",
],
"SHAPE": to_wkb([real_area, placeholder_area]),
"end-date": ["", "2025-01-01", "", ""],
"name": ["Central Village", "Old Boundary", "Other", "Point Record"],
"SHAPE": to_wkb([real_area, ended_area, other_dataset_area, point]),
}
)
return {"geometry_name": "SHAPE", "crs": "EPSG:4326"}, table
@ -117,7 +121,7 @@ def test_load_conservation_area_geometries_skips_unpublished_placeholders(
monkeypatch.setattr("pipeline.transform.merge.pyogrio.read_arrow", fake_read_arrow)
geometries, crs = _load_conservation_area_geometries(
tmp_path / "conservation_areas.gpkg"
tmp_path / "conservation_areas.geojson"
)
assert crs == "EPSG:4326"
@ -290,3 +294,440 @@ def test_tree_density_by_postcode_requires_postcode_and_density_columns(
with pytest.raises(ValueError, match="missing required column: postcode"):
_tree_density_by_postcode(missing_postcode_path)
def _sample_listings_frame() -> pl.DataFrame:
    """Return a one-row listings frame with an explicit dtype for every column."""
    # name -> (dtype, values); kept together so data and schema cannot drift apart.
    columns = {
        "Bedrooms": (pl.Int32, [3]),
        "Bathrooms": (pl.Int32, [2]),
        "Number of bedrooms & living rooms": (pl.Int32, [4]),
        "lon": (pl.Float64, [-0.1]),
        "lat": (pl.Float64, [51.5]),
        "Postcode": (pl.Utf8, ["sw1a1aa"]),
        "Address per Property Register": (pl.Utf8, ["1 Example Road"]),
        "Leasehold/Freehold": (pl.Utf8, ["Freehold"]),
        "Property type": (pl.Utf8, ["Terraced"]),
        "Property sub-type": (pl.Utf8, ["Mid-Terrace"]),
        "Price qualifier": (pl.Utf8, [""]),
        "Total floor area (sqm)": (pl.Float64, [120.0]),
        "Listing URL": (pl.Utf8, ["https://example.test/abc"]),
        "Listing features": (pl.List(pl.Utf8), [["Garden", "Off-street parking"]]),
        "Listing date": (pl.Datetime("us"), [None]),
        "Listing status": (pl.Utf8, ["For sale"]),
        "Asking price": (pl.Int64, [750_000]),
        "Asking price per sqm": (pl.Int32, [6_250]),
    }
    data = {name: values for name, (_dtype, values) in columns.items()}
    schema = {name: dtype for name, (dtype, _values) in columns.items()}
    return pl.DataFrame(data, schema=schema)
def _stub_arcgis(path) -> None:
    """Write a single-postcode ArcGIS lookup parquet (``SW1A 1AA``) to ``path``."""
    columns = {
        "pcds": (pl.Utf8, ["SW1A 1AA"]),
        "ctry25cd": (pl.Utf8, ["E92000001"]),
        "doterm": (pl.Utf8, [None]),
        "east1m": (pl.Float64, [530000.0]),
        "north1m": (pl.Float64, [180000.0]),
    }
    frame = pl.DataFrame(
        {name: values for name, (_dtype, values) in columns.items()},
        schema={name: dtype for name, (dtype, _values) in columns.items()},
    )
    frame.write_parquet(path)
def test_canonical_postcode_expr_formats_compact_postcodes() -> None:
    """Compact and spaced postcodes share one canonical form; junk and nulls give null."""
    cases = [
        ("sw1a1aa", "SW1A 1AA"),
        ("SW1A 1AA", "SW1A 1AA"),
        ("bad", None),
        (None, None),
    ]
    frame = pl.DataFrame({"Postcode": [raw for raw, _expected in cases]})
    canonical = frame.with_columns(
        _canonical_postcode_expr("Postcode").alias("canonical")
    )["canonical"]
    assert canonical.to_list() == [expected for _raw, expected in cases]
def test_load_listings_for_merge_canonicalises_and_exposes_overlay_columns(
    tmp_path,
) -> None:
    """The loader emits a canonical postcode plus the ``_actual_*`` overlay columns."""
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    _sample_listings_frame().write_parquet(listings_path)
    _stub_arcgis(arcgis_path)

    loaded = _load_listings_for_merge(listings_path, arcgis_path)

    expected_columns = {
        "postcode": ["SW1A 1AA"],
        "pp_address": ["1 Example Road"],
        "_actual_listing_url": ["https://example.test/abc"],
        "_actual_asking_price": [750_000],
        "_actual_lat": [51.5],
    }
    for column, values in expected_columns.items():
        assert loaded[column].to_list() == values
def test_build_unmatched_listing_seed_rows_fills_property_shape_fields(
    tmp_path,
) -> None:
    """A seed row for an unmatched listing takes property-shaped fields from the listing."""
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    _sample_listings_frame().write_parquet(listings_path)
    _stub_arcgis(arcgis_path)
    listings = _load_listings_for_merge(listings_path, arcgis_path)

    # Property columns first, then every overlay destination column.
    schema_fields = {
        "postcode": pl.Utf8,
        "pp_address": pl.Utf8,
        "pp_property_type": pl.Utf8,
        "duration": pl.Utf8,
        "total_floor_area": pl.Float64,
        "number_habitable_rooms": pl.Int16,
        "latest_price": pl.Int64,
        "epc_address": pl.Utf8,
    }
    schema_fields.update(
        (dst, dtype) for _src, dst, dtype in _LISTING_OVERLAY_SOURCES
    )
    template_schema = pl.Schema(schema_fields)

    seed = _build_unmatched_listing_seed_rows(
        listings.select("_listing_idx"), listings, template_schema
    )

    assert seed.height == 1
    expected_columns = {
        "postcode": ["SW1A 1AA"],
        "pp_address": ["1 Example Road"],
        "pp_property_type": ["Terraced"],
        "duration": ["Freehold"],
        "total_floor_area": [120.0],
        "number_habitable_rooms": [4],
        "latest_price": [750_000],
        # Columns not populated from the listing default to null.
        "epc_address": [None],
        # Overlay columns flow through 1:1.
        "_actual_listing_url": ["https://example.test/abc"],
    }
    for column, values in expected_columns.items():
        assert seed[column].to_list() == values
def test_build_unmatched_listing_seed_rows_uses_direct_epc_fallbacks(
    tmp_path,
) -> None:
    """When the listing lacks floor area and rooms, ``_direct_*`` values fill the seed row."""
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    # Null out the two listing fields the direct-EPC values are expected to replace.
    nulled_listing_fields = [
        pl.lit(None, dtype=pl.Float64).alias("Total floor area (sqm)"),
        pl.lit(None, dtype=pl.Int32).alias("Number of bedrooms & living rooms"),
    ]
    _sample_listings_frame().with_columns(*nulled_listing_fields).write_parquet(
        listings_path
    )
    _stub_arcgis(arcgis_path)

    direct_epc_literals = {
        "_direct_epc_address": pl.lit("1 Example Road"),
        "_direct_current_energy_rating": pl.lit("C"),
        "_direct_potential_energy_rating": pl.lit("B"),
        "_direct_total_floor_area": pl.lit(98.0),
        "_direct_number_habitable_rooms": pl.lit(4, dtype=pl.Int16),
        "_direct_floor_height": pl.lit(2.4),
        "_direct_construction_age_band": pl.lit(1930, dtype=pl.UInt16),
        "_direct_is_construction_date_approximate": pl.lit(1, dtype=pl.UInt8),
        "_direct_was_council_house": pl.lit("No"),
    }
    listings = _load_listings_for_merge(listings_path, arcgis_path).with_columns(
        *(expr.alias(name) for name, expr in direct_epc_literals.items())
    )

    schema_fields = {
        "postcode": pl.Utf8,
        "pp_address": pl.Utf8,
        "total_floor_area": pl.Float64,
        "number_habitable_rooms": pl.Int16,
        "epc_address": pl.Utf8,
        "current_energy_rating": pl.Utf8,
        "was_council_house": pl.Utf8,
    }
    schema_fields.update(
        (dst, dtype) for _src, dst, dtype in _LISTING_OVERLAY_SOURCES
    )
    template_schema = pl.Schema(schema_fields)

    seed = _build_unmatched_listing_seed_rows(
        listings.select("_listing_idx"), listings, template_schema
    )

    expected_columns = {
        "total_floor_area": [98.0],
        "number_habitable_rooms": [4],
        "epc_address": ["1 Example Road"],
        "current_energy_rating": ["C"],
        "was_council_house": ["No"],
    }
    for column, values in expected_columns.items():
        assert seed[column].to_list() == values
def test_match_direct_epc_considers_nearby_postcodes() -> None:
    """A same-address EPC under a different postcode, a few units away, still matches."""

    def typed_frame(columns: dict) -> pl.DataFrame:
        # ``columns`` maps name -> (dtype, values).
        return pl.DataFrame(
            {name: values for name, (_dtype, values) in columns.items()},
            schema={name: dtype for name, (dtype, _values) in columns.items()},
        )

    listing_matches = typed_frame(
        {
            "_listing_idx": (pl.UInt32, [0]),
            "_listing_match_address": (pl.Utf8, ["1 EXAMPLE ROAD"]),
            "_listing_match_postcode": (pl.Utf8, ["AA11AA"]),
            "_listing_east": (pl.Float64, [1000.0]),
            "_listing_north": (pl.Float64, [1000.0]),
            "_actual_property_type": (pl.Utf8, ["Terraced"]),
            "_actual_total_floor_area": (pl.Float64, [100.0]),
            "_actual_number_habitable_rooms": (pl.Int16, [4]),
        }
    )
    # Same match address as the listing, but postcode BB11BB and offset (+20, +10).
    epc_candidates = typed_frame(
        {
            "_direct_epc_row": (pl.UInt32, [0]),
            "_direct_epc_match_address": (pl.Utf8, ["1 EXAMPLE ROAD"]),
            "_direct_epc_match_postcode": (pl.Utf8, ["BB11BB"]),
            "_direct_epc_east": (pl.Float64, [1020.0]),
            "_direct_epc_north": (pl.Float64, [1010.0]),
            "_direct_epc_canonical_property_type": (pl.Utf8, ["Terraced"]),
            "_direct_epc_address": (pl.Utf8, ["1, Example Road"]),
            "_direct_current_energy_rating": (pl.Utf8, ["C"]),
            "_direct_potential_energy_rating": (pl.Utf8, ["B"]),
            "_direct_total_floor_area": (pl.Float64, [101.0]),
            "_direct_number_habitable_rooms": (pl.Int16, [4]),
            "_direct_floor_height": (pl.Float64, [2.5]),
            "_direct_construction_age_band": (pl.UInt16, [1930]),
            "_direct_is_construction_date_approximate": (pl.UInt8, [1]),
            "_direct_was_council_house": (pl.Utf8, ["No"]),
        }
    )

    matches = _match_direct_epc(listing_matches, epc_candidates)

    assert matches.height == 1
    assert matches["_listing_idx"].to_list() == [0]
    assert matches["_direct_epc_address"].to_list() == ["1, Example Road"]
def test_integrate_listings_attaches_overlay_by_matched_property_key(tmp_path) -> None:
    """Overlay columns land on the property row matching the listing, not its neighbour."""
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    _sample_listings_frame().write_parquet(listings_path)
    _stub_arcgis(arcgis_path)

    # Two properties in the listing's postcode; only the second shares its address.
    columns = {
        "postcode": (pl.Utf8, ["SW1A 1AA", "SW1A 1AA"]),
        "pp_address": (pl.Utf8, ["9 Other Road", "1 Example Road"]),
        "pp_property_type": (pl.Utf8, ["Detached", "Terraced"]),
        "duration": (pl.Utf8, ["Freehold", "Freehold"]),
        "total_floor_area": (pl.Float64, [80.0, 90.0]),
        "number_habitable_rooms": (pl.Int16, [3, 4]),
        "latest_price": (pl.Int64, [500_000, 600_000]),
        "epc_address": (pl.Utf8, [None, "1 Example Road"]),
        "current_energy_rating": (pl.Utf8, [None, "C"]),
        "potential_energy_rating": (pl.Utf8, [None, "B"]),
        "floor_height": (pl.Float64, [None, 2.4]),
        "construction_age_band": (pl.UInt16, [None, 1930]),
        "is_construction_date_approximate": (pl.UInt8, [None, 1]),
        "was_council_house": (pl.Utf8, [None, "No"]),
    }
    wide = pl.DataFrame(
        {name: values for name, (_dtype, values) in columns.items()},
        schema={name: dtype for name, (dtype, _values) in columns.items()},
    )

    integrated = _integrate_listings(
        wide.lazy(), listings_path, arcgis_path, epc_path=None
    ).collect()

    def listing_urls_for(address: str) -> list:
        return integrated.filter(pl.col("pp_address") == address)[
            "_actual_listing_url"
        ].to_list()

    assert listing_urls_for("1 Example Road") == ["https://example.test/abc"]
    assert listing_urls_for("9 Other Road") == [None]
def test_integrate_listings_rejects_low_confidence_no_number_match(tmp_path) -> None:
    """Two differently named, unnumbered addresses on one street must not be merged."""
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    listing_address = "Rose Cottage High Street"
    property_address = "Old Cottage High Street"
    _sample_listings_frame().with_columns(
        pl.lit(listing_address).alias("Address per Property Register"),
    ).write_parquet(listings_path)
    _stub_arcgis(arcgis_path)

    columns = {
        "postcode": (pl.Utf8, ["SW1A 1AA"]),
        "pp_address": (pl.Utf8, [property_address]),
        "pp_property_type": (pl.Utf8, ["Terraced"]),
        "duration": (pl.Utf8, ["Freehold"]),
        "total_floor_area": (pl.Float64, [120.0]),
        "number_habitable_rooms": (pl.Int16, [4]),
        "latest_price": (pl.Int64, [750_000]),
        "epc_address": (pl.Utf8, [property_address]),
        "current_energy_rating": (pl.Utf8, ["C"]),
        "potential_energy_rating": (pl.Utf8, ["B"]),
        "floor_height": (pl.Float64, [2.4]),
        "construction_age_band": (pl.UInt16, [1930]),
        "is_construction_date_approximate": (pl.UInt8, [1]),
        "was_council_house": (pl.Utf8, ["No"]),
    }
    wide = pl.DataFrame(
        {name: values for name, (_dtype, values) in columns.items()},
        schema={name: dtype for name, (dtype, _values) in columns.items()},
    )

    integrated = _integrate_listings(
        wide.lazy(), listings_path, arcgis_path, epc_path=None
    ).collect()

    def listing_urls_for(address: str) -> list:
        return integrated.filter(pl.col("pp_address") == address)[
            "_actual_listing_url"
        ].to_list()

    # The existing property keeps no overlay; the listing shows up as its own row.
    assert listing_urls_for(property_address) == [None]
    assert listing_urls_for(listing_address) == ["https://example.test/abc"]
def test_finalize_listings_promotes_overlay_columns_and_filters_to_listing_rows() -> (
    None
):
    """``_finalize_listings`` turns ``_actual_*`` overlay columns into public columns."""
    # name -> (dtype, [row 0, row 1]).
    columns = {
        "Postcode": (pl.Utf8, ["SW1A 1AA", "SW1A 1AA"]),
        "Address per Property Register": (
            pl.Utf8,
            ["1 Example Road", "2 Example Road"],
        ),
        "Address per EPC": (pl.Utf8, ["1 Example Road", None]),
        "Date of last transaction": (pl.Float64, [1990.0, None]),
        "lat": (pl.Float64, [51.5, 51.5]),
        "lon": (pl.Float64, [-0.1, -0.1]),
        "Total floor area (sqm)": (pl.Float64, [100.0, 95.0]),
        "Number of bedrooms & living rooms": (pl.Int16, [3, None]),
        "Property type": (pl.Utf8, ["Terraced", None]),
        "Leasehold/Freehold": (pl.Utf8, ["Leasehold", None]),
        "Last known price": (pl.Int64, [500_000, None]),
        "Street tree density percentile": (pl.Float32, [42.0, 42.0]),
        # Overlay columns: row 0 is a matched listing, row 1 is unmatched.
        "_actual_listing_url": (pl.Utf8, ["url0", "url1"]),
        "_actual_asking_price": (pl.Int64, [600_000, 700_000]),
        "_actual_asking_price_per_sqm": (pl.Int32, [5_000, None]),
        "_actual_listing_date": (pl.Datetime("us"), [None, None]),
        "_actual_listing_status": (pl.Utf8, ["For sale", "For sale"]),
        "_actual_listing_features": (pl.List(pl.Utf8), [["Garden"], ["Parking"]]),
        "_actual_bedrooms": (pl.Int32, [3, 4]),
        "_actual_bathrooms": (pl.Int32, [1, 2]),
        "_actual_price_qualifier": (pl.Utf8, ["", ""]),
        "_actual_property_sub_type": (pl.Utf8, ["Mid-Terrace", "End-Terrace"]),
        "_actual_lat": (pl.Float64, [51.51, 51.52]),
        "_actual_lon": (pl.Float64, [-0.11, -0.12]),
        "_actual_total_floor_area": (pl.Float64, [110.0, None]),
        "_actual_number_habitable_rooms": (pl.Int16, [4, 3]),
        "_actual_property_type": (pl.Utf8, ["Terraced", "Flats/Maisonettes"]),
        "_actual_leasehold_freehold": (pl.Utf8, ["Freehold", "Leasehold"]),
    }
    df = pl.DataFrame(
        {name: values for name, (_dtype, values) in columns.items()},
        schema={name: dtype for name, (dtype, _values) in columns.items()},
    )

    finalized = _finalize_listings(df).sort("Address per Property Register")

    assert finalized.height == 2
    expected_columns = {
        "Listing URL": ["url0", "url1"],
        "Asking price": [600_000, 700_000],
        # Row 1 has no overlay per-sqm value; 7_368 is about 700_000 / 95.0.
        "Asking price per sqm": [5_000, 7_368],
        "Est. price per sqm": [5_000, 7_368],
        "Estimated current price": [600_000, 700_000],
        "Last known price": [500_000, 700_000],
        # Listing's preferred floor area / rooms / property type / tenure.
        "Total floor area (sqm)": [110.0, 95.0],
        "Number of bedrooms & living rooms": [4, 3],
        "Property type": ["Terraced", "Flats/Maisonettes"],
        "Leasehold/Freehold": ["Freehold", "Leasehold"],
        # Postcode-level feature carried through to both matched and unmatched rows.
        "Street tree density percentile": [42.0, 42.0],
        # Match status reflects historical context availability.
        "Historical property match status": ["matched", "unmatched"],
    }
    for column, values in expected_columns.items():
        assert finalized[column].to_list() == values

    # Overlay scaffolding is dropped.
    for src, dst, _dt in _LISTING_OVERLAY_SOURCES:
        assert dst not in finalized.columns, src

View file

@ -386,8 +386,7 @@ fn build_school_meta(
let website = extract_optional_str_col(df, "school_website")?.unwrap_or_default();
let telephone = extract_optional_str_col(df, "school_telephone")?.unwrap_or_default();
let head_name = extract_optional_str_col(df, "school_head_name")?.unwrap_or_default();
let ofsted_rating =
extract_optional_str_col(df, "school_ofsted_rating")?.unwrap_or_default();
let ofsted_rating = extract_optional_str_col(df, "school_ofsted_rating")?.unwrap_or_default();
let fetch_str = |col: &Vec<Option<String>>, row: usize| -> Option<String> {
col.get(row).cloned().flatten()

View file

@ -184,7 +184,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
name: "Within conservation area",
order: Some(&["Yes", "No"]),
description: "Whether the postcode point falls inside a designated conservation area",
detail: "Historic England conservation area boundaries, matched to the postcode representative point. The national dataset is indicative rather than definitive, so boundary-sensitive decisions should be checked with the local planning authority.",
detail: "Planning Data conservation area boundaries, matched to the postcode representative point. The national dataset is a work in progress and may include duplicates or incomplete local coverage, so boundary-sensitive decisions should be checked with the local planning authority.",
source: "conservation-areas",
}),
Feature::Enum(EnumFeatureConfig {

View file

@ -167,6 +167,10 @@ struct Cli {
#[arg(long)]
tiles: PathBuf,
/// Optional PMTiles raster basemap for satellite imagery.
#[arg(long, env = "SATELLITE_TILES")]
satellite_tiles: Option<PathBuf>,
/// Optional PMTiles raster overlay for high-resolution strategic noise.
#[arg(long, env = "NOISE_OVERLAY_TILES")]
noise_overlay_tiles: Option<PathBuf>,
@ -475,6 +479,8 @@ async fn main() -> anyhow::Result<()> {
tiles_path,
"noise_lden_10m.pmtiles",
);
let satellite_tiles =
configured_or_default_overlay_path(&cli.satellite_tiles, tiles_path, "satellite.pmtiles");
let crime_hotspot_tiles = configured_or_default_overlay_path(
&cli.crime_hotspot_tiles,
tiles_path,
@ -488,6 +494,7 @@ async fn main() -> anyhow::Result<()> {
let noise_overlay_reader =
init_optional_tile_reader("Noise", noise_overlay_tiles.as_ref()).await?;
let satellite_reader = init_optional_tile_reader("Satellite", satellite_tiles.as_ref()).await?;
let crime_hotspot_reader =
init_optional_tile_reader("Crime hotspots", crime_hotspot_tiles.as_ref()).await?;
let tree_overlay_reader =
@ -692,6 +699,7 @@ async fn main() -> anyhow::Result<()> {
let reader_tile = tile_reader.clone();
let reader_style = tile_reader.clone();
let reader_satellite = satellite_reader.clone();
let reader_noise_overlay = noise_overlay_reader.clone();
let reader_crime_hotspot = crime_hotspot_reader.clone();
let reader_tree_overlay = tree_overlay_reader.clone();
@ -858,6 +866,18 @@ async fn main() -> anyhow::Result<()> {
})
.layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/tiles/satellite/{z}/{x}/{y}",
get(move |path| {
routes::get_overlay_tile(
reader_satellite.clone(),
routes::OverlayTileFormat::RasterJpeg,
"satellite",
path,
)
})
.layer(ConcurrencyLimitLayer::new(30)),
)
.route(
"/api/overlays/noise/{z}/{x}/{y}",
get(move |path| {

View file

@ -6,6 +6,7 @@ mod h3;
pub use bounds::{bounds_intersect, h3_cell_bounds, parse_bounds, require_bounds};
pub use fields::{
parse_enum_dist, parse_field_indices, parse_field_indices_with_poi, parse_field_set,
ParsedFieldIndices,
};
pub use filters::{
count_filter_impacts, count_filter_rejections, parse_filters, parse_filters_with_poi,

View file

@ -40,7 +40,13 @@ pub struct ActualListingsResponse {
pub truncated: bool,
}
const KEEP_UNKNOWN_LISTING_FILTER_FEATURES: &[&str] = &["Total floor area (sqm)"];
const KEEP_UNKNOWN_LISTING_FILTER_FEATURES: &[&str] = &[
"Total floor area (sqm)",
"Leasehold/Freehold",
"Number of bedrooms & living rooms",
"Property type",
];
const LISTING_BOUNDS_EPSILON_DEGREES: f64 = 0.00001;
pub async fn get_actual_listings(
State(shared): State<Arc<SharedState>>,
@ -98,14 +104,29 @@ pub async fn get_actual_listings(
};
let row_indices = actual_listings.grid.query(south, west, north, east);
let total_in_bounds = row_indices.len();
// Build (row, sort_key) pairs so we can sort by index without
// materializing the full ActualListing for every matching row.
let mut matching_rows: Vec<usize> = row_indices
let total_grid_candidates = row_indices.len();
let candidate_rows: Vec<usize> = row_indices
.iter()
.filter_map(|&row_idx| {
let row = row_idx as usize;
row_is_within_bounds(
actual_listings.lat[row],
actual_listings.lon[row],
south,
west,
north,
east,
)
.then_some(row)
})
.collect();
let total_in_bounds = candidate_rows.len();
// Build (row, sort_key) pairs so we can sort by index without
// materializing the full ActualListing for every matching row.
let mut matching_rows: Vec<usize> = candidate_rows
.into_iter()
.filter(|&row| {
if has_listing_filters
&& !row_passes_listing_filters(
row,
@ -116,7 +137,7 @@ pub async fn get_actual_listings(
&keep_unknown_listing_filter_idxs,
)
{
return None;
return false;
}
if has_poi_filters
&& !row_passes_listing_poi_filters(
@ -126,7 +147,7 @@ pub async fn get_actual_listings(
poi_num_features,
)
{
return None;
return false;
}
if has_travel_filters
&& !row_passes_travel_filters(
@ -135,9 +156,9 @@ pub async fn get_actual_listings(
&travel_data,
)
{
return None;
return false;
}
Some(row)
true
})
.collect();
@ -162,6 +183,7 @@ pub async fn get_actual_listings(
results = listings.len(),
total = total_matching,
total_in_bounds,
total_grid_candidates,
offset,
listing_filtered = has_listing_filters,
poi_filtered = has_poi_filters,
@ -214,10 +236,23 @@ fn row_passes_listing_filters(
}
}) && enum_filters.iter().all(|filter| {
let raw = feature_data[base + filter.feat_idx];
raw != NAN_U16 && filter.allowed.contains(&raw)
if raw == NAN_U16 {
keep_unknown_filter_idxs.contains(&filter.feat_idx)
} else {
filter.allowed.contains(&raw)
}
})
}
fn row_is_within_bounds(lat: f32, lon: f32, south: f64, west: f64, north: f64, east: f64) -> bool {
let lat = lat as f64;
let lon = lon as f64;
lat >= south - LISTING_BOUNDS_EPSILON_DEGREES
&& lat <= north + LISTING_BOUNDS_EPSILON_DEGREES
&& lon >= west - LISTING_BOUNDS_EPSILON_DEGREES
&& lon <= east + LISTING_BOUNDS_EPSILON_DEGREES
}
fn row_passes_listing_poi_filters(
row: usize,
filters: &[ParsedPoiFilter],
@ -245,6 +280,20 @@ fn row_passes_listing_poi_filters(
mod tests {
use super::*;
#[test]
fn listing_bounds_check_keeps_only_exact_viewport_rows() {
    // One viewport for every case.
    let (south, west, north, east) = (51.4, -0.2, 51.6, 0.0);
    // (lat, lon, expected result)
    let cases: [(f32, f32, bool); 7] = [
        (51.5, -0.1, true),
        // Bounds are inclusive so edge points are retained.
        (51.4, -0.2, true),
        (51.6, 0.0, true),
        // Just outside each of the four edges.
        (51.399, -0.1, false),
        (51.601, -0.1, false),
        (51.5, -0.201, false),
        (51.5, 0.001, false),
    ];

    for &(lat, lon, expected) in cases.iter() {
        assert_eq!(
            row_is_within_bounds(lat, lon, south, west, north, east),
            expected,
            "lat={lat}, lon={lon}"
        );
    }
}
#[test]
fn listing_floor_area_filter_keeps_unknown_values() {
let floor_area_filter = ParsedFilter {
@ -290,6 +339,48 @@ mod tests {
));
}
#[test]
fn listing_enum_filter_keeps_allowlisted_unknown_values() {
    // Every case uses the same filter: feature 0 must equal 1.
    let only_value_one = || ParsedEnumFilter {
        feat_idx: 0,
        allowed: [1u16].into_iter().collect(),
    };
    // Feature 0 is on the keep-unknown list.
    let keep_unknown_filter_idxs: FxHashSet<usize> = [0usize].into_iter().collect();

    // (raw value stored for feature 0, whether the row should pass)
    let cases = [
        (NAN_U16, true), // unknown value on an allowlisted feature is kept
        (2, false),      // known value outside the allowed set is rejected
        (1, true),       // known value inside the allowed set is kept
    ];

    for &(raw, expected) in cases.iter() {
        let passes = row_passes_listing_filters(
            0,
            &[],
            &[only_value_one()],
            &[raw],
            1,
            &keep_unknown_filter_idxs,
        );
        assert_eq!(passes, expected, "raw={raw}");
    }
}
#[test]
fn listing_poi_filter_uses_listing_metric_matrix() {
let filter = ParsedPoiFilter {

View file

@ -354,7 +354,11 @@ pub async fn get_export(
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?
};
let has_poi_filters = !parsed_poi_filters.is_empty();
let filters_str = if is_postcode_mode { None } else { params.filters };
let filters_str = if is_postcode_mode {
None
} else {
params.filters
};
let travel_entries = if is_postcode_mode {
Vec::new()
} else {
@ -472,9 +476,10 @@ pub async fn get_export(
let mut out: Vec<(usize, PostcodeExportAgg)> = Vec::with_capacity(entries.len());
for (pc_idx, _normalized) in entries {
let mut agg = PostcodeExportAgg::new(total_export_features);
for &row_idx in state.data.rows_for_postcode(
&postcode_data.postcodes[*pc_idx],
) {
for &row_idx in state
.data
.rows_for_postcode(&postcode_data.postcodes[*pc_idx])
{
agg.add_row(
feature_data,
row_idx as usize,
@ -518,7 +523,8 @@ pub async fn get_export(
return;
}
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
by_pc.entry(pc_idx)
by_pc
.entry(pc_idx)
.or_insert_with(|| PostcodeExportAgg::new(total_export_features))
.add_row(
feature_data,
@ -531,10 +537,8 @@ pub async fn get_export(
}
});
let mut aggs: Vec<(usize, PostcodeExportAgg)> = by_pc
.into_iter()
.filter(|(_, agg)| agg.count > 0)
.collect();
let mut aggs: Vec<(usize, PostcodeExportAgg)> =
by_pc.into_iter().filter(|(_, agg)| agg.count > 0).collect();
// Sort by property count descending
aggs.sort_unstable_by_key(|agg| std::cmp::Reverse(agg.1.count));

View file

@ -12,6 +12,7 @@ use super::TileReader;
pub enum OverlayTileFormat {
/// Mapped to the content type `application/x-protobuf` by the `match` in
/// this type's `impl` block.
VectorMvtGzip,
/// Mapped to the content type `image/png`.
RasterPng,
/// Mapped to the content type `image/jpeg`.
RasterJpeg,
}
impl OverlayTileFormat {
@ -19,6 +20,7 @@ impl OverlayTileFormat {
match self {
Self::VectorMvtGzip => "application/x-protobuf",
Self::RasterPng => "image/png",
Self::RasterJpeg => "image/jpeg",
}
}

View file

@ -10,7 +10,10 @@ use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::consts::{POSTCODE_SEARCH_OFFSET, PROPERTIES_LIMIT};
use crate::licensing::{check_license_point, resolve_share_code};
use crate::parsing::{parse_filters_with_poi, row_passes_filters, row_passes_poi_filters};
use crate::parsing::{
parse_field_indices_with_poi, parse_filters_with_poi, row_passes_filters,
row_passes_poi_filters,
};
use crate::state::SharedState;
use crate::utils::normalize_postcode;
@ -25,6 +28,10 @@ pub struct PostcodePropertiesParams {
/// Optional min:max applies as a filter (exclude properties outside range).
pub travel: Option<String>,
pub offset: Option<usize>,
/// `;;`-separated numeric feature names to include in each property payload.
/// If absent, keeps the legacy behavior and returns all numeric features.
/// If empty, returns only the fixed property card fields.
pub fields: Option<String>,
/// Exact address to rank first when opening properties from address search.
pub focus_address: Option<String>,
/// Share-link code; grants bbox-scoped access for unlicensed users.
@ -76,6 +83,17 @@ pub async fn get_postcode_properties(
let has_poi_filters = !parsed_poi_filters.is_empty();
let travel_entries = parse_optional_travel(params.travel.as_deref())
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let field_indices = parse_field_indices_with_poi(
params.fields.as_deref(),
&state.feature_name_to_index,
&state.data.poi_metrics.name_to_index,
)
.map_err(|err| (err.0, err.1).into_response())?;
let fields_count = field_indices
.normal
.as_ref()
.map(|indices| (indices.len() + field_indices.poi.len()) as i32)
.unwrap_or(-1);
let postcode_str = normalized;
let focus_address = params
@ -165,6 +183,7 @@ pub async fn get_postcode_properties(
feature_names,
feature_name_to_index,
enum_values,
&field_indices,
)
})
.collect();
@ -177,6 +196,7 @@ pub async fn get_postcode_properties(
offset = page_offset,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
fields = fields_count,
travel_entries = travel_entries.len(),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/postcode-properties"

View file

@ -14,8 +14,9 @@ use crate::consts::PROPERTIES_LIMIT;
use crate::data::{HistoricalPrice, RenovationEvent};
use crate::licensing::{check_license_bounds, resolve_share_code};
use crate::parsing::{
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_filters_with_poi, row_passes_filters,
row_passes_poi_filters, validate_h3_resolution,
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_indices_with_poi,
parse_filters_with_poi, row_passes_filters, row_passes_poi_filters, validate_h3_resolution,
ParsedFieldIndices,
};
use crate::state::{AppState, SharedState};
@ -30,6 +31,10 @@ pub struct HexagonPropertiesParams {
/// Optional min:max applies as a filter (exclude properties outside range).
pub travel: Option<String>,
pub offset: Option<usize>,
/// `;;`-separated numeric feature names to include in each property payload.
/// If absent, keeps the legacy behavior and returns all numeric features.
/// If empty, returns only the fixed property card fields.
pub fields: Option<String>,
/// Share-link code; grants bbox-scoped access for unlicensed users.
pub share: Option<String>,
}
@ -106,27 +111,81 @@ fn lookup_enum_value(
}
}
/// Copies one numeric feature of `row` into `features`, keyed by its name.
///
/// Nothing is inserted when `feat_idx` is out of range for `feature_names`,
/// when `feat_idx` is a key of `enum_values`, or when the stored value is not
/// finite (NaN or infinite).
fn insert_feature_value(
    features: &mut FxHashMap<String, f32>,
    row: usize,
    state: &AppState,
    feature_names: &[String],
    enum_values: &FxHashMap<usize, Vec<String>>,
    feat_idx: usize,
) {
    // Bounds check first: an unknown index has no name to key the value by.
    let Some(name) = feature_names.get(feat_idx) else {
        return;
    };
    // Indices registered in `enum_values` are never emitted as numeric features.
    if enum_values.contains_key(&feat_idx) {
        return;
    }
    let raw = state.data.get_feature(row, feat_idx);
    if !raw.is_finite() {
        return;
    }
    features.insert(name.clone(), raw);
}
/// Copies one POI metric of `row` into `features`, keyed by the metric name.
///
/// Nothing is inserted when `metric_idx` has no entry in
/// `state.data.poi_metrics.feature_names` or when the looked-up value is not
/// finite (NaN or infinite).
fn insert_poi_metric_value(
    features: &mut FxHashMap<String, f32>,
    row: usize,
    state: &AppState,
    metric_idx: usize,
) {
    let poi_metrics = &state.data.poi_metrics;
    if let Some(name) = poi_metrics.feature_names.get(metric_idx) {
        let metric = poi_metrics.get_for_property_row(row, metric_idx);
        if metric.is_finite() {
            features.insert(name.clone(), metric);
        }
    }
}
pub fn build_property(
row: usize,
state: &AppState,
feature_names: &[String],
feature_name_to_index: &FxHashMap<String, usize>,
enum_values: &FxHashMap<usize, Vec<String>>,
field_indices: &ParsedFieldIndices,
) -> Property {
let mut features = FxHashMap::default();
for (feat_idx, feat_name) in feature_names.iter().enumerate() {
if enum_values.contains_key(&feat_idx) {
continue;
if let Some(indices) = field_indices.normal.as_deref() {
for &feat_idx in indices {
insert_feature_value(
&mut features,
row,
state,
feature_names,
enum_values,
feat_idx,
);
}
let value = state.data.get_feature(row, feat_idx);
if value.is_finite() {
features.insert(feat_name.clone(), value);
} else {
for feat_idx in 0..feature_names.len() {
insert_feature_value(
&mut features,
row,
state,
feature_names,
enum_values,
feat_idx,
);
}
}
for (metric_idx, metric_name) in state.data.poi_metrics.feature_names.iter().enumerate() {
let value = state.data.poi_metrics.get_for_property_row(row, metric_idx);
if value.is_finite() {
features.insert(metric_name.clone(), value);
if field_indices.normal.is_some() {
for &metric_idx in &field_indices.poi {
insert_poi_metric_value(&mut features, row, state, metric_idx);
}
} else {
for metric_idx in 0..state.data.poi_metrics.feature_names.len() {
insert_poi_metric_value(&mut features, row, state, metric_idx);
}
}
@ -241,6 +300,17 @@ pub async fn get_hexagon_properties(
let has_poi_filters = !parsed_poi_filters.is_empty();
let travel_entries = parse_optional_travel(params.travel.as_deref())
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let field_indices = parse_field_indices_with_poi(
params.fields.as_deref(),
&state.feature_name_to_index,
&state.data.poi_metrics.name_to_index,
)
.map_err(|err| (err.0, err.1).into_response())?;
let fields_count = field_indices
.normal
.as_ref()
.map(|indices| (indices.len() + field_indices.poi.len()) as i32)
.unwrap_or(-1);
let result = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
@ -309,6 +379,7 @@ pub async fn get_hexagon_properties(
feature_names,
feature_name_to_index,
enum_values,
&field_indices,
)
})
.collect();
@ -322,6 +393,7 @@ pub async fn get_hexagon_properties(
offset,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
fields = fields_count,
travel_entries = travel_entries.len(),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/hexagon-properties"

View file

@ -135,6 +135,7 @@ fn is_allowed_param_key(key: &str) -> bool {
| "amenityCount5km"
| "poi"
| "overlay"
| "basemap"
| "tab"
| "pc"
| "tt"
@ -585,6 +586,14 @@ mod tests {
);
}
#[test]
fn preserves_basemap_for_share_links() {
    // The sanitized output must equal the input verbatim, so the `basemap`
    // key (and its value) has to survive sanitization untouched.
    let query = "lat=51.5&lon=-0.1&zoom=12&basemap=satellite";
    let sanitized = sanitized_query_params(query, false).unwrap();
    assert_eq!(sanitized, query);
}
#[test]
fn escapes_html_attributes() {
assert_eq!(escape_attr(r#""'><&"#), "&quot;&#39;&gt;&lt;&amp;");