Update data

This commit is contained in:
Andras Schmelczer 2026-05-14 08:17:10 +01:00
parent a4103b0896
commit 273d7a83ee
15 changed files with 716 additions and 316 deletions

4
.gitignore vendored
View file

@ -17,3 +17,7 @@ frontend/public/assets/*
frontend/public/assets/.done frontend/public/assets/.done
server-rs/logs server-rs/logs
video/auth.* video/auth.*
*.jpg
*.jpeg
*.mp4

View file

@ -94,6 +94,7 @@ export function useSavedProperties(userId: string | null) {
} catch (err) { } catch (err) {
const msg = err instanceof Error ? err.message : 'Failed to save property'; const msg = err instanceof Error ? err.message : 'Failed to save property';
setError(msg); setError(msg);
throw err;
} }
}, },
[userId, fetchProperties] [userId, fetchProperties]

View file

@ -23,6 +23,11 @@ html.dark {
color-scheme: dark; color-scheme: dark;
} }
button:not(:disabled),
[role='button']:not([aria-disabled='true']) {
cursor: pointer;
}
/* Smooth theme transitions (scoped to avoid map performance issues) */ /* Smooth theme transitions (scoped to avoid map performance issues) */
body, body,
div, div,

View file

@ -10,7 +10,11 @@ import pyarrow as pa
import pyarrow.csv as pa_csv import pyarrow.csv as pa_csv
import pyarrow.parquet as pq import pyarrow.parquet as pq
from ..utils import fuzzy_join_on_postcode from ..utils import (
fuzzy_join_on_postcode,
normalize_address_key,
normalize_postcode_key,
)
pl.Config.set_tbl_cols(-1) pl.Config.set_tbl_cols(-1)
@ -193,12 +197,15 @@ def main():
def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Path): def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Path):
epc_base = _scan_epc_certificates(epc_path, temp_dir) epc_base = _scan_epc_certificates(epc_path, temp_dir).with_columns(
normalize_address_key(pl.col("epc_address")).alias("_epc_match_address"),
normalize_postcode_key(pl.col("epc_postcode")).alias("_epc_match_postcode"),
)
# Dedup fork: keep latest certificate per property (existing logic) # Dedup fork: keep latest certificate per property (existing logic)
epc = ( epc = (
epc_base.sort("inspection_date", descending=True) epc_base.sort("inspection_date", descending=True)
.group_by("epc_address", "epc_postcode") .group_by("_epc_match_address", "_epc_match_postcode")
.first() .first()
.drop("tenure") .drop("tenure")
) )
@ -216,15 +223,15 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
.with_columns( .with_columns(
pl.col("number_habitable_rooms") pl.col("number_habitable_rooms")
.shift(1) .shift(1)
.over("epc_address", "epc_postcode") .over("_epc_match_address", "_epc_match_postcode")
.alias("_prev_rooms"), .alias("_prev_rooms"),
pl.col("total_floor_area") pl.col("total_floor_area")
.shift(1) .shift(1)
.over("epc_address", "epc_postcode") .over("_epc_match_address", "_epc_match_postcode")
.alias("_prev_area"), .alias("_prev_area"),
pl.col("_rating_rank") pl.col("_rating_rank")
.shift(1) .shift(1)
.over("epc_address", "epc_postcode") .over("_epc_match_address", "_epc_match_postcode")
.alias("_prev_rating_rank"), .alias("_prev_rating_rank"),
) )
.with_columns( .with_columns(
@ -257,7 +264,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
.cast(pl.Int32) .cast(pl.Int32)
.alias("_event_year"), .alias("_event_year"),
) )
.group_by("epc_address", "epc_postcode") .group_by("_epc_match_address", "_epc_match_postcode")
.agg( .agg(
pl.struct( pl.struct(
pl.col("_event_year").alias("year"), pl.col("_event_year").alias("year"),
@ -276,7 +283,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
# Social tenure fork: flag properties that were ever social housing # Social tenure fork: flag properties that were ever social housing
social_tenure = ( social_tenure = (
epc_base.filter(pl.col("tenure").str.to_lowercase().str.contains("social")) epc_base.filter(pl.col("tenure").str.to_lowercase().str.contains("social"))
.select("epc_address", "epc_postcode") .select("_epc_match_address", "_epc_match_postcode")
.unique() .unique()
.with_columns(pl.lit("Yes").alias("was_council_house")) .with_columns(pl.lit("Yes").alias("was_council_house"))
.collect() .collect()
@ -287,12 +294,12 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
epc = ( epc = (
epc.join( epc.join(
events.lazy(), events.lazy(),
on=["epc_address", "epc_postcode"], on=["_epc_match_address", "_epc_match_postcode"],
how="left", how="left",
) )
.join( .join(
social_tenure.lazy(), social_tenure.lazy(),
on=["epc_address", "epc_postcode"], on=["_epc_match_address", "_epc_match_postcode"],
how="left", how="left",
) )
.with_columns( .with_columns(
@ -339,9 +346,23 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
ignore_nulls=True, ignore_nulls=True,
).alias("pp_address"), ).alias("pp_address"),
) )
.with_columns(
normalize_address_key(pl.col("pp_address")).alias("_pp_match_address"),
normalize_postcode_key(pl.col("postcode")).alias("_pp_match_postcode"),
)
.filter(pl.col("_pp_match_postcode").is_not_null())
.with_columns(
pl.coalesce("_pp_match_address", "pp_address").alias("_pp_group_address"),
pl.col("_pp_match_postcode").alias("_pp_group_postcode"),
)
.filter(pl.col("pp_address").is_not_null())
.sort("date_of_transfer") .sort("date_of_transfer")
.group_by("pp_address", "postcode", maintain_order=True) .group_by("_pp_group_address", "_pp_group_postcode", maintain_order=True)
.agg( .agg(
pl.col("pp_address").last(),
pl.col("postcode").last(),
pl.col("_pp_match_address").last(),
pl.col("_pp_match_postcode").last(),
pl.struct( pl.struct(
pl.col("date_of_transfer").dt.year().alias("year"), pl.col("date_of_transfer").dt.year().alias("year"),
pl.col("date_of_transfer").dt.month().cast(pl.UInt8).alias("month"), pl.col("date_of_transfer").dt.month().cast(pl.UInt8).alias("month"),
@ -354,7 +375,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
pl.col("date_of_transfer").first().alias("first_transfer_date"), pl.col("date_of_transfer").first().alias("first_transfer_date"),
pl.col("old_new").first(), pl.col("old_new").first(),
) )
).filter(pl.col("pp_address").is_not_null()) )
print("Price paid dataset") print("Price paid dataset")
print(price_paid.head().collect()) print(price_paid.head().collect())
@ -405,7 +426,19 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
.then(pl.lit(1, dtype=pl.UInt8)) .then(pl.lit(1, dtype=pl.UInt8))
.otherwise(pl.lit(None, dtype=pl.UInt8)) .otherwise(pl.lit(None, dtype=pl.UInt8))
.alias("is_construction_date_approximate"), .alias("is_construction_date_approximate"),
).drop("old_new", "first_transfer_date") ).drop(
[
"old_new",
"first_transfer_date",
"_pp_match_address",
"_pp_match_postcode",
"_pp_group_address",
"_pp_group_postcode",
"_epc_match_address",
"_epc_match_postcode",
],
strict=False,
)
joined = joined.rename({col: col.lower() for col in joined.columns}) joined = joined.rename({col: col.lower() for col in joined.columns})

View file

@ -22,6 +22,8 @@ _AREA_COLUMNS = [
"Postcode", "Postcode",
"lat", "lat",
"lon", "lon",
# Runtime provenance for deciding whether missing coordinates are skippable.
"ctry25cd",
# Deprivation # Deprivation
"Income Score", "Income Score",
"Employment Score", "Employment Score",
@ -86,6 +88,15 @@ _AREA_COLUMNS = [
_DYNAMIC_POI_DISTANCE_RE = re.compile(r"^Distance to nearest amenity \(.+\) \(km\)$") _DYNAMIC_POI_DISTANCE_RE = re.compile(r"^Distance to nearest amenity \(.+\) \(km\)$")
_DYNAMIC_POI_COUNT_RE = re.compile(r"^Number of amenities \(.+\) within (2|5)km$") _DYNAMIC_POI_COUNT_RE = re.compile(r"^Number of amenities \(.+\) within (2|5)km$")
TREE_DENSITY_FEATURE = "Street tree density percentile" TREE_DENSITY_FEATURE = "Street tree density percentile"
# Fallback column-name pattern for the postcode-level tree density file: used to
# locate the percentile column when TREE_DENSITY_FEATURE itself is not present.
_POSTCODE_TREE_DENSITY_PERCENTILE_RE = re.compile(
r"^Tree canopy density percentile within \d+m$"
)
_RENT_SOURCE_UNAVAILABLE_LADS = {
# ONS PIPR does not publish LAD-level private-rent estimates for these
# small authorities. Keep rent null there, but fail on any other LAD miss.
"E06000053": "Isles of Scilly",
"E09000001": "City of London",
}
def _is_dynamic_poi_metric_column(column: str) -> bool: def _is_dynamic_poi_metric_column(column: str) -> bool:
@ -112,6 +123,107 @@ def _less_deprived_percentile_expr(column: str) -> pl.Expr:
) )
def _tree_density_by_postcode(tree_density_postcodes_path: Path) -> pl.LazyFrame:
    """Lazily load one tree-density value per postcode.

    The density column is TREE_DENSITY_FEATURE when the file has it; otherwise
    it is the single column whose name matches
    _POSTCODE_TREE_DENSITY_PERCENTILE_RE. Whichever column is picked is cast to
    Float32 and exposed under the TREE_DENSITY_FEATURE name.

    Args:
        tree_density_postcodes_path: Parquet file with a "postcode" column and
            a tree-density percentile column.

    Returns:
        LazyFrame with "postcode" and TREE_DENSITY_FEATURE, without null
        postcodes and with one row kept per postcode.

    Raises:
        ValueError: if "postcode" is missing, or if TREE_DENSITY_FEATURE is
            absent and the number of pattern-matching columns is not exactly 1.
    """
    frame = pl.scan_parquet(tree_density_postcodes_path)
    available = set(frame.collect_schema().names())

    if "postcode" not in available:
        raise ValueError(
            f"{tree_density_postcodes_path} is missing required column: postcode"
        )

    density_column = TREE_DENSITY_FEATURE
    if density_column not in available:
        # No canonical column: fall back to the radius-suffixed percentile column.
        matches = sorted(
            name
            for name in available
            if _POSTCODE_TREE_DENSITY_PERCENTILE_RE.match(name)
        )
        if len(matches) != 1:
            raise ValueError(
                f'{tree_density_postcodes_path} must contain column "{TREE_DENSITY_FEATURE}" '
                'or exactly one "Tree canopy density percentile within {radius}m" column; '
                f"found {len(matches)} postcode percentile columns"
            )
        (density_column,) = matches

    density = pl.col(density_column).cast(pl.Float32).alias(TREE_DENSITY_FEATURE)
    per_postcode = frame.select(pl.col("postcode"), density)
    return per_postcode.drop_nulls(["postcode"]).unique(["postcode"])
def _validate_lad_source_coverage(
    iod_path: Path, ethnicity_path: Path, rental_prices_path: Path
) -> None:
    """Check that the ethnicity and rental files cover every 2024 LAD.

    The reference set is the distinct "Local Authority District code (2024)"
    values in the file at ``iod_path``. Each reference LAD must appear in the
    ethnicity file ("Geography_code") and in the rental file ("area_code").
    Rental gaps are tolerated only for the codes listed in
    _RENT_SOURCE_UNAVAILABLE_LADS; those are reported with ``print``.

    Raises:
        ValueError: if any reference LAD is missing from the ethnicity data,
            or a LAD outside _RENT_SOURCE_UNAVAILABLE_LADS is missing from the
            rental data.
    """
    code_column = "Local Authority District code (2024)"
    name_column = "Local Authority District name (2024)"
    reference_lads = (
        pl.read_parquet(iod_path, columns=[code_column, name_column])
        .rename({code_column: "lad", name_column: "lad_name"})
        .unique(["lad"])
    )

    def _uncovered_by(source_path: Path, source_code_column: str) -> pl.DataFrame:
        # Reference LADs with no row at all in the given source, sorted by code.
        source_lads = pl.read_parquet(
            source_path, columns=[source_code_column]
        ).rename({source_code_column: "lad"})
        return reference_lads.join(source_lads, on="lad", how="anti").sort("lad")

    ethnicity_gaps = _uncovered_by(ethnicity_path, "Geography_code")
    if ethnicity_gaps.height > 0:
        raise ValueError(
            "Ethnicity data is missing 2024 LAD coverage: "
            f"{ethnicity_gaps.to_dicts()}"
        )

    rent_gaps = _uncovered_by(rental_prices_path, "area_code")
    known_unavailable = list(_RENT_SOURCE_UNAVAILABLE_LADS)
    unexplained_rent_gaps = rent_gaps.filter(
        ~pl.col("lad").is_in(known_unavailable)
    )
    if unexplained_rent_gaps.height > 0:
        raise ValueError(
            "Rental data is missing 2024 LAD coverage: "
            f"{unexplained_rent_gaps.to_dicts()}"
        )

    # Anything still listed here is one of the known source-unavailable LADs.
    if rent_gaps.height > 0:
        print(
            "PIPR has no LAD-level rent estimates for source-unavailable LADs; "
            f"rent will remain null there: {rent_gaps.to_dicts()}"
        )
def _validate_property_postcodes(df: pl.DataFrame) -> None:
    """Fail if any row has a null or whitespace-only "Postcode".

    Args:
        df: Frame with a "Postcode" column.

    Raises:
        ValueError: when at least one row has no usable postcode. The message
            gives the offending row count and up to 10 sample rows, limited to
            whichever of "Postcode", "Address per Property Register" and
            "Last known price" exist in the frame.
    """
    postcode = pl.col("Postcode")
    is_blank = postcode.cast(pl.Utf8).str.strip_chars() == ""
    bad_rows = df.filter(postcode.is_null() | is_blank)

    if bad_rows.height > 0:
        preview_columns = [
            name
            for name in ("Postcode", "Address per Property Register", "Last known price")
            if name in bad_rows.columns
        ]
        preview = bad_rows.select(preview_columns).head(10).to_dicts()
        raise ValueError(
            "Property rows missing a postcode after merge: "
            f"{bad_rows.height} rows. Sample: {preview}"
        )
def _build( def _build(
epc_pp_path: Path, epc_pp_path: Path,
arcgis_path: Path, arcgis_path: Path,
@ -126,12 +238,14 @@ def _build(
lsoa_population_path: Path, lsoa_population_path: Path,
median_age_path: Path, median_age_path: Path,
election_results_path: Path, election_results_path: Path,
tree_density_addresses_path: Path | None = None, tree_density_postcodes_path: Path | None = None,
) -> tuple[pl.DataFrame, pl.DataFrame]: ) -> tuple[pl.DataFrame, pl.DataFrame]:
"""Build postcode and properties dataframes from epc_pp + auxiliary data. """Build postcode and properties dataframes from epc_pp + auxiliary data.
Returns (postcode_df, properties_df). Returns (postcode_df, properties_df).
""" """
_validate_lad_source_coverage(iod_path, ethnicity_path, rental_prices_path)
wide = pl.scan_parquet(epc_pp_path).filter( wide = pl.scan_parquet(epc_pp_path).filter(
pl.col("total_floor_area").is_null() pl.col("total_floor_area").is_null()
| (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2) | (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
@ -152,9 +266,15 @@ def _build(
.drop("new_postcode") .drop("new_postcode")
) )
arcgis_raw = pl.scan_parquet(arcgis_path)
postcode_country = arcgis_raw.select(
pl.col("pcds").alias("postcode"),
pl.col("ctry25cd"),
).unique(["postcode"])
wide = wide.join(postcode_country, on="postcode", how="left")
arcgis = ( arcgis = (
pl.scan_parquet(arcgis_path) arcgis_raw.filter(pl.col("ctry25cd") == "E92000001") # England only
.filter(pl.col("ctry25cd") == "E92000001") # England only
.filter(pl.col("doterm").is_null()) # Active postcodes only .filter(pl.col("doterm").is_null()) # Active postcodes only
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd. # NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
# Alias them back to the short canonical names used across the # Alias them back to the short canonical names used across the
@ -191,7 +311,9 @@ def _build(
.cast(pl.UInt8) .cast(pl.UInt8)
.alias("_bedrooms"), .alias("_bedrooms"),
) )
rental = pl.scan_parquet(rental_prices_path) rental = pl.scan_parquet(rental_prices_path).select(
"area_code", "bedrooms", "mean_monthly_rent"
)
wide = wide.join( wide = wide.join(
rental, rental,
left_on=["Local Authority District code (2024)", "_bedrooms"], left_on=["Local Authority District code (2024)", "_bedrooms"],
@ -260,17 +382,9 @@ def _build(
school_proximity = pl.scan_parquet(school_proximity_path) school_proximity = pl.scan_parquet(school_proximity_path)
wide = wide.join(school_proximity, on="postcode", how="left") wide = wide.join(school_proximity, on="postcode", how="left")
if tree_density_addresses_path is not None: if tree_density_postcodes_path is not None:
tree_density = ( tree_density = _tree_density_by_postcode(tree_density_postcodes_path)
pl.scan_parquet(tree_density_addresses_path) wide = wide.join(tree_density, on="postcode", how="left")
.select(
pl.col("postcode"),
pl.col("pp_address"),
pl.col(TREE_DENSITY_FEATURE).cast(pl.Float32),
)
.unique(["postcode", "pp_address"])
)
wide = wide.join(tree_density, on=["postcode", "pp_address"], how="left")
# Broadband: derive max available download speed tier per postcode from # Broadband: derive max available download speed tier per postcode from
# Ofcom availability percentages. Tiers: Gigabit ≥1000, UFBB ≥300, # Ofcom availability percentages. Tiers: Gigabit ≥1000, UFBB ≥300,
@ -415,6 +529,7 @@ def _build(
print("Collecting with streaming engine...") print("Collecting with streaming engine...")
df = wide.collect(engine="streaming") df = wide.collect(engine="streaming")
_validate_property_postcodes(df)
# Split into postcode-level and property-level dataframes # Split into postcode-level and property-level dataframes
area_cols = [ area_cols = [
@ -508,10 +623,10 @@ def main():
help="2024 General Election results by constituency parquet file", help="2024 General Election results by constituency parquet file",
) )
parser.add_argument( parser.add_argument(
"--tree-density-addresses", "--tree-density-postcodes",
type=Path, type=Path,
required=False, required=False,
help="Address-level tree density parquet from pipeline.transform.tree_density", help="Postcode-level tree density parquet from pipeline.transform.tree_density",
) )
parser.add_argument( parser.add_argument(
"--output-postcodes", "--output-postcodes",
@ -541,7 +656,7 @@ def main():
lsoa_population_path=args.lsoa_population, lsoa_population_path=args.lsoa_population,
median_age_path=args.median_age, median_age_path=args.median_age,
election_results_path=args.election_results, election_results_path=args.election_results,
tree_density_addresses_path=args.tree_density_addresses, tree_density_postcodes_path=args.tree_density_postcodes,
) )
print(f"\nPostcode columns: {postcode_df.columns}") print(f"\nPostcode columns: {postcode_df.columns}")

View file

@ -227,7 +227,18 @@ def main():
fa = test["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy() fa = test["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
print("\nComputing kNN estimates...") print("\nComputing kNN estimates...")
knn_psm = knn_median_psm(trees, lat, lon, tg) last_sale_dates = (
test["input_date"].dt.epoch("d").fill_null(-1).to_numpy().astype(np.int64)
)
knn_psm = knn_median_psm(
trees,
lat,
lon,
tg,
postcodes=test["Postcode"].fill_null("").to_numpy(),
last_prices=test["input_price"].cast(pl.Float64).to_numpy(),
last_sale_dates=last_sale_dates,
)
# Temporal adjustment: pool PSM is at ref, adjust to actual # Temporal adjustment: pool PSM is at ref, adjust to actual
log_idx_actual = test["log_index_actual"].to_numpy().astype(np.float64) log_idx_actual = test["log_index_actual"].to_numpy().astype(np.float64)

View file

@ -13,6 +13,7 @@ for lat/lon needed by kNN, then drops those columns before writing.
import argparse import argparse
from pathlib import Path from pathlib import Path
import numpy as np
import polars as pl import polars as pl
from pipeline.transform.price_estimation.knn import ( from pipeline.transform.price_estimation.knn import (
@ -28,6 +29,45 @@ from pipeline.transform.price_estimation.utils import (
type_group_expr, type_group_expr,
) )
# Guard rails for blending kNN estimates into index-based estimates.
# A kNN estimate is only blended when it lies within
# [MIN_KNN_TO_INDEX_RATIO, MAX_KNN_TO_INDEX_RATIO] times the index estimate.
MAX_KNN_TO_INDEX_RATIO = 2.0
MIN_KNN_TO_INDEX_RATIO = 0.5
# The final estimate is capped at this multiple of the last sale price.
MAX_ESTIMATE_TO_LAST_PRICE_RATIO = 6.0
def guarded_blend_estimates(
    index_est: np.ndarray,
    knn_est: np.ndarray,
    last_prices: np.ndarray,
    weight: float = KNN_BLEND_WEIGHT,
) -> np.ndarray:
    """Blend index and kNN estimates element-wise, with sanity guards.

    Args:
        index_est: Index-based estimates; entries that are not finite and
            positive produce NaN in the output.
        knn_est: kNN-based estimates; used only where finite, positive and
            within [MIN_KNN_TO_INDEX_RATIO, MAX_KNN_TO_INDEX_RATIO] times the
            index estimate.
        last_prices: Last sale prices; where finite and positive, the result
            is capped at MAX_ESTIMATE_TO_LAST_PRICE_RATIO times this value.
        weight: Share given to the kNN estimate where blending applies.

    Returns:
        Float64 array: the weighted blend where kNN passes the guards, the
        plain index estimate where only the index is usable, NaN otherwise,
        all subject to the last-price cap.
    """
    index_vals = index_est.astype(np.float64, copy=False)
    knn_vals = knn_est.astype(np.float64, copy=False)
    price_vals = last_prices.astype(np.float64, copy=False)

    index_ok = np.isfinite(index_vals) & (index_vals > 0)
    knn_ok = np.isfinite(knn_vals) & (knn_vals > 0)
    within_band = (knn_vals >= index_vals * MIN_KNN_TO_INDEX_RATIO) & (
        knn_vals <= index_vals * MAX_KNN_TO_INDEX_RATIO
    )
    use_knn = index_ok & knn_ok & within_band

    # Start from the index estimate (NaN where unusable), then overwrite the
    # entries whose kNN estimate passed every guard with the weighted blend.
    estimate = np.where(index_ok, index_vals, np.nan)
    estimate = np.where(
        use_knn, (1 - weight) * index_vals + weight * knn_vals, estimate
    )

    price_ok = np.isfinite(price_vals) & (price_vals > 0)
    ceiling = np.where(
        price_ok, price_vals * MAX_ESTIMATE_TO_LAST_PRICE_RATIO, np.nan
    )
    # Cap only where both the estimate and the ceiling are real numbers.
    can_cap = np.isfinite(ceiling) & np.isfinite(estimate)
    return np.where(can_cap, np.minimum(estimate, ceiling), estimate)
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@ -130,36 +170,54 @@ def main():
lon = df["lon"].cast(pl.Float64).to_numpy() lon = df["lon"].cast(pl.Float64).to_numpy()
tg = df["_type_group"].fill_null("").to_numpy() tg = df["_type_group"].fill_null("").to_numpy()
fa = df["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy() fa = df["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
last_prices = (
df["Last known price"].cast(pl.Float64).fill_null(float("nan")).to_numpy()
)
last_sale_dates = (
df["Date of last transaction"]
.dt.epoch("d")
.fill_null(-1)
.to_numpy()
.astype(np.int64)
)
knn_psm = knn_median_psm(trees, lat, lon, tg) knn_psm = knn_median_psm(
trees,
lat,
lon,
tg,
postcodes=df["Postcode"].fill_null("").to_numpy(),
last_prices=last_prices,
last_sale_dates=last_sale_dates,
)
knn_est = knn_psm * fa # No temporal adj: ref == current knn_est = knn_psm * fa # No temporal adj: ref == current
df = df.with_columns( df = df.with_columns(
pl.Series("_knn_est", knn_est, dtype=pl.Float64), pl.Series("_knn_est", knn_est, dtype=pl.Float64),
) )
# Blend: where kNN available, use weighted average; else keep index # Blend only when kNN is close to the index estimate; otherwise keep index.
index_est = (
df["Estimated current price"]
.cast(pl.Float64)
.fill_null(float("nan"))
.to_numpy()
)
blended = guarded_blend_estimates(index_est, knn_est, last_prices)
df = df.with_columns( df = df.with_columns(
pl.when( pl.Series("_index_est", index_est, dtype=pl.Float64),
pl.col("Estimated current price").is_not_null() pl.Series("Estimated current price", blended, dtype=pl.Float64),
& pl.col("_knn_est").is_not_null() ).with_columns(
& pl.col("_knn_est").is_finite() pl.col("Estimated current price").fill_nan(None),
& (pl.col("_knn_est") > 0)
)
.then(
(1 - KNN_BLEND_WEIGHT) * pl.col("Estimated current price")
+ KNN_BLEND_WEIGHT * pl.col("_knn_est")
)
.when(pl.col("Estimated current price").is_not_null())
.then(pl.col("Estimated current price"))
.otherwise(pl.lit(None))
.alias("Estimated current price"),
) )
n_blended = df.filter( n_blended = df.filter(
pl.col("_knn_est").is_not_null() pl.col("_knn_est").is_not_null()
& pl.col("_knn_est").is_finite() & pl.col("_knn_est").is_finite()
& (pl.col("_knn_est") > 0) & (pl.col("_knn_est") > 0)
& (pl.col("_index_est").is_not_null())
& (pl.col("_knn_est") >= pl.col("_index_est") * MIN_KNN_TO_INDEX_RATIO)
& (pl.col("_knn_est") <= pl.col("_index_est") * MAX_KNN_TO_INDEX_RATIO)
& pl.col("Estimated current price").is_not_null() & pl.col("Estimated current price").is_not_null()
).height ).height
print(f" kNN blended: {n_blended:,} of {n_estimated:,} estimates") print(f" kNN blended: {n_blended:,} of {n_estimated:,} estimates")

View file

@ -21,6 +21,10 @@ from pipeline.transform.price_estimation.utils import (
KNN_K = 20 KNN_K = 20
KNN_MIN_NEIGHBORS = 5 KNN_MIN_NEIGHBORS = 5
KNN_BLEND_WEIGHT = 0.35 KNN_BLEND_WEIGHT = 0.35
# Inclusive bounds a sale must satisfy to enter the kNN comparable pool:
# floor area in sqm, and index-adjusted price per sqm.
MIN_COMPARABLE_FLOOR_AREA_SQM = 15.0
MAX_COMPARABLE_FLOOR_AREA_SQM = 1_000.0
MIN_COMPARABLE_PSM = 500.0
MAX_COMPARABLE_PSM = 50_000.0
def _scale_coords(lat: np.ndarray, lon: np.ndarray) -> np.ndarray: def _scale_coords(lat: np.ndarray, lon: np.ndarray) -> np.ndarray:
@ -33,13 +37,14 @@ def build_knn_pool(
index: pl.DataFrame, index: pl.DataFrame,
ref_frac_year: float, ref_frac_year: float,
max_sale_year: int | None = None, max_sale_year: int | None = None,
) -> dict[str, tuple[KDTree, np.ndarray]]: ) -> dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
"""Build per-type_group KD-trees of index-adjusted price-per-sqm. """Build per-type_group KD-trees of index-adjusted price-per-sqm.
Adjusts all pool properties' sale prices to ref_frac_year using the index, Adjusts all pool properties' sale prices to ref_frac_year using the index,
then builds a KD-tree per type_group for nearest-neighbor queries. then builds a KD-tree per type_group for nearest-neighbor queries.
Returns dict mapping type_group -> (KDTree over scaled lat/lon, adjusted_psm array). Returns dict mapping type_group to KDTree, adjusted PSM, and sale identity
arrays used to keep the target sale out of its own comparable set.
""" """
print("Building kNN pool...") print("Building kNN pool...")
lf = pl.scan_parquet(source) if isinstance(source, Path) else source lf = pl.scan_parquet(source) if isinstance(source, Path) else source
@ -55,7 +60,8 @@ def build_knn_pool(
pl.col("lat").is_not_null(), pl.col("lat").is_not_null(),
pl.col("lon").is_not_null(), pl.col("lon").is_not_null(),
pl.col("Total floor area (sqm)").is_not_null(), pl.col("Total floor area (sqm)").is_not_null(),
pl.col("Total floor area (sqm)") > 0, pl.col("Total floor area (sqm)") >= MIN_COMPARABLE_FLOOR_AREA_SQM,
pl.col("Total floor area (sqm)") <= MAX_COMPARABLE_FLOOR_AREA_SQM,
pl.col("Last known price").is_not_null(), pl.col("Last known price").is_not_null(),
pl.col("Last known price") > 0, pl.col("Last known price") > 0,
pl.col("Postcode").is_not_null(), pl.col("Postcode").is_not_null(),
@ -97,12 +103,13 @@ def build_knn_pool(
).filter( ).filter(
pl.col("_adj_psm").is_not_null(), pl.col("_adj_psm").is_not_null(),
pl.col("_adj_psm").is_finite(), pl.col("_adj_psm").is_finite(),
pl.col("_adj_psm") > 0, pl.col("_adj_psm") >= MIN_COMPARABLE_PSM,
pl.col("_adj_psm") <= MAX_COMPARABLE_PSM,
) )
print(f" {len(pool):,} after index adjustment") print(f" {len(pool):,} after index adjustment")
# Build per-type KD-trees # Build per-type KD-trees
trees: dict[str, tuple[KDTree, np.ndarray]] = {} trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]] = {}
for tg in TYPE_GROUPS: for tg in TYPE_GROUPS:
sub = pool.filter(pl.col("type_group") == tg) sub = pool.filter(pl.col("type_group") == tg)
n = len(sub) n = len(sub)
@ -111,19 +118,49 @@ def build_knn_pool(
lat = sub["lat"].to_numpy().astype(np.float64) lat = sub["lat"].to_numpy().astype(np.float64)
lon = sub["lon"].to_numpy().astype(np.float64) lon = sub["lon"].to_numpy().astype(np.float64)
psm = sub["_adj_psm"].to_numpy().astype(np.float64) psm = sub["_adj_psm"].to_numpy().astype(np.float64)
postcodes = sub["Postcode"].fill_null("").to_numpy()
prices = sub["Last known price"].to_numpy().astype(np.float64)
sale_dates = (
sub["Date of last transaction"]
.dt.epoch("d")
.fill_null(-1)
.to_numpy()
.astype(np.int64)
)
tree = KDTree(_scale_coords(lat, lon)) tree = KDTree(_scale_coords(lat, lon))
trees[tg] = (tree, psm) trees[tg] = (tree, psm, postcodes, prices, sale_dates)
print(f" {tg}: {n:,}") print(f" {tg}: {n:,}")
return trees return trees
def _sale_identity_matches(
    pool_postcodes: np.ndarray,
    pool_prices: np.ndarray,
    pool_sale_dates: np.ndarray,
    target_postcode: str,
    target_price: float,
    target_sale_date: int,
) -> np.ndarray:
    """Flag pool entries that look like the target's own last sale.

    An entry matches when its postcode and sale date equal the target's and
    its price is finite and within 0.5 of the target price.

    Returns:
        Boolean mask over the pool entries. It is all False when the target
        has an empty postcode, a non-finite price, or a negative sale date.
    """
    target_unidentifiable = (
        not target_postcode
        or not np.isfinite(target_price)
        or target_sale_date < 0
    )
    if target_unidentifiable:
        return np.zeros(len(pool_postcodes), dtype=bool)

    same_postcode = pool_postcodes == target_postcode
    # Absolute tolerance only: prices must agree to within 0.5.
    same_price = np.isfinite(pool_prices) & np.isclose(
        pool_prices, target_price, rtol=0.0, atol=0.5
    )
    same_date = pool_sale_dates == target_sale_date
    return same_postcode & same_price & same_date
def knn_median_psm( def knn_median_psm(
trees: dict[str, tuple[KDTree, np.ndarray]], trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]],
lat: np.ndarray, lat: np.ndarray,
lon: np.ndarray, lon: np.ndarray,
type_groups: np.ndarray, type_groups: np.ndarray,
k: int = KNN_K, k: int = KNN_K,
postcodes: np.ndarray | None = None,
last_prices: np.ndarray | None = None,
last_sale_dates: np.ndarray | None = None,
) -> np.ndarray: ) -> np.ndarray:
"""Return median adjusted-PSM of k nearest neighbours for each target. """Return median adjusted-PSM of k nearest neighbours for each target.
@ -133,21 +170,41 @@ def knn_median_psm(
n = len(lat) n = len(lat)
result = np.full(n, np.nan) result = np.full(n, np.nan)
for tg, (tree, psm) in trees.items(): for tg, (tree, psm, pool_postcodes, pool_prices, pool_sale_dates) in trees.items():
mask = (type_groups == tg) & np.isfinite(lat) & np.isfinite(lon) mask = (type_groups == tg) & np.isfinite(lat) & np.isfinite(lon)
idx = np.where(mask)[0] idx = np.where(mask)[0]
if len(idx) == 0: if len(idx) == 0:
continue continue
actual_k = min(k, len(psm)) query_k = min(max(k * 2, k + KNN_MIN_NEIGHBORS), len(psm))
if actual_k < KNN_MIN_NEIGHBORS: if query_k < KNN_MIN_NEIGHBORS:
continue continue
coords = _scale_coords(lat[idx], lon[idx]) coords = _scale_coords(lat[idx], lon[idx])
_, nn_idx = tree.query(coords, k=actual_k) _, nn_idx = tree.query(coords, k=query_k)
if nn_idx.ndim == 1: if nn_idx.ndim == 1:
nn_idx = nn_idx.reshape(-1, 1) nn_idx = nn_idx.reshape(-1, 1)
result[idx] = np.nanmedian(psm[nn_idx], axis=1) medians = np.full(len(idx), np.nan)
for row_num, target_idx in enumerate(idx):
candidates = nn_idx[row_num]
if (
postcodes is not None
and last_prices is not None
and last_sale_dates is not None
):
same_sale = _sale_identity_matches(
pool_postcodes[candidates],
pool_prices[candidates],
pool_sale_dates[candidates],
str(postcodes[target_idx] or ""),
float(last_prices[target_idx]),
int(last_sale_dates[target_idx]),
)
candidates = candidates[~same_sale]
if len(candidates) >= KNN_MIN_NEIGHBORS:
medians[row_num] = np.nanmedian(psm[candidates[:k]])
result[idx] = medians
return result return result

View file

@ -19,7 +19,7 @@ TERRACE_TYPES = [
"Terraced", "Terraced",
] ]
FLAT_TYPES = ["Flats/Maisonettes"] FLAT_TYPES = ["Flats/Maisonettes"]
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats", "Bungalow"] TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats"]
SHRINKAGE_K = 50 SHRINKAGE_K = 50
@ -30,8 +30,6 @@ def type_group_expr():
.then(pl.lit("Terraced")) .then(pl.lit("Terraced"))
.when(pl.col("Property type").is_in(FLAT_TYPES)) .when(pl.col("Property type").is_in(FLAT_TYPES))
.then(pl.lit("Flats")) .then(pl.lit("Flats"))
.when(pl.col("Property type") == "Bungalow")
.then(pl.lit("Bungalow"))
.when(pl.col("Property type").is_in(["Detached", "Semi-Detached"])) .when(pl.col("Property type").is_in(["Detached", "Semi-Detached"]))
.then(pl.col("Property type")) .then(pl.col("Property type"))
.otherwise(pl.lit(None)) .otherwise(pl.lit(None))
@ -61,7 +59,7 @@ def hierarchy_keys(sector: str) -> tuple[str, str]:
return district, area return district, area
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats", "Bungalow"] NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats"]
def build_hedonic_features(df: pl.DataFrame) -> np.ndarray: def build_hedonic_features(df: pl.DataFrame) -> np.ndarray:

View file

@ -15,12 +15,21 @@ DROP_CATEGORIES = {
"amenity/bicycle_parking", "amenity/bicycle_parking",
"amenity/binoculars", "amenity/binoculars",
"amenity/boot_scraper", "amenity/boot_scraper",
"amenity/bus_garage",
"amenity/check_in", "amenity/check_in",
"amenity/clock", "amenity/clock",
"amenity/clothes_dryer",
"amenity/coast_guard",
"amenity/coffin_rest",
"amenity/compressed_air", "amenity/compressed_air",
"amenity/court_yard",
"amenity/donation_box", "amenity/donation_box",
"amenity/dressing_room", "amenity/dressing_room",
"amenity/drinking_water", "amenity/drinking_water",
"emergency/water_tank",
"leisure/bleachers",
"leisure/schoolyard",
"public_transport/pay_scale_area",
"shop/taxi", "shop/taxi",
"amenity/feeding_place", "amenity/feeding_place",
"amenity/fixme", "amenity/fixme",
@ -31,6 +40,7 @@ DROP_CATEGORIES = {
"amenity/lounge", "amenity/lounge",
"tourism/preserved_railway", "tourism/preserved_railway",
"amenity/lounger", "amenity/lounger",
"leisure/sport",
"amenity/motorcycle_parking", "amenity/motorcycle_parking",
"amenity/mounting_block", "amenity/mounting_block",
"amenity/notice_board", "amenity/notice_board",
@ -71,8 +81,12 @@ DROP_CATEGORIES = {
"amenity/boat_storage", "amenity/boat_storage",
"amenity/bureau_de_change", "amenity/bureau_de_change",
"amenity/bus_station", "amenity/bus_station",
"amenity/beachhut",
"amenity/canteen",
"amenity/conference_centre", "amenity/conference_centre",
"amenity/crematorium", "amenity/crematorium",
"amenity/disused",
"amenity/driver_training",
"amenity/driving_school", "amenity/driving_school",
"amenity/escooter_rental", "amenity/escooter_rental",
"amenity/ferry_terminal", "amenity/ferry_terminal",
@ -82,14 +96,21 @@ DROP_CATEGORIES = {
"amenity/kick-scooter_rental", "amenity/kick-scooter_rental",
"amenity/money_transfer", "amenity/money_transfer",
"amenity/post_depot", "amenity/post_depot",
"amenity/prison",
"amenity/public_building", "amenity/public_building",
"amenity/recycling", "amenity/recycling",
"amenity/scout_hut", "amenity/scout_hut",
"amenity/social_facility", "amenity/social_facility",
"amenity/studio", "amenity/studio",
"amenity/student_accommodation",
"amenity/taxi", "amenity/taxi",
"amenity/telephone_exchange",
"amenity/training", "amenity/training",
"amenity/vehicle_inspection", "amenity/vehicle_inspection",
"amenity/waiting_room",
"amenity/yes",
"shop/disused",
"shop/no",
# Buildings (except church & university which are mapped) # Buildings (except church & university which are mapped)
"building/air_shaft", "building/air_shaft",
"building/apartments", "building/apartments",
@ -148,12 +169,14 @@ DROP_CATEGORIES = {
"emergency/yes", "emergency/yes",
"tourism/apartment", "tourism/apartment",
"tourism/apartments", "tourism/apartments",
"tourism/alpine_hut",
"tourism/camp_pitch", "tourism/camp_pitch",
"tourism/caravan_site", "tourism/caravan_site",
"tourism/information", "tourism/information",
"tourism/picnic_site", "tourism/picnic_site",
"tourism/viewpoint", "tourism/viewpoint",
"tourism/village_sign", "tourism/village_sign",
"tourism/wilderness_hut",
"tourism/yes", "tourism/yes",
# Public transport (from NaPTAN instead) # Public transport (from NaPTAN instead)
"public_transport/entrance", "public_transport/entrance",
@ -191,6 +214,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"🍺", "🍺",
[ [
"amenity/pub", "amenity/pub",
"amenity/beer_garden",
"amenity/biergarten",
"amenity/social_club", "amenity/social_club",
"amenity/club", "amenity/club",
"leisure/social_club", "leisure/social_club",
@ -293,7 +318,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"leisure/miniature_golf", "leisure/miniature_golf",
"leisure/horse_riding", "leisure/horse_riding",
"leisure/fishing", "leisure/fishing",
"leisure/ice_rink",
"leisure/paddling_pool",
"leisure/practice_pitch",
"leisure/shooting_ground",
"leisure/stadium",
"leisure/swimming_pool", "leisure/swimming_pool",
"leisure/swimming_area",
"leisure/water_park", "leisure/water_park",
"leisure/bathing_place", "leisure/bathing_place",
], ],
@ -307,9 +338,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"leisure/amusement_arcade", "leisure/amusement_arcade",
"leisure/adult_gaming_centre", "leisure/adult_gaming_centre",
"leisure/escape_game", "leisure/escape_game",
"leisure/maze",
"leisure/trampoline_park", "leisure/trampoline_park",
"leisure/sauna", "leisure/sauna",
"leisure/tanning_salon", "leisure/tanning_salon",
"shop/amusements",
"tourism/theme_park", "tourism/theme_park",
"amenity/bicycle_rental", "amenity/bicycle_rental",
"amenity/boat_rental", "amenity/boat_rental",
@ -345,6 +378,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[ [
"shop/bakery", "shop/bakery",
"shop/pastry", "shop/pastry",
"craft/bakery",
"craft/confectionery", "craft/confectionery",
], ],
), ),
@ -364,6 +398,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[ [
"shop/greengrocer", "shop/greengrocer",
"shop/farm", "shop/farm",
"shop/market",
"amenity/marketplace", "amenity/marketplace",
], ],
), ),
@ -424,6 +459,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/appliance", "shop/appliance",
"shop/electrical", "shop/electrical",
"shop/hifi", "shop/hifi",
"shop/vacuum_cleaner",
"shop/video_games", "shop/video_games",
"shop/games", "shop/games",
], ],
@ -444,7 +480,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[ [
"shop/doityourself", "shop/doityourself",
"shop/hardware", "shop/hardware",
"shop/builders_merchant",
"shop/paint", "shop/paint",
"shop/plumbing",
], ],
), ),
( (
@ -462,11 +500,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/curtain", "shop/curtain",
"shop/flooring", "shop/flooring",
"shop/fireplace", "shop/fireplace",
"shop/garden_furniture",
"shop/groundskeeping",
"shop/household", "shop/household",
"shop/household_linen", "shop/household_linen",
"shop/houseware", "shop/houseware",
"shop/homeware",
"shop/interior_decoration", "shop/interior_decoration",
"shop/lighting", "shop/lighting",
"shop/kitchenware",
"shop/window_blind", "shop/window_blind",
], ],
), ),
@ -493,8 +535,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"🏕️", "🏕️",
[ [
"shop/sports", "shop/sports",
"shop/angling",
"shop/outdoor", "shop/outdoor",
"shop/bicycle", "shop/bicycle",
"shop/equestrian",
"shop/surf",
], ],
), ),
( (
@ -532,9 +577,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/music", "shop/music",
"shop/musical_instrument", "shop/musical_instrument",
"shop/antiques", "shop/antiques",
"shop/anime",
"shop/baby_goods", "shop/baby_goods",
"shop/fabric", "shop/fabric",
"shop/haberdashery", "shop/haberdashery",
"shop/hobby",
"shop/wool", "shop/wool",
"shop/pottery", "shop/pottery",
], ],
@ -549,9 +596,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/bookmaker", "shop/bookmaker",
"shop/building_materials", "shop/building_materials",
"shop/camera", "shop/camera",
"shop/cannabis",
"shop/car", "shop/car",
"shop/caravan", "shop/caravan",
"shop/catalogue", "shop/catalogue",
"shop/auction",
"shop/auction_house",
"shop/chandler",
"shop/collector", "shop/collector",
"shop/copyshop", "shop/copyshop",
"shop/country_store", "shop/country_store",
@ -560,6 +611,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/erotic", "shop/erotic",
"shop/esoteric", "shop/esoteric",
"shop/fan", "shop/fan",
"shop/fireworks",
"shop/fishing", "shop/fishing",
"shop/frame", "shop/frame",
"shop/fuel", "shop/fuel",
@ -582,6 +634,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/scuba_diving", "shop/scuba_diving",
"shop/security", "shop/security",
"shop/sewing", "shop/sewing",
"shop/ship_chandler",
"shop/signs",
"shop/storage_rental", "shop/storage_rental",
"shop/swimming_pool", "shop/swimming_pool",
"shop/telecommunication", "shop/telecommunication",
@ -590,7 +644,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/tool_hire", "shop/tool_hire",
"shop/trade", "shop/trade",
"shop/trophy", "shop/trophy",
"shop/truck",
"shop/vacant", "shop/vacant",
"shop/van",
"shop/video", "shop/video",
"shop/water_sports", "shop/water_sports",
"shop/weapons", "shop/weapons",
@ -611,6 +667,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/cosmetics", "shop/cosmetics",
"shop/massage", "shop/massage",
"shop/perfumery", "shop/perfumery",
"leisure/spa",
], ],
), ),
( (
@ -757,6 +814,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[ [
"amenity/hospital", "amenity/hospital",
"amenity/clinic", "amenity/clinic",
"amenity/health_centre",
"healthcare/blood_donation",
"healthcare/hospital", "healthcare/hospital",
"healthcare/centre", "healthcare/centre",
"healthcare/clinic", "healthcare/clinic",
@ -804,6 +863,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[ [
"amenity/care_home", "amenity/care_home",
"amenity/nursing_home", "amenity/nursing_home",
"amenity/retirement_home",
"healthcare/hospice",
"healthcare/nursing_home",
"office/home_care", "office/home_care",
], ],
), ),
@ -848,6 +910,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"", "",
[ [
"amenity/place_of_worship", "amenity/place_of_worship",
"amenity/monastery",
"building/church", "building/church",
], ],
), ),
@ -873,6 +936,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"📸", "📸",
[ [
"tourism/attraction", "tourism/attraction",
"tourism/aquarium",
"amenity/fountain", "amenity/fountain",
"amenity/courthouse", "amenity/courthouse",
"tourism/chalet", "tourism/chalet",
@ -892,6 +956,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"building/university", "building/university",
"amenity/kindergarten", "amenity/kindergarten",
"amenity/childcare", "amenity/childcare",
"office/tutoring",
], ],
), ),
( (
@ -904,6 +969,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"tourism/guest_house", "tourism/guest_house",
"tourism/motel", "tourism/motel",
"tourism/camp_site", "tourism/camp_site",
"leisure/resort",
"tourism/holiday_park",
"tourism/self_catering",
], ],
), ),
( (
@ -928,14 +996,19 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"craft/window_construction", "craft/window_construction",
"craft/agricultural_engines", "craft/agricultural_engines",
"craft/atelier", "craft/atelier",
"craft/beekeeper",
"craft/blacksmith", "craft/blacksmith",
"craft/bookbinder", "craft/bookbinder",
"craft/boatbuilder",
"craft/caterer", "craft/caterer",
"craft/carpet_layer",
"craft/clockmaker",
"craft/handicraft", "craft/handicraft",
"craft/jeweller", "craft/jeweller",
"craft/metal_construction", "craft/metal_construction",
"craft/photographer", "craft/photographer",
"craft/photographic_laboratory", "craft/photographic_laboratory",
"craft/plasterer",
"craft/pottery", "craft/pottery",
"craft/printer", "craft/printer",
"craft/sawmill", "craft/sawmill",
@ -946,22 +1019,28 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"craft/upholsterer", "craft/upholsterer",
"craft/watchmaker", "craft/watchmaker",
"craft/yes", "craft/yes",
"amenity/workshop",
"shop/glaziery", "shop/glaziery",
"shop/windows", "shop/windows",
# Professional offices & estate agents # Professional offices & estate agents
"shop/estate_agent", "shop/estate_agent",
"office/accountant", "office/accountant",
"office/architect", "office/architect",
"office/auctioneer",
"office/builder",
"office/construction",
"office/construction_company", "office/construction_company",
"office/engineer", "office/engineer",
"office/estate_agent", "office/estate_agent",
"office/financial", "office/financial",
"office/financial_advisor", "office/financial_advisor",
"office/financial_services",
"office/insurance", "office/insurance",
"office/lawyer", "office/lawyer",
"office/mortgage", "office/mortgage",
"office/property_management", "office/property_management",
"office/solicitor", "office/solicitor",
"office/solicitors",
"office/surveyor", "office/surveyor",
"office/tax_advisor", "office/tax_advisor",
], ],
@ -972,6 +1051,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"🏢", "🏢",
[ [
"amenity/coworking_space", "amenity/coworking_space",
"amenity/research_institute",
"office/administrative",
"office/advertising_agency", "office/advertising_agency",
"office/association", "office/association",
"office/charity", "office/charity",
@ -997,12 +1078,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"office/notary", "office/notary",
"office/political_party", "office/political_party",
"office/politician", "office/politician",
"office/publisher",
"office/quango",
"office/recruitment", "office/recruitment",
"office/religion", "office/religion",
"office/research", "office/research",
"office/security", "office/security",
"office/taxi", "office/taxi",
"office/telecommunication", "office/telecommunication",
"office/transport",
"office/union", "office/union",
"office/university", "office/university",
"office/vacant", "office/vacant",
@ -1032,7 +1116,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"Community Centre", "Community Centre",
"🤝", "🤝",
[ [
"amenity/church_hall",
"amenity/clubhouse",
"amenity/community_centre", "amenity/community_centre",
"amenity/community_hall",
"amenity/scout_hall",
"amenity/social_centre", "amenity/social_centre",
"amenity/townhall", "amenity/townhall",
], ],

View file

@ -7,6 +7,8 @@ from scipy.spatial import cKDTree
from .haversine import haversine_km from .haversine import haversine_km
EARTH_RADIUS_KM = 6371.0088 EARTH_RADIUS_KM = 6371.0088
KM_PER_DEGREE_LAT = 111.32
DEFAULT_GRID_SIZE_DEGREES = 0.02
def _build_poi_grid( def _build_poi_grid(
@ -34,16 +36,29 @@ def _build_poi_grid(
def _get_nearby_indices( def _get_nearby_indices(
pc_lat: float, pc_lon: float, poi_grid: dict, grid_size: float = 0.05 pc_lat: float,
pc_lon: float,
poi_grid: dict,
radius_km: float,
grid_size: float = DEFAULT_GRID_SIZE_DEGREES,
) -> np.ndarray | None: ) -> np.ndarray | None:
"""Get POI indices from grid cells near the given coordinate.""" """Get POI indices from all grid cells intersecting the radius bounding box."""
grid_lat = int(np.floor(pc_lat / grid_size)) if not np.isfinite(pc_lat) or not np.isfinite(pc_lon):
grid_lng = int(np.floor(pc_lon / grid_size)) return None
lat_delta = radius_km / KM_PER_DEGREE_LAT
cos_lat = abs(np.cos(np.radians(pc_lat)))
lng_delta = 180.0 if cos_lat < 1e-12 else radius_km / (KM_PER_DEGREE_LAT * cos_lat)
min_grid_lat = int(np.floor((pc_lat - lat_delta) / grid_size))
max_grid_lat = int(np.floor((pc_lat + lat_delta) / grid_size))
min_grid_lng = int(np.floor((pc_lon - lng_delta) / grid_size))
max_grid_lng = int(np.floor((pc_lon + lng_delta) / grid_size))
nearby_indices = [] nearby_indices = []
for dlat in [-1, 0, 1]: for grid_lat in range(min_grid_lat, max_grid_lat + 1):
for dlng in [-1, 0, 1]: for grid_lng in range(min_grid_lng, max_grid_lng + 1):
cell_key = (grid_lat + dlat, grid_lng + dlng) cell_key = (grid_lat, grid_lng)
if cell_key in poi_grid: if cell_key in poi_grid:
nearby_indices.append(poi_grid[cell_key]) nearby_indices.append(poi_grid[cell_key])
@ -83,7 +98,7 @@ def count_pois_per_postcode(
n_pois = len(pois) n_pois = len(pois)
print(f" {n_postcodes:,} postcodes, {n_pois:,} POIs") print(f" {n_postcodes:,} postcodes, {n_pois:,} POIs")
grid_size = 0.05 grid_size = DEFAULT_GRID_SIZE_DEGREES
print(" Building POI spatial grid...") print(" Building POI spatial grid...")
poi_lats, poi_lngs, poi_cats, poi_grid = _build_poi_grid(pois, grid_size) poi_lats, poi_lngs, poi_cats, poi_grid = _build_poi_grid(pois, grid_size)
print(f" POI grid has {len(poi_grid):,} occupied cells") print(f" POI grid has {len(poi_grid):,} occupied cells")
@ -120,7 +135,9 @@ def count_pois_per_postcode(
# Process batch # Process batch
for i in range(start_idx, end_idx): for i in range(start_idx, end_idx):
nearby = _get_nearby_indices(pc_lats[i], pc_lons[i], poi_grid, grid_size) nearby = _get_nearby_indices(
pc_lats[i], pc_lons[i], poi_grid, radius_km, grid_size
)
if nearby is None: if nearby is None:
continue continue

View file

@ -215,6 +215,14 @@ struct Cli {
#[arg(long, env = "STRIPE_REFERRAL_COUPON_ID")] #[arg(long, env = "STRIPE_REFERRAL_COUPON_ID")]
stripe_referral_coupon_id: String, stripe_referral_coupon_id: String,
/// Bearer token required to scrape /metrics.
#[arg(long, env = "METRICS_BEARER_TOKEN")]
metrics_bearer_token: Option<String>,
/// Allow unauthenticated /metrics scraping when no METRICS_BEARER_TOKEN is set.
#[arg(long, env = "ALLOW_PUBLIC_METRICS", default_value_t = false)]
allow_public_metrics: bool,
/// Google OAuth client ID for PocketBase SSO /// Google OAuth client ID for PocketBase SSO
#[arg(long, env = "GOOGLE_OAUTH_CLIENT_ID")] #[arg(long, env = "GOOGLE_OAUTH_CLIENT_ID")]
google_oauth_client_id: String, google_oauth_client_id: String,
@ -246,6 +254,8 @@ async fn main() -> anyhow::Result<()> {
info!("Prometheus metrics initialized"); info!("Prometheus metrics initialized");
let cli = Cli::parse(); let cli = Cli::parse();
let metrics_bearer_token = cli.metrics_bearer_token.clone();
let allow_public_metrics = cli.allow_public_metrics;
for (label, path) in [ for (label, path) in [
("Properties", &cli.properties), ("Properties", &cli.properties),
@ -510,7 +520,10 @@ async fn main() -> anyhow::Result<()> {
let public_url_tiles = initial_state.public_url.clone(); let public_url_tiles = initial_state.public_url.clone();
let api = Router::new() let api = Router::new()
.route("/api/features", get(routes::get_features)) .route(
"/api/features",
get(routes::get_features).layer(ConcurrencyLimitLayer::new(20)),
)
.route( .route(
"/api/hexagons", "/api/hexagons",
get(routes::get_hexagons).layer(ConcurrencyLimitLayer::new(20)), get(routes::get_hexagons).layer(ConcurrencyLimitLayer::new(20)),
@ -519,30 +532,57 @@ async fn main() -> anyhow::Result<()> {
"/api/postcodes", "/api/postcodes",
get(routes::get_postcodes).layer(ConcurrencyLimitLayer::new(20)), get(routes::get_postcodes).layer(ConcurrencyLimitLayer::new(20)),
) )
.route("/api/postcode/{postcode}", get(routes::get_postcode_lookup)) .route(
.route("/api/nearest-postcode", get(routes::get_nearest_postcode)) "/api/postcode/{postcode}",
get(routes::get_postcode_lookup).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/nearest-postcode",
get(routes::get_nearest_postcode).layer(ConcurrencyLimitLayer::new(10)),
)
.route( .route(
"/api/pois", "/api/pois",
get(routes::get_pois).layer(ConcurrencyLimitLayer::new(20)), get(routes::get_pois).layer(ConcurrencyLimitLayer::new(20)),
) )
.route("/api/poi-categories", get(routes::get_poi_categories)) .route(
.route("/api/places", get(routes::get_places)) "/api/poi-categories",
.route("/api/travel-modes", get(routes::get_travel_modes)) get(routes::get_poi_categories).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/places",
get(routes::get_places).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/travel-modes",
get(routes::get_travel_modes).layer(ConcurrencyLimitLayer::new(20)),
)
.route( .route(
"/api/travel-destinations", "/api/travel-destinations",
get(routes::get_travel_destinations), get(routes::get_travel_destinations).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/journey",
get(routes::get_journey).layer(ConcurrencyLimitLayer::new(10)),
) )
.route("/api/journey", get(routes::get_journey))
.route( .route(
"/api/hexagon-properties", "/api/hexagon-properties",
get(routes::get_hexagon_properties), get(routes::get_hexagon_properties).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/filter-counts",
get(routes::get_filter_counts).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/hexagon-stats",
get(routes::get_hexagon_stats).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/postcode-stats",
get(routes::get_postcode_stats).layer(ConcurrencyLimitLayer::new(5)),
) )
.route("/api/filter-counts", get(routes::get_filter_counts))
.route("/api/hexagon-stats", get(routes::get_hexagon_stats))
.route("/api/postcode-stats", get(routes::get_postcode_stats))
.route( .route(
"/api/postcode-properties", "/api/postcode-properties",
get(routes::get_postcode_properties), get(routes::get_postcode_properties).layer(ConcurrencyLimitLayer::new(10)),
) )
.route( .route(
"/api/screenshot", "/api/screenshot",
@ -552,13 +592,26 @@ async fn main() -> anyhow::Result<()> {
"/api/export", "/api/export",
get(routes::get_export).layer(ConcurrencyLimitLayer::new(3)), get(routes::get_export).layer(ConcurrencyLimitLayer::new(3)),
) )
.route("/api/me", get(routes::get_me)) .route(
.route("/api/shorten", post(routes::post_shorten)) "/api/me",
get(routes::get_me).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/shorten",
post(routes::post_shorten).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/share-links",
get(routes::get_share_links).layer(ConcurrencyLimitLayer::new(10)),
)
.route( .route(
"/api/ai-filters", "/api/ai-filters",
post(routes::post_ai_filters).layer(ConcurrencyLimitLayer::new(5)), post(routes::post_ai_filters).layer(ConcurrencyLimitLayer::new(5)),
) )
.route("/api/streetview", get(routes::get_streetview)) .route(
"/api/streetview",
get(routes::get_streetview).layer(ConcurrencyLimitLayer::new(5)),
)
.route( .route(
"/api/rightmove-search", "/api/rightmove-search",
get(routes::get_rightmove_redirect).layer(ConcurrencyLimitLayer::new(10)), get(routes::get_rightmove_redirect).layer(ConcurrencyLimitLayer::new(10)),
@ -567,23 +620,44 @@ async fn main() -> anyhow::Result<()> {
"/api/newsletter", "/api/newsletter",
patch(routes::patch_newsletter).layer(ConcurrencyLimitLayer::new(10)), patch(routes::patch_newsletter).layer(ConcurrencyLimitLayer::new(10)),
) )
.route("/api/pricing", get(routes::get_pricing)) .route(
"/api/pricing",
get(routes::get_pricing).layer(ConcurrencyLimitLayer::new(20)),
)
.route( .route(
"/api/checkout", "/api/checkout",
post(routes::post_checkout).layer(ConcurrencyLimitLayer::new(10)), post(routes::post_checkout).layer(ConcurrencyLimitLayer::new(10)),
) )
.route("/api/stripe-webhook", post(routes::post_stripe_webhook)) .route(
"/api/stripe-webhook",
post(routes::post_stripe_webhook).layer(ConcurrencyLimitLayer::new(10)),
)
.route( .route(
"/api/invites", "/api/invites",
get(routes::get_invites).post(routes::post_invites), get(routes::get_invites)
.post(routes::post_invites)
.layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/invite/{code}",
get(routes::get_invite).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/redeem-invite",
post(routes::post_redeem_invite).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/s/{code}",
get(routes::get_short_url).layer(ConcurrencyLimitLayer::new(10)),
) )
.route("/api/invite/{code}", get(routes::get_invite))
.route("/api/redeem-invite", post(routes::post_redeem_invite))
.route("/s/{code}", get(routes::get_short_url))
.route( .route(
"/api/telemetry", "/api/telemetry",
post(routes::post_telemetry).layer(ConcurrencyLimitLayer::new(20)), post(routes::post_telemetry).layer(ConcurrencyLimitLayer::new(20)),
) )
.route(
"/pb/api/realtime",
any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(50)),
)
.route( .route(
"/pb/{*rest}", "/pb/{*rest}",
any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(10)), any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(10)),
@ -591,19 +665,28 @@ async fn main() -> anyhow::Result<()> {
// Tile routes use a different state type — kept as closures // Tile routes use a different state type — kept as closures
.route( .route(
"/api/tiles/{z}/{x}/{y}", "/api/tiles/{z}/{x}/{y}",
get(move |path| routes::get_tile(axum::extract::State(reader_tile.clone()), path)), get(move |path| routes::get_tile(axum::extract::State(reader_tile.clone()), path))
.layer(ConcurrencyLimitLayer::new(30)),
) )
.route( .route(
"/api/tiles/style.json", "/api/tiles/style.json",
get(move |query| { get(move |query| {
let pu = public_url_tiles.clone(); let pu = public_url_tiles.clone();
routes::get_style(axum::extract::State(reader_style.clone()), pu, query) routes::get_style(axum::extract::State(reader_style.clone()), pu, query)
}), })
.layer(ConcurrencyLimitLayer::new(20)),
) )
.route("/health", get(|| async { "ok" })) .route("/health", get(|| async { "ok" }))
.route( .route(
"/metrics", "/metrics",
get(move || metrics::metrics_handler(metrics_handle.clone())), get(move |headers| {
metrics::metrics_handler(
metrics_handle.clone(),
metrics_bearer_token.clone(),
allow_public_metrics,
headers,
)
}),
) )
.with_state(shared.clone()); .with_state(shared.clone());

View file

@ -8,7 +8,7 @@ use serde::Deserialize;
use tracing::{info, warn}; use tracing::{info, warn};
use crate::auth::OptionalUser; use crate::auth::OptionalUser;
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, MAX_PROPERTIES_LIMIT, POSTCODE_SEARCH_OFFSET}; use crate::consts::{DEFAULT_PROPERTIES_LIMIT, POSTCODE_SEARCH_OFFSET};
use crate::licensing::{check_license_point, resolve_share_code}; use crate::licensing::{check_license_point, resolve_share_code};
use crate::parsing::{parse_filters_with_poi, row_passes_filters, row_passes_poi_filters}; use crate::parsing::{parse_filters_with_poi, row_passes_filters, row_passes_poi_filters};
use crate::state::SharedState; use crate::state::SharedState;
@ -151,10 +151,7 @@ pub async fn get_postcode_properties(
}); });
let total = matching_rows.len(); let total = matching_rows.len();
let limit = params let limit = params.limit.unwrap_or(DEFAULT_PROPERTIES_LIMIT);
.limit
.unwrap_or(DEFAULT_PROPERTIES_LIMIT)
.min(MAX_PROPERTIES_LIMIT);
let page_offset = params.offset.unwrap_or(0); let page_offset = params.offset.unwrap_or(0);
let truncated = total > page_offset + limit; let truncated = total > page_offset + limit;

View file

@ -12,7 +12,6 @@ use tracing::info;
use crate::aggregation::{Aggregator, EnumDistConfig, PoiAggregator}; use crate::aggregation::{Aggregator, EnumDistConfig, PoiAggregator};
use crate::auth::OptionalUser; use crate::auth::OptionalUser;
use crate::consts::MAX_CELLS_PER_REQUEST;
use crate::data::travel_time::TravelData; use crate::data::travel_time::TravelData;
use crate::licensing::{check_license_bounds, resolve_share_code}; use crate::licensing::{check_license_bounds, resolve_share_code};
use crate::parsing::{ use crate::parsing::{
@ -354,73 +353,61 @@ pub async fn get_postcodes(
features.push(feature); features.push(feature);
included_postcodes.insert(pc_idx); included_postcodes.insert(pc_idx);
if features.len() >= MAX_CELLS_PER_REQUEST {
break;
}
} }
if features.len() < MAX_CELLS_PER_REQUEST { for pc_idx in selectable_postcodes {
for pc_idx in selectable_postcodes { if included_postcodes.contains(&pc_idx) {
if included_postcodes.contains(&pc_idx) { continue;
continue;
}
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
if !bounds_intersect(
pc_south as f64,
pc_west as f64,
pc_north as f64,
pc_east as f64,
south,
west,
north,
east,
) {
filtered_out += 1;
continue;
}
let geometry = postcode_data.geometry_geojson(pc_idx);
let centroid = postcode_data.centroids[pc_idx];
let mut props = Map::new();
props.insert(
"postcode".into(),
Value::String(postcode_data.postcodes[pc_idx].clone()),
);
props.insert("count".into(), Value::from(0));
props.insert(
"centroid".into(),
Value::Array(vec![
Value::from(centroid.1 as f64),
Value::from(centroid.0 as f64),
]),
);
let mut feature = Map::new();
feature.insert("type".into(), Value::String("Feature".into()));
feature.insert("geometry".into(), geometry);
feature.insert("properties".into(), Value::Object(props));
features.push(feature);
if features.len() >= MAX_CELLS_PER_REQUEST {
break;
}
} }
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
if !bounds_intersect(
pc_south as f64,
pc_west as f64,
pc_north as f64,
pc_east as f64,
south,
west,
north,
east,
) {
filtered_out += 1;
continue;
}
let geometry = postcode_data.geometry_geojson(pc_idx);
let centroid = postcode_data.centroids[pc_idx];
let mut props = Map::new();
props.insert(
"postcode".into(),
Value::String(postcode_data.postcodes[pc_idx].clone()),
);
props.insert("count".into(), Value::from(0));
props.insert(
"centroid".into(),
Value::Array(vec![
Value::from(centroid.1 as f64),
Value::from(centroid.0 as f64),
]),
);
let mut feature = Map::new();
feature.insert("type".into(), Value::String("Feature".into()));
feature.insert("geometry".into(), geometry);
feature.insert("properties".into(), Value::Object(props));
features.push(feature);
} }
histogram!("postcodes_response_count").record(features.len() as f64); histogram!("postcodes_response_count").record(features.len() as f64);
let truncated = features.len() >= MAX_CELLS_PER_REQUEST;
let t_total = t0.elapsed(); let t_total = t0.elapsed();
info!( info!(
postcodes_before_filter, postcodes_before_filter,
matching_postcodes, matching_postcodes,
postcodes_after_filter = features.len(), postcodes_after_filter = features.len(),
filtered_out, filtered_out,
truncated,
bounds = format_args!("{:.6},{:.6},{:.6},{:.6}", south, west, north, east), bounds = format_args!("{:.6},{:.6},{:.6},{:.6}", south, west, north, east),
filters = num_filters, filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"), filters_raw = filters_str.as_deref().unwrap_or("-"),

View file

@ -3,7 +3,6 @@ import {
hex, hex,
vfrac, vfrac,
type Activity, type Activity,
type AdScene,
type Storyboard, type Storyboard,
type TravelTimeFilter, type TravelTimeFilter,
type VideoConfig, type VideoConfig,
@ -629,30 +628,6 @@ const AD_DEFAULT_FILTERS: Record<string, [number, number] | string[]> = {
'Outstanding primary schools within 2km': [0, 10], 'Outstanding primary schools within 2km': [0, 10],
}; };
/**
* Stable Unsplash CDN photo URLs. Each one is a 720-wide JPEG fetched at
* record time. The CDN serves with permissive CORS, no auth needed, and
* the IDs are stable URLs (Unsplash does not rotate them). If any photo
* stops resolving, dom.ts hides the broken image and the rest of the
* scene still renders, so a 404 here degrades to text-only rather than
* breaking the ad. To swap a photo, search unsplash.com for the theme
* and paste the `photo-{id}` slug from the URL bar.
*/
const PHOTO = {
terracedRow: 'https://images.unsplash.com/photo-1769344694490-66fb22a8d8cf?w=720&q=80&auto=format&fit=crop',
brickStreet: 'https://images.unsplash.com/photo-1689867373120-355ce130d485?w=720&q=80&auto=format&fit=crop',
woodAccentHouses: 'https://images.unsplash.com/photo-1753198412280-b4a9729c1c51?w=720&q=80&auto=format&fit=crop',
colourfulRow: 'https://images.unsplash.com/photo-1718579019220-98697dc2fd72?w=720&q=80&auto=format&fit=crop',
busyTraffic: 'https://images.unsplash.com/photo-1645718171033-574c88494de2?w=720&q=80&auto=format&fit=crop',
cityTraffic: 'https://images.unsplash.com/photo-1714128949057-f7ac4cb71e6c?w=720&q=80&auto=format&fit=crop',
trafficLight: 'https://images.unsplash.com/photo-1680276553514-357f2edc46a1?w=720&q=80&auto=format&fit=crop',
leafySuburb: 'https://images.unsplash.com/photo-1663651884092-a2449ed3671a?w=720&q=80&auto=format&fit=crop',
suburbHomes: 'https://images.unsplash.com/photo-1768301346584-86e781872b82?w=720&q=80&auto=format&fit=crop',
trainPlatform: 'https://images.unsplash.com/photo-1684934899514-772e03714de5?w=720&q=80&auto=format&fit=crop',
trainClock: 'https://images.unsplash.com/photo-1657441629839-874d398b6e04?w=720&q=80&auto=format&fit=crop',
keysFrontDoor: 'https://images.unsplash.com/photo-1741156386380-0236c72eb6f9?w=720&q=80&auto=format&fit=crop',
};
const linger = (durationMs = 360): Activity[] => [{ kind: 'wait', durationMs }]; const linger = (durationMs = 360): Activity[] => [{ kind: 'wait', durationMs }];
/** /**
@ -781,15 +756,6 @@ const ttDragAct = (toMin: number, durationMs = 1400): Activity => ({
toFraction: toMin / TT_SLIDER_MAX, toFraction: toMin / TT_SLIDER_MAX,
durationMs, durationMs,
}); });
const showScene = (scene: AdScene): Activity => ({
kind: 'showAdScene',
scene,
durationMs: 0,
});
const hideScene = (durationMs = 320): Activity => ({
kind: 'hideAdScene',
durationMs,
});
const wait = (durationMs: number): Activity => ({ kind: 'wait', durationMs }); const wait = (durationMs: number): Activity => ({ kind: 'wait', durationMs });
const mapZoomIn = (durationMs = 1400, steps = 5): Activity => ({ const mapZoomIn = (durationMs = 1400, steps = 5): Activity => ({
kind: 'mapZoom', kind: 'mapZoom',
@ -860,17 +826,18 @@ const LONDON_VIEW = { lat: 51.4672, lon: -0.1276, zoom: 10.5 };
const AD_CONFIGS: DemoAdStoryboardConfig[] = [ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 01 — Search by sentence. Type the prompt on camera, narration runs // 01 — Search by sentence. Type the prompt on camera, narration runs
// simultaneously. Filters relevant: commute + crime + schools. // simultaneously. Filters relevant: price + commute + crime + noise.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-01-london-prompt', name: 'ad-01-london-prompt',
city: 'london', city: 'london',
promptText: promptText:
'Two bed in London, 35 min to centre, lower crime, lower noise', 'London flat under £600k, 35 min to centre, lower crime, lower noise',
filters: { filters: {
'Property type': ['Flats/Maisonettes'],
'Estimated current price': [0, 600000], 'Estimated current price': [0, 600000],
'Serious crime per 1k residents (avg/yr)': [0, 50], 'Serious crime per 1k residents (avg/yr)': [0, 50],
'Road noise score (mean dB)': [0, 60], 'Noise (dB)': [0, 58],
}, },
travelTimeFilters: [ travelTimeFilters: [
{ mode: 'transit', slug: 'london', label: 'London city centre', max: 35 }, { mode: 'transit', slug: 'london', label: 'London city centre', max: 35 },
@ -879,20 +846,20 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 8, posterTimeS: 8,
cues: [ cues: [
{ {
text: 'Describe the London home you actually want.', text: 'Stop searching listing by listing. Search by the area brief.',
during: [typeAct( during: [typeAct(
'Two bed in London, 35 min to centre, lower crime, lower noise', 'London flat under £600k, 35 min to centre, lower crime, lower noise',
2800 2800
)], )],
tail: [wait(200)], tail: [wait(200)],
}, },
{ {
text: 'Hit search. The map answers in one second.', text: 'Price, commute, crime and noise land on the map together.',
during: [submitAct(1100)], during: [submitAct(1100)],
tail: [wait(700)], tail: [wait(700)],
}, },
{ {
text: 'Every lit postcode fits all five rules at once.', text: 'Every lit postcode is somewhere worth checking first.',
tail: [wait(600)], tail: [wait(600)],
}, },
], ],
@ -914,16 +881,16 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 5.5, posterTimeS: 5.5,
cues: [ cues: [
{ {
text: 'Watch what one slider does to your shortlist.', text: 'Your commute limit should change the map, not your patience.',
tail: [wait(200)], tail: [wait(200)],
}, },
{ {
text: 'Drag forty minutes down to fifteen.', text: 'Drag forty minutes down to fifteen minutes.',
during: [ttDragAct(15, 1900)], during: [ttDragAct(15, 1900)],
tail: [wait(700)], tail: [wait(700)],
}, },
{ {
text: 'Half the map just lost its place.', text: 'The reachable postcodes disappear in front of you.',
tail: [wait(600)], tail: [wait(600)],
}, },
], ],
@ -946,17 +913,17 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 10, posterTimeS: 10,
cues: [ cues: [
{ {
text: 'Type the brief. Map fills with matching areas.', text: 'Type a family brief and watch matching areas appear.',
during: [typeAct('Family home in London, decent schools nearby', 2400), submitAct(900)], during: [typeAct('Family home in London, decent schools nearby', 2400), submitAct(900)],
tail: [wait(500)], tail: [wait(500)],
}, },
{ {
text: 'Zoom past the hexagons. Real postcodes break open.', text: 'Zoom from area patterns into actual postcodes.',
during: [mapZoomIn(3000, 10)], during: [mapZoomIn(3000, 10)],
tail: [wait(400)], tail: [wait(400)],
}, },
{ {
text: 'Tap one. Sold prices, schools, crime, noise.', text: 'Tap one for sold prices and street-level context.',
during: [ during: [
{ kind: 'cursorScale', scale: 1.3, durationMs: 200 }, { kind: 'cursorScale', scale: 1.3, durationMs: 200 },
clickHex(900), clickHex(900),
@ -992,7 +959,7 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 6, posterTimeS: 6,
cues: [ cues: [
{ {
text: 'Four hundred grand. London. Thirty minute commute.', text: 'London under four hundred thousand, with a thirty minute commute.',
during: [typeAct( during: [typeAct(
'Flat in London under £400k, 30 min to centre, lower crime', 'Flat in London under £400k, 30 min to centre, lower crime',
2800 2800
@ -1000,61 +967,57 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
tail: [wait(400)], tail: [wait(400)],
}, },
{ {
text: 'Watch the filters stack and the map shrink.', text: 'The active filters stack up as the map tightens.',
during: [scrollFilters(280, 900)], during: [scrollFilters(280, 900)],
tail: [wait(600)], tail: [wait(600)],
}, },
{ {
text: 'Every lit postcode hits all four rules.', text: 'Now the cheap-looking areas have to pass the brief.',
tail: [wait(500)], tail: [wait(500)],
}, },
], ],
}, },
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 05 — Two streets apart. Photo split is the hook. Caption stays // 05 — Two streets apart. Product-led now: noise + crime filters are
// SHORT so it does not compete with the overlay's title text. // typed and submitted on screen instead of masking the product with
// generic street photos.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-05-two-streets-apart', name: 'ad-05-two-streets-apart',
city: 'london', city: 'london',
promptText: 'Quieter London, lower road noise', promptText: 'Quiet London streets, lower noise, lower serious crime',
filters: { filters: {
'Road noise score (mean dB)': [0, 58], 'Noise (dB)': [0, 55],
'Serious crime per 1k residents (avg/yr)': [0, 50], 'Serious crime per 1k residents (avg/yr)': [0, 45],
}, },
initialZoom: 10.6, initialZoom: 10.6,
posterTimeS: 4, posterTimeS: 4,
cues: [ cues: [
{ {
text: 'Two homes. Four hundred metres apart.', text: 'Two streets can look identical in a listing photo.',
during: [showScene({ during: [typeAct(
mode: 'split', 'Quiet London streets, lower noise, lower serious crime',
accent: 'rose', 2500
kicker: 'Two streets', ), submitAct(900)],
title: 'Same price tag.', tail: [wait(400)],
images: [PHOTO.terracedRow, PHOTO.busyTraffic], },
left: { title: 'Street A', meta: 'Quiet', tone: 'good' }, {
right: { title: 'Street B', meta: 'Main road', tone: 'bad' }, text: 'Filter noise and serious crime before you book a viewing.',
transparent: false, during: [scrollFilters(220, 800)],
})],
tail: [wait(500)], tail: [wait(500)],
}, },
{ {
text: 'Same price. Completely different lives.', text: 'Now the quieter pockets are the ones left on screen.',
tail: [wait(500)], during: [mapZoomIn(1300, 4)],
}, tail: [wait(600)],
{
text: 'The map knows the difference. The photos do not.',
during: [hideScene(360)],
tail: [wait(700)],
}, },
], ],
}, },
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 06 — Commute tax. Photo hook (train platform) opens; cue 1 hides // 06 — Commute tax. Starts on the live commute layer and immediately
// the overlay and the travel-time slider drags from 60 → 20 min. // proves the point with the travel-time slider.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-06-london-commute-tax', name: 'ad-06-london-commute-tax',
@ -1068,38 +1031,30 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 4, posterTimeS: 4,
cues: [ cues: [
{ {
text: 'Twenty minutes or sixty. Same asking price.', text: 'A cheap home gets expensive when the commute is wrong.',
during: [showScene({ tail: [wait(300)],
mode: 'title',
accent: 'amber',
kicker: 'Commute tax',
image: PHOTO.trainClock,
title: 'Cheap, until you count the hours.',
})],
tail: [wait(400)],
}, },
{ {
text: 'Drag the slider. Watch the map shrink.', text: 'Drag sixty minutes down to twenty and watch the map shrink.',
during: [hideScene(320), ttDragAct(20, 1800)], during: [ttDragAct(20, 1900)],
tail: [wait(700)], tail: [wait(700)],
}, },
{ {
text: 'Time is the bill you pay every week.', text: 'That weekly time bill is visible before the viewing.',
tail: [wait(600)], tail: [wait(600)],
}, },
], ],
}, },
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 07 — Quiet near London. Leafy-suburb photo opens; cue 1 hides it // 07 — Quiet near London. Uses the real prod Noise (dB) feature.
// and the dashboard (already filtered for low noise) is revealed.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-07-quiet-near-london', name: 'ad-07-quiet-near-london',
city: 'london', city: 'london',
promptText: 'Quieter London, lower road noise, good transit', promptText: 'Quieter London, lower road noise, good transit',
filters: { filters: {
'Road noise score (mean dB)': [0, 56], 'Noise (dB)': [0, 55],
'Estimated current price': [0, 700000], 'Estimated current price': [0, 700000],
}, },
travelTimeFilters: [ travelTimeFilters: [
@ -1109,30 +1064,25 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 4, posterTimeS: 4,
cues: [ cues: [
{ {
text: 'Quiet streets, near London. They do exist.', text: 'Quiet near London is searchable, not just hopeful.',
during: [showScene({ during: [typeAct('Quieter London, lower road noise, good transit', 2500), submitAct(900)],
mode: 'title',
accent: 'teal',
image: PHOTO.leafySuburb,
title: 'Yes, they exist.',
})],
tail: [wait(400)], tail: [wait(400)],
}, },
{ {
text: 'You just have to filter for noise, not price.', text: 'Filter for noise alongside price and travel time.',
during: [hideScene(320), scrollFilters(220, 800)], during: [scrollFilters(220, 800)],
tail: [wait(500)], tail: [wait(500)],
}, },
{ {
text: 'The hidden pockets light up.', text: 'The calmer pockets show up before you go anywhere.',
tail: [wait(500)], tail: [wait(500)],
}, },
], ],
}, },
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 08 — The postcode comes with the keys. Keys photo opens; map shows // 08 — The postcode comes with the keys. Keeps the memorable premise,
// London filtered for family-friendly area. // but shows the product doing the work instead of a keys stock photo.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-08-postcode-with-the-keys', name: 'ad-08-postcode-with-the-keys',
@ -1142,99 +1092,104 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
'Estimated current price': [0, 750000], 'Estimated current price': [0, 750000],
'Outstanding primary schools within 2km': [1, 10], 'Outstanding primary schools within 2km': [1, 10],
'Serious crime per 1k residents (avg/yr)': [0, 50], 'Serious crime per 1k residents (avg/yr)': [0, 50],
'Noise (dB)': [0, 58],
}, },
travelTimeFilters: [
{ mode: 'transit', slug: 'london', label: 'London city centre', max: 45 },
],
initialZoom: 10.5, initialZoom: 10.5,
posterTimeS: 3, posterTimeS: 3,
cues: [ cues: [
{ {
text: 'You can renovate the kitchen.', text: 'You can change the kitchen. You inherit the postcode.',
during: [showScene({ during: [typeAct(
mode: 'title', 'Family London, lower crime, good schools, lower noise',
accent: 'lime', 2500
image: PHOTO.keysFrontDoor, ), submitAct(900)],
title: 'You keep the postcode forever.',
})],
tail: [wait(400)], tail: [wait(400)],
}, },
{ {
text: 'You can not renovate the commute or the noise.', text: 'So check commute, crime, schools and noise first.',
during: [scrollFilters(320, 900)],
tail: [wait(500)], tail: [wait(500)],
}, },
{ {
text: 'Pick the area first. The keys come second.', text: 'Pick the area first. The keys come second.',
during: [hideScene(320)], during: [mapZoomIn(1200, 4)],
tail: [wait(600)], tail: [wait(600)],
}, },
], ],
}, },
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 09 — Waitrose distance. Niche filter that maps to social-class // 09 — Amenities. Waitrose is the memorable example, but the copy
// proxy. We type the brief, scroll the filter pane to surface the // frames it as practical amenity filtering rather than a throwaway gag.
// Waitrose-distance card explicitly.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-09-london-waitrose', name: 'ad-09-london-waitrose',
city: 'london', city: 'london',
promptText: promptText:
'London postcodes within walking distance of a Waitrose', 'London postcodes near Waitrose, tube and parks under £800k',
filters: { filters: {
'Distance to nearest Waitrose (km)': [0, 1], 'Distance to nearest Waitrose (km)': [0, 1],
'Distance to nearest tube station (km)': [0, 1.2],
'Distance to nearest park (km)': [0, 0.8],
'Estimated current price': [0, 800000], 'Estimated current price': [0, 800000],
}, },
initialZoom: 10.4, initialZoom: 10.4,
posterTimeS: 7, posterTimeS: 7,
cues: [ cues: [
{ {
text: 'How close is your nearest Waitrose. Yes, really.', text: 'Amenities should be filters, not guesses from the photos.',
during: [typeAct( during: [typeAct(
'London postcodes within walking distance of a Waitrose', 'London postcodes near Waitrose, tube and parks under £800k',
2800 2800
), submitAct(900)], ), submitAct(900)],
tail: [wait(400)], tail: [wait(400)],
}, },
{ {
text: 'The map highlights the lucky postcodes.', text: 'Waitrose, tube, parks and price can all count together.',
during: [scrollFilters(180, 800)], during: [scrollFilters(300, 900)],
tail: [wait(600)], tail: [wait(600)],
}, },
{ {
text: 'It is a real filter, not a meme.', text: 'Now you know which postcodes actually match that lifestyle.',
tail: [wait(500)], tail: [wait(500)],
}, },
], ],
}, },
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 10 — Reform-voting councils. % Reform UK vote share as a filter. // 10 — Local politics. Matter-of-fact and product-led; lower threshold
// Politically tense — kept matter-of-fact, no spin in the copy. // keeps the map populated while still surfacing the Reform UK feature.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-10-reform-councils', name: 'ad-10-reform-councils',
city: 'london', city: 'leeds',
promptText: promptText:
'Areas where the council voted heavily for Reform UK', 'Areas with higher Reform UK vote share and lower prices',
filters: { filters: {
'% Reform UK': [25, 100], '% Reform UK': [15, 100],
'Estimated current price': [0, 350000],
}, },
initialZoom: 9.5, initialZoom: 10.5,
posterTimeS: 7, posterTimeS: 7,
cues: [ cues: [
{ {
text: 'Want to know which way your future council voted.', text: 'Local politics is part of the neighbourhood data too.',
during: [typeAct( during: [typeAct(
'Areas where the council voted heavily for Reform UK', 'Areas with higher Reform UK vote share and lower prices',
2600 2600
)], )],
tail: [wait(300)], tail: [wait(300)],
}, },
{ {
text: 'Run the filter. See the map.', text: 'Run the filter and see which areas stay in view.',
during: [submitAct(900), scrollFilters(180, 700)], during: [submitAct(900), scrollFilters(180, 700)],
tail: [wait(500)], tail: [wait(500)],
}, },
{ {
text: 'Politics shapes the area too.', text: 'No spin. Just another local signal before you buy.',
tail: [wait(500)], tail: [wait(500)],
}, },
], ],
@ -1247,76 +1202,67 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
name: 'ad-11-leeds-families', name: 'ad-11-leeds-families',
city: 'leeds', city: 'leeds',
promptText: promptText:
'Three bed near Leeds, outstanding primary nearby, lower crime', 'Leeds family areas, good primary schools nearby, lower crime',
filters: { filters: {
'Estimated current price': [0, 380000], 'Estimated current price': [0, 380000],
'Outstanding primary schools within 2km': [2, 10], 'Good+ primary schools within 2km': [2, 10],
'Serious crime per 1k residents (avg/yr)': [0, 45], 'Serious crime per 1k residents (avg/yr)': [0, 45],
}, },
initialZoom: 11.0, initialZoom: 11.0,
posterTimeS: 6, posterTimeS: 6,
cues: [ cues: [
{ {
text: 'Leeds, but only the school-run friendly bits.', text: 'Find Leeds areas that work for the school run.',
during: [typeAct( during: [typeAct(
'Three bed near Leeds, outstanding primary nearby, lower crime', 'Leeds family areas, good primary schools nearby, lower crime',
2500 2500
), submitAct(900)], ), submitAct(900)],
tail: [wait(300)], tail: [wait(300)],
}, },
{ {
text: 'Two outstanding primaries within walking distance.', text: 'School quality and serious crime sit beside price.',
during: [scrollFilters(220, 800)], during: [scrollFilters(220, 800)],
tail: [wait(500)], tail: [wait(500)],
}, },
{ {
text: 'Every lit postcode is a real candidate.', text: 'Every lit postcode is a better place to start.',
tail: [wait(500)], tail: [wait(500)],
}, },
], ],
}, },
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// 12 — Pricing scarcity. Real prod numbers (verified via /api/pricing // 12 — Pricing/value. Keeps the current £9.99 founder-price hook, but
// at render time): the £0.99 tier is sold out (50/50); the current // proves value through the product instead of a static scarcity card.
// £9.99 tier has 17 slots left before the next jump to £29.99. We
// surface those numbers in a structured rank scene over the live
// dashboard, since recording on the /pricing route would require a
// dashboard URL override and we want to ship this iteration.
// ------------------------------------------------------------------- // -------------------------------------------------------------------
{ {
name: 'ad-12-pricing-scarcity', name: 'ad-12-pricing-scarcity',
city: 'london', city: 'london',
promptText: 'Quieter London, good schools, lower crime', promptText: 'London under £700k, good schools, lower crime and lower noise',
filters: { filters: {
'Estimated current price': [0, 700000], 'Estimated current price': [0, 700000],
'Outstanding primary schools within 2km': [1, 10], 'Outstanding primary schools within 2km': [1, 10],
'Serious crime per 1k residents (avg/yr)': [0, 50],
'Noise (dB)': [0, 58],
}, },
initialZoom: 10.4, initialZoom: 10.4,
posterTimeS: 3, posterTimeS: 3,
cues: [ cues: [
{ {
text: 'Seventeen spots left at nine ninety nine.', text: 'Nine ninety nine beats one wasted viewing.',
during: [showScene({ during: [typeAct(
mode: 'rank', 'London under £700k, good schools, lower crime and lower noise',
accent: 'amber', 2700
kicker: 'Founder pricing', ), submitAct(900)],
title: 'Cheap tier almost gone.',
items: [
{ label: '£0.99 / month', value: 'sold out', tone: 'bad' },
{ label: '£9.99 / month', value: '17 left', tone: 'warn' },
{ label: '£29.99 / month', value: 'next', tone: 'neutral' },
],
})],
tail: [wait(400)], tail: [wait(400)],
}, },
{ {
text: 'Then the price triples.', text: 'Use the map before spending a Saturday in the wrong area.',
during: [scrollFilters(300, 900)],
tail: [wait(500)], tail: [wait(500)],
}, },
{ {
text: 'Get in before the next jump.', text: 'The cheapest mistake is the one you skip.',
during: [hideScene(360)],
tail: [wait(600)], tail: [wait(600)],
}, },
], ],