Update data

This commit is contained in:
Andras Schmelczer 2026-05-14 08:17:10 +01:00
parent a4103b0896
commit 273d7a83ee
15 changed files with 716 additions and 316 deletions

4
.gitignore vendored
View file

@ -17,3 +17,7 @@ frontend/public/assets/*
frontend/public/assets/.done
server-rs/logs
video/auth.*
*.jpg
*.jpeg
*.mp4

View file

@ -94,6 +94,7 @@ export function useSavedProperties(userId: string | null) {
} catch (err) {
const msg = err instanceof Error ? err.message : 'Failed to save property';
setError(msg);
throw err;
}
},
[userId, fetchProperties]

View file

@ -23,6 +23,11 @@ html.dark {
color-scheme: dark;
}
/* Show the pointer cursor on enabled <button> elements and on role="button"
   elements that are not marked aria-disabled="true". */
button:not(:disabled),
[role='button']:not([aria-disabled='true']) {
cursor: pointer;
}
/* Smooth theme transitions (scoped to avoid map performance issues) */
body,
div,

View file

@ -10,7 +10,11 @@ import pyarrow as pa
import pyarrow.csv as pa_csv
import pyarrow.parquet as pq
from ..utils import fuzzy_join_on_postcode
from ..utils import (
fuzzy_join_on_postcode,
normalize_address_key,
normalize_postcode_key,
)
pl.Config.set_tbl_cols(-1)
@ -193,12 +197,15 @@ def main():
def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Path):
epc_base = _scan_epc_certificates(epc_path, temp_dir)
epc_base = _scan_epc_certificates(epc_path, temp_dir).with_columns(
normalize_address_key(pl.col("epc_address")).alias("_epc_match_address"),
normalize_postcode_key(pl.col("epc_postcode")).alias("_epc_match_postcode"),
)
# Dedup fork: keep latest certificate per property (existing logic)
epc = (
epc_base.sort("inspection_date", descending=True)
.group_by("epc_address", "epc_postcode")
.group_by("_epc_match_address", "_epc_match_postcode")
.first()
.drop("tenure")
)
@ -216,15 +223,15 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
.with_columns(
pl.col("number_habitable_rooms")
.shift(1)
.over("epc_address", "epc_postcode")
.over("_epc_match_address", "_epc_match_postcode")
.alias("_prev_rooms"),
pl.col("total_floor_area")
.shift(1)
.over("epc_address", "epc_postcode")
.over("_epc_match_address", "_epc_match_postcode")
.alias("_prev_area"),
pl.col("_rating_rank")
.shift(1)
.over("epc_address", "epc_postcode")
.over("_epc_match_address", "_epc_match_postcode")
.alias("_prev_rating_rank"),
)
.with_columns(
@ -257,7 +264,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
.cast(pl.Int32)
.alias("_event_year"),
)
.group_by("epc_address", "epc_postcode")
.group_by("_epc_match_address", "_epc_match_postcode")
.agg(
pl.struct(
pl.col("_event_year").alias("year"),
@ -276,7 +283,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
# Social tenure fork: flag properties that were ever social housing
social_tenure = (
epc_base.filter(pl.col("tenure").str.to_lowercase().str.contains("social"))
.select("epc_address", "epc_postcode")
.select("_epc_match_address", "_epc_match_postcode")
.unique()
.with_columns(pl.lit("Yes").alias("was_council_house"))
.collect()
@ -287,12 +294,12 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
epc = (
epc.join(
events.lazy(),
on=["epc_address", "epc_postcode"],
on=["_epc_match_address", "_epc_match_postcode"],
how="left",
)
.join(
social_tenure.lazy(),
on=["epc_address", "epc_postcode"],
on=["_epc_match_address", "_epc_match_postcode"],
how="left",
)
.with_columns(
@ -339,9 +346,23 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
ignore_nulls=True,
).alias("pp_address"),
)
.with_columns(
normalize_address_key(pl.col("pp_address")).alias("_pp_match_address"),
normalize_postcode_key(pl.col("postcode")).alias("_pp_match_postcode"),
)
.filter(pl.col("_pp_match_postcode").is_not_null())
.with_columns(
pl.coalesce("_pp_match_address", "pp_address").alias("_pp_group_address"),
pl.col("_pp_match_postcode").alias("_pp_group_postcode"),
)
.filter(pl.col("pp_address").is_not_null())
.sort("date_of_transfer")
.group_by("pp_address", "postcode", maintain_order=True)
.group_by("_pp_group_address", "_pp_group_postcode", maintain_order=True)
.agg(
pl.col("pp_address").last(),
pl.col("postcode").last(),
pl.col("_pp_match_address").last(),
pl.col("_pp_match_postcode").last(),
pl.struct(
pl.col("date_of_transfer").dt.year().alias("year"),
pl.col("date_of_transfer").dt.month().cast(pl.UInt8).alias("month"),
@ -354,7 +375,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
pl.col("date_of_transfer").first().alias("first_transfer_date"),
pl.col("old_new").first(),
)
).filter(pl.col("pp_address").is_not_null())
)
print("Price paid dataset")
print(price_paid.head().collect())
@ -405,7 +426,19 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
.then(pl.lit(1, dtype=pl.UInt8))
.otherwise(pl.lit(None, dtype=pl.UInt8))
.alias("is_construction_date_approximate"),
).drop("old_new", "first_transfer_date")
).drop(
[
"old_new",
"first_transfer_date",
"_pp_match_address",
"_pp_match_postcode",
"_pp_group_address",
"_pp_group_postcode",
"_epc_match_address",
"_epc_match_postcode",
],
strict=False,
)
joined = joined.rename({col: col.lower() for col in joined.columns})

View file

@ -22,6 +22,8 @@ _AREA_COLUMNS = [
"Postcode",
"lat",
"lon",
# Runtime provenance for deciding whether missing coordinates are skippable.
"ctry25cd",
# Deprivation
"Income Score",
"Employment Score",
@ -86,6 +88,15 @@ _AREA_COLUMNS = [
# Matches column names of the form "Distance to nearest amenity (<name>) (km)".
_DYNAMIC_POI_DISTANCE_RE = re.compile(r"^Distance to nearest amenity \(.+\) \(km\)$")
# Matches column names of the form "Number of amenities (<name>) within 2km"
# or "... within 5km".
_DYNAMIC_POI_COUNT_RE = re.compile(r"^Number of amenities \(.+\) within (2|5)km$")
# Canonical column name under which the tree-density value is exposed;
# _tree_density_by_postcode aliases its chosen source column to this name.
TREE_DENSITY_FEATURE = "Street tree density percentile"
# Alternative source column name accepted by _tree_density_by_postcode when the
# canonical column is absent: "Tree canopy density percentile within <N>m".
_POSTCODE_TREE_DENSITY_PERCENTILE_RE = re.compile(
r"^Tree canopy density percentile within \d+m$"
)
# LAD code -> LAD name. _validate_lad_source_coverage tolerates these codes
# being absent from the rental data; only the keys are used for that check.
_RENT_SOURCE_UNAVAILABLE_LADS = {
# ONS PIPR does not publish LAD-level private-rent estimates for these
# small authorities. Keep rent null there, but fail on any other LAD miss.
"E06000053": "Isles of Scilly",
"E09000001": "City of London",
}
def _is_dynamic_poi_metric_column(column: str) -> bool:
@ -112,6 +123,107 @@ def _less_deprived_percentile_expr(column: str) -> pl.Expr:
)
def _tree_density_by_postcode(tree_density_postcodes_path: Path) -> pl.LazyFrame:
    """Lazily load one tree-density value per postcode.

    The parquet file must have a ``postcode`` column. The density value is
    taken from the ``TREE_DENSITY_FEATURE`` column when present; otherwise the
    file must contain exactly one column matching
    ``_POSTCODE_TREE_DENSITY_PERCENTILE_RE``, which is used instead.

    Returns:
        A LazyFrame with ``postcode`` and a Float32 ``TREE_DENSITY_FEATURE``
        column, with null postcodes dropped and rows de-duplicated on
        ``postcode``.

    Raises:
        ValueError: if ``postcode`` is missing, or if no unambiguous density
            column can be identified.
    """
    source = pl.scan_parquet(tree_density_postcodes_path)
    available = set(source.collect_schema().names())

    if "postcode" not in available:
        raise ValueError(
            f"{tree_density_postcodes_path} is missing required column: postcode"
        )

    # Prefer the canonical column; fall back to a single radius-specific one.
    density_column = TREE_DENSITY_FEATURE
    if density_column not in available:
        matches = sorted(
            name
            for name in available
            if _POSTCODE_TREE_DENSITY_PERCENTILE_RE.match(name)
        )
        if len(matches) != 1:
            raise ValueError(
                f'{tree_density_postcodes_path} must contain column "{TREE_DENSITY_FEATURE}" '
                'or exactly one "Tree canopy density percentile within {radius}m" column; '
                f"found {len(matches)} postcode percentile columns"
            )
        (density_column,) = matches

    per_postcode = source.select(
        pl.col("postcode"),
        pl.col(density_column).cast(pl.Float32).alias(TREE_DENSITY_FEATURE),
    )
    return per_postcode.drop_nulls(["postcode"]).unique(["postcode"])
def _validate_lad_source_coverage(
    iod_path: Path, ethnicity_path: Path, rental_prices_path: Path
) -> None:
    """Check that LAD-keyed sources cover every 2024 LAD listed in the IoD file.

    The distinct LAD codes in ``iod_path`` are the reference set. Every
    reference LAD must appear in the ethnicity data. Every reference LAD must
    also appear in the rental data, except the codes listed in
    ``_RENT_SOURCE_UNAVAILABLE_LADS``; when only those are missing a notice is
    printed instead of failing.

    Raises:
        ValueError: if the ethnicity data misses any reference LAD, or the
            rental data misses a LAD that is not explicitly tolerated.
    """
    code_column = "Local Authority District code (2024)"
    name_column = "Local Authority District name (2024)"
    reference_lads = (
        pl.read_parquet(iod_path, columns=[code_column, name_column])
        .rename({code_column: "lad", name_column: "lad_name"})
        .unique(["lad"])
    )

    def _uncovered_lads(source_path: Path, source_code_column: str) -> pl.DataFrame:
        # Reference LADs with no matching row in the source, ordered by code.
        covered = pl.read_parquet(source_path, columns=[source_code_column]).rename(
            {source_code_column: "lad"}
        )
        return reference_lads.join(covered, on="lad", how="anti").sort("lad")

    ethnicity_gaps = _uncovered_lads(ethnicity_path, "Geography_code")
    if ethnicity_gaps.height > 0:
        raise ValueError(
            "Ethnicity data is missing 2024 LAD coverage: "
            f"{ethnicity_gaps.to_dicts()}"
        )

    # The rental file is only read once the ethnicity check has passed.
    rent_gaps = _uncovered_lads(rental_prices_path, "area_code")
    tolerated_codes = list(_RENT_SOURCE_UNAVAILABLE_LADS)
    unexpected_rent_gaps = rent_gaps.filter(~pl.col("lad").is_in(tolerated_codes))
    if unexpected_rent_gaps.height > 0:
        raise ValueError(
            "Rental data is missing 2024 LAD coverage: "
            f"{unexpected_rent_gaps.to_dicts()}"
        )

    # Anything still missing at this point is one of the tolerated LADs.
    if rent_gaps.height > 0:
        print(
            "PIPR has no LAD-level rent estimates for source-unavailable LADs; "
            f"rent will remain null there: {rent_gaps.to_dicts()}"
        )
def _validate_property_postcodes(df: pl.DataFrame) -> None:
    """Fail if any row has a null or blank ``Postcode``.

    A postcode counts as blank when, cast to string and stripped, it is empty.

    Raises:
        ValueError: reporting the number of offending rows and a sample of up
            to ten of them (restricted to the identifying columns that exist).
    """
    postcode = pl.col("Postcode")
    is_blank = postcode.cast(pl.Utf8).str.strip_chars() == ""
    offending = df.filter(postcode.is_null() | is_blank)

    if offending.height != 0:
        wanted = ("Postcode", "Address per Property Register", "Last known price")
        present = [name for name in wanted if name in offending.columns]
        sample = offending.select(present).head(10).to_dicts()
        raise ValueError(
            "Property rows missing a postcode after merge: "
            f"{offending.height} rows. Sample: {sample}"
        )
def _build(
epc_pp_path: Path,
arcgis_path: Path,
@ -126,12 +238,14 @@ def _build(
lsoa_population_path: Path,
median_age_path: Path,
election_results_path: Path,
tree_density_addresses_path: Path | None = None,
tree_density_postcodes_path: Path | None = None,
) -> tuple[pl.DataFrame, pl.DataFrame]:
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
Returns (postcode_df, properties_df).
"""
_validate_lad_source_coverage(iod_path, ethnicity_path, rental_prices_path)
wide = pl.scan_parquet(epc_pp_path).filter(
pl.col("total_floor_area").is_null()
| (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
@ -152,9 +266,15 @@ def _build(
.drop("new_postcode")
)
arcgis_raw = pl.scan_parquet(arcgis_path)
postcode_country = arcgis_raw.select(
pl.col("pcds").alias("postcode"),
pl.col("ctry25cd"),
).unique(["postcode"])
wide = wide.join(postcode_country, on="postcode", how="left")
arcgis = (
pl.scan_parquet(arcgis_path)
.filter(pl.col("ctry25cd") == "E92000001") # England only
arcgis_raw.filter(pl.col("ctry25cd") == "E92000001") # England only
.filter(pl.col("doterm").is_null()) # Active postcodes only
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
# Alias them back to the short canonical names used across the
@ -191,7 +311,9 @@ def _build(
.cast(pl.UInt8)
.alias("_bedrooms"),
)
rental = pl.scan_parquet(rental_prices_path)
rental = pl.scan_parquet(rental_prices_path).select(
"area_code", "bedrooms", "mean_monthly_rent"
)
wide = wide.join(
rental,
left_on=["Local Authority District code (2024)", "_bedrooms"],
@ -260,17 +382,9 @@ def _build(
school_proximity = pl.scan_parquet(school_proximity_path)
wide = wide.join(school_proximity, on="postcode", how="left")
if tree_density_addresses_path is not None:
tree_density = (
pl.scan_parquet(tree_density_addresses_path)
.select(
pl.col("postcode"),
pl.col("pp_address"),
pl.col(TREE_DENSITY_FEATURE).cast(pl.Float32),
)
.unique(["postcode", "pp_address"])
)
wide = wide.join(tree_density, on=["postcode", "pp_address"], how="left")
if tree_density_postcodes_path is not None:
tree_density = _tree_density_by_postcode(tree_density_postcodes_path)
wide = wide.join(tree_density, on="postcode", how="left")
# Broadband: derive max available download speed tier per postcode from
# Ofcom availability percentages. Tiers: Gigabit ≥1000, UFBB ≥300,
@ -415,6 +529,7 @@ def _build(
print("Collecting with streaming engine...")
df = wide.collect(engine="streaming")
_validate_property_postcodes(df)
# Split into postcode-level and property-level dataframes
area_cols = [
@ -508,10 +623,10 @@ def main():
help="2024 General Election results by constituency parquet file",
)
parser.add_argument(
"--tree-density-addresses",
"--tree-density-postcodes",
type=Path,
required=False,
help="Address-level tree density parquet from pipeline.transform.tree_density",
help="Postcode-level tree density parquet from pipeline.transform.tree_density",
)
parser.add_argument(
"--output-postcodes",
@ -541,7 +656,7 @@ def main():
lsoa_population_path=args.lsoa_population,
median_age_path=args.median_age,
election_results_path=args.election_results,
tree_density_addresses_path=args.tree_density_addresses,
tree_density_postcodes_path=args.tree_density_postcodes,
)
print(f"\nPostcode columns: {postcode_df.columns}")

View file

@ -227,7 +227,18 @@ def main():
fa = test["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
print("\nComputing kNN estimates...")
knn_psm = knn_median_psm(trees, lat, lon, tg)
last_sale_dates = (
test["input_date"].dt.epoch("d").fill_null(-1).to_numpy().astype(np.int64)
)
knn_psm = knn_median_psm(
trees,
lat,
lon,
tg,
postcodes=test["Postcode"].fill_null("").to_numpy(),
last_prices=test["input_price"].cast(pl.Float64).to_numpy(),
last_sale_dates=last_sale_dates,
)
# Temporal adjustment: pool PSM is at ref, adjust to actual
log_idx_actual = test["log_index_actual"].to_numpy().astype(np.float64)

View file

@ -13,6 +13,7 @@ for lat/lon needed by kNN, then drops those columns before writing.
import argparse
from pathlib import Path
import numpy as np
import polars as pl
from pipeline.transform.price_estimation.knn import (
@ -28,6 +29,45 @@ from pipeline.transform.price_estimation.utils import (
type_group_expr,
)
# Bounds on (kNN estimate / index estimate). guarded_blend_estimates only
# blends the kNN value in when it lies within [MIN, MAX] times the index value.
MAX_KNN_TO_INDEX_RATIO = 2.0
MIN_KNN_TO_INDEX_RATIO = 0.5
# Ceiling applied by guarded_blend_estimates to the final estimate, expressed
# as a multiple of the last sale price.
MAX_ESTIMATE_TO_LAST_PRICE_RATIO = 6.0
def guarded_blend_estimates(
    index_est: np.ndarray,
    knn_est: np.ndarray,
    last_prices: np.ndarray,
    weight: float = KNN_BLEND_WEIGHT,
) -> np.ndarray:
    """Blend index and kNN estimates, then cap the result against the last sale.

    Per element:
      * no finite, positive index estimate -> NaN;
      * kNN estimate finite, positive and within
        [MIN_KNN_TO_INDEX_RATIO, MAX_KNN_TO_INDEX_RATIO] times the index
        estimate -> ``(1 - weight) * index + weight * knn``;
      * otherwise -> the index estimate unchanged.

    Where the last price is finite and positive, the result is additionally
    limited to ``last_price * MAX_ESTIMATE_TO_LAST_PRICE_RATIO``.

    Args:
        index_est: index-based estimates.
        knn_est: kNN-based estimates.
        last_prices: last known sale prices.
        weight: share given to the kNN estimate when blending.

    Returns:
        Float array of blended, capped estimates (NaN where unavailable).
    """
    index_values = index_est.astype(np.float64, copy=False)
    knn_values = knn_est.astype(np.float64, copy=False)
    sale_prices = last_prices.astype(np.float64, copy=False)

    index_ok = np.isfinite(index_values) & (index_values > 0)
    knn_ok = np.isfinite(knn_values) & (knn_values > 0)

    # kNN is only trusted when it stays inside the allowed band around the index.
    lower_bound = index_values * MIN_KNN_TO_INDEX_RATIO
    upper_bound = index_values * MAX_KNN_TO_INDEX_RATIO
    in_band = (knn_values >= lower_bound) & (knn_values <= upper_bound)
    use_knn = index_ok & knn_ok & in_band

    weighted = (1 - weight) * index_values + weight * knn_values
    index_only = np.where(index_ok, index_values, np.nan)
    blended = np.where(use_knn, weighted, index_only)

    # NaN cap means "no cap applies" for that element.
    price_ok = np.isfinite(sale_prices) & (sale_prices > 0)
    cap = np.where(price_ok, sale_prices * MAX_ESTIMATE_TO_LAST_PRICE_RATIO, np.nan)

    cappable = np.isfinite(cap) & np.isfinite(blended)
    capped = np.minimum(blended, cap)
    return np.where(cappable, capped, blended)
def main():
parser = argparse.ArgumentParser(
@ -130,36 +170,54 @@ def main():
lon = df["lon"].cast(pl.Float64).to_numpy()
tg = df["_type_group"].fill_null("").to_numpy()
fa = df["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
last_prices = (
df["Last known price"].cast(pl.Float64).fill_null(float("nan")).to_numpy()
)
last_sale_dates = (
df["Date of last transaction"]
.dt.epoch("d")
.fill_null(-1)
.to_numpy()
.astype(np.int64)
)
knn_psm = knn_median_psm(trees, lat, lon, tg)
knn_psm = knn_median_psm(
trees,
lat,
lon,
tg,
postcodes=df["Postcode"].fill_null("").to_numpy(),
last_prices=last_prices,
last_sale_dates=last_sale_dates,
)
knn_est = knn_psm * fa # No temporal adj: ref == current
df = df.with_columns(
pl.Series("_knn_est", knn_est, dtype=pl.Float64),
)
# Blend: where kNN available, use weighted average; else keep index
# Blend only when kNN is close to the index estimate; otherwise keep index.
index_est = (
df["Estimated current price"]
.cast(pl.Float64)
.fill_null(float("nan"))
.to_numpy()
)
blended = guarded_blend_estimates(index_est, knn_est, last_prices)
df = df.with_columns(
pl.when(
pl.col("Estimated current price").is_not_null()
& pl.col("_knn_est").is_not_null()
& pl.col("_knn_est").is_finite()
& (pl.col("_knn_est") > 0)
)
.then(
(1 - KNN_BLEND_WEIGHT) * pl.col("Estimated current price")
+ KNN_BLEND_WEIGHT * pl.col("_knn_est")
)
.when(pl.col("Estimated current price").is_not_null())
.then(pl.col("Estimated current price"))
.otherwise(pl.lit(None))
.alias("Estimated current price"),
pl.Series("_index_est", index_est, dtype=pl.Float64),
pl.Series("Estimated current price", blended, dtype=pl.Float64),
).with_columns(
pl.col("Estimated current price").fill_nan(None),
)
n_blended = df.filter(
pl.col("_knn_est").is_not_null()
& pl.col("_knn_est").is_finite()
& (pl.col("_knn_est") > 0)
& (pl.col("_index_est").is_not_null())
& (pl.col("_knn_est") >= pl.col("_index_est") * MIN_KNN_TO_INDEX_RATIO)
& (pl.col("_knn_est") <= pl.col("_index_est") * MAX_KNN_TO_INDEX_RATIO)
& pl.col("Estimated current price").is_not_null()
).height
print(f" kNN blended: {n_blended:,} of {n_estimated:,} estimates")

View file

@ -21,6 +21,10 @@ from pipeline.transform.price_estimation.utils import (
# Default number of nearest neighbours (`k`) used by knn_median_psm.
KNN_K = 20
# Minimum number of usable neighbours knn_median_psm needs before it reports
# a median; targets with fewer are left as NaN.
KNN_MIN_NEIGHBORS = 5
# NOTE(review): presumably the weight given to the kNN estimate when it is
# blended with the index estimate -- not used in the visible part of this
# module; confirm against callers.
KNN_BLEND_WEIGHT = 0.35
# Floor-area bounds applied by build_knn_pool to "Total floor area (sqm)" when
# selecting comparable sales (inclusive on both ends).
MIN_COMPARABLE_FLOOR_AREA_SQM = 15.0
MAX_COMPARABLE_FLOOR_AREA_SQM = 1_000.0
# Bounds applied by build_knn_pool to the index-adjusted price per sqm
# (inclusive on both ends).
MIN_COMPARABLE_PSM = 500.0
MAX_COMPARABLE_PSM = 50_000.0
def _scale_coords(lat: np.ndarray, lon: np.ndarray) -> np.ndarray:
@ -33,13 +37,14 @@ def build_knn_pool(
index: pl.DataFrame,
ref_frac_year: float,
max_sale_year: int | None = None,
) -> dict[str, tuple[KDTree, np.ndarray]]:
) -> dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
"""Build per-type_group KD-trees of index-adjusted price-per-sqm.
Adjusts all pool properties' sale prices to ref_frac_year using the index,
then builds a KD-tree per type_group for nearest-neighbor queries.
Returns dict mapping type_group -> (KDTree over scaled lat/lon, adjusted_psm array).
Returns dict mapping type_group to KDTree, adjusted PSM, and sale identity
arrays used to keep the target sale out of its own comparable set.
"""
print("Building kNN pool...")
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
@ -55,7 +60,8 @@ def build_knn_pool(
pl.col("lat").is_not_null(),
pl.col("lon").is_not_null(),
pl.col("Total floor area (sqm)").is_not_null(),
pl.col("Total floor area (sqm)") > 0,
pl.col("Total floor area (sqm)") >= MIN_COMPARABLE_FLOOR_AREA_SQM,
pl.col("Total floor area (sqm)") <= MAX_COMPARABLE_FLOOR_AREA_SQM,
pl.col("Last known price").is_not_null(),
pl.col("Last known price") > 0,
pl.col("Postcode").is_not_null(),
@ -97,12 +103,13 @@ def build_knn_pool(
).filter(
pl.col("_adj_psm").is_not_null(),
pl.col("_adj_psm").is_finite(),
pl.col("_adj_psm") > 0,
pl.col("_adj_psm") >= MIN_COMPARABLE_PSM,
pl.col("_adj_psm") <= MAX_COMPARABLE_PSM,
)
print(f" {len(pool):,} after index adjustment")
# Build per-type KD-trees
trees: dict[str, tuple[KDTree, np.ndarray]] = {}
trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]] = {}
for tg in TYPE_GROUPS:
sub = pool.filter(pl.col("type_group") == tg)
n = len(sub)
@ -111,19 +118,49 @@ def build_knn_pool(
lat = sub["lat"].to_numpy().astype(np.float64)
lon = sub["lon"].to_numpy().astype(np.float64)
psm = sub["_adj_psm"].to_numpy().astype(np.float64)
postcodes = sub["Postcode"].fill_null("").to_numpy()
prices = sub["Last known price"].to_numpy().astype(np.float64)
sale_dates = (
sub["Date of last transaction"]
.dt.epoch("d")
.fill_null(-1)
.to_numpy()
.astype(np.int64)
)
tree = KDTree(_scale_coords(lat, lon))
trees[tg] = (tree, psm)
trees[tg] = (tree, psm, postcodes, prices, sale_dates)
print(f" {tg}: {n:,}")
return trees
def _sale_identity_matches(
pool_postcodes: np.ndarray,
pool_prices: np.ndarray,
pool_sale_dates: np.ndarray,
target_postcode: str,
target_price: float,
target_sale_date: int,
) -> np.ndarray:
if not target_postcode or not np.isfinite(target_price) or target_sale_date < 0:
return np.zeros(len(pool_postcodes), dtype=bool)
return (
(pool_postcodes == target_postcode)
& np.isfinite(pool_prices)
& np.isclose(pool_prices, target_price, rtol=0.0, atol=0.5)
& (pool_sale_dates == target_sale_date)
)
def knn_median_psm(
trees: dict[str, tuple[KDTree, np.ndarray]],
trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]],
lat: np.ndarray,
lon: np.ndarray,
type_groups: np.ndarray,
k: int = KNN_K,
postcodes: np.ndarray | None = None,
last_prices: np.ndarray | None = None,
last_sale_dates: np.ndarray | None = None,
) -> np.ndarray:
"""Return median adjusted-PSM of k nearest neighbours for each target.
@ -133,21 +170,41 @@ def knn_median_psm(
n = len(lat)
result = np.full(n, np.nan)
for tg, (tree, psm) in trees.items():
for tg, (tree, psm, pool_postcodes, pool_prices, pool_sale_dates) in trees.items():
mask = (type_groups == tg) & np.isfinite(lat) & np.isfinite(lon)
idx = np.where(mask)[0]
if len(idx) == 0:
continue
actual_k = min(k, len(psm))
if actual_k < KNN_MIN_NEIGHBORS:
query_k = min(max(k * 2, k + KNN_MIN_NEIGHBORS), len(psm))
if query_k < KNN_MIN_NEIGHBORS:
continue
coords = _scale_coords(lat[idx], lon[idx])
_, nn_idx = tree.query(coords, k=actual_k)
_, nn_idx = tree.query(coords, k=query_k)
if nn_idx.ndim == 1:
nn_idx = nn_idx.reshape(-1, 1)
result[idx] = np.nanmedian(psm[nn_idx], axis=1)
medians = np.full(len(idx), np.nan)
for row_num, target_idx in enumerate(idx):
candidates = nn_idx[row_num]
if (
postcodes is not None
and last_prices is not None
and last_sale_dates is not None
):
same_sale = _sale_identity_matches(
pool_postcodes[candidates],
pool_prices[candidates],
pool_sale_dates[candidates],
str(postcodes[target_idx] or ""),
float(last_prices[target_idx]),
int(last_sale_dates[target_idx]),
)
candidates = candidates[~same_sale]
if len(candidates) >= KNN_MIN_NEIGHBORS:
medians[row_num] = np.nanmedian(psm[candidates[:k]])
result[idx] = medians
return result

View file

@ -19,7 +19,7 @@ TERRACE_TYPES = [
"Terraced",
]
FLAT_TYPES = ["Flats/Maisonettes"]
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats", "Bungalow"]
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats"]
SHRINKAGE_K = 50
@ -30,8 +30,6 @@ def type_group_expr():
.then(pl.lit("Terraced"))
.when(pl.col("Property type").is_in(FLAT_TYPES))
.then(pl.lit("Flats"))
.when(pl.col("Property type") == "Bungalow")
.then(pl.lit("Bungalow"))
.when(pl.col("Property type").is_in(["Detached", "Semi-Detached"]))
.then(pl.col("Property type"))
.otherwise(pl.lit(None))
@ -61,7 +59,7 @@ def hierarchy_keys(sector: str) -> tuple[str, str]:
return district, area
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats", "Bungalow"]
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats"]
def build_hedonic_features(df: pl.DataFrame) -> np.ndarray:

View file

@ -15,12 +15,21 @@ DROP_CATEGORIES = {
"amenity/bicycle_parking",
"amenity/binoculars",
"amenity/boot_scraper",
"amenity/bus_garage",
"amenity/check_in",
"amenity/clock",
"amenity/clothes_dryer",
"amenity/coast_guard",
"amenity/coffin_rest",
"amenity/compressed_air",
"amenity/court_yard",
"amenity/donation_box",
"amenity/dressing_room",
"amenity/drinking_water",
"emergency/water_tank",
"leisure/bleachers",
"leisure/schoolyard",
"public_transport/pay_scale_area",
"shop/taxi",
"amenity/feeding_place",
"amenity/fixme",
@ -31,6 +40,7 @@ DROP_CATEGORIES = {
"amenity/lounge",
"tourism/preserved_railway",
"amenity/lounger",
"leisure/sport",
"amenity/motorcycle_parking",
"amenity/mounting_block",
"amenity/notice_board",
@ -71,8 +81,12 @@ DROP_CATEGORIES = {
"amenity/boat_storage",
"amenity/bureau_de_change",
"amenity/bus_station",
"amenity/beachhut",
"amenity/canteen",
"amenity/conference_centre",
"amenity/crematorium",
"amenity/disused",
"amenity/driver_training",
"amenity/driving_school",
"amenity/escooter_rental",
"amenity/ferry_terminal",
@ -82,14 +96,21 @@ DROP_CATEGORIES = {
"amenity/kick-scooter_rental",
"amenity/money_transfer",
"amenity/post_depot",
"amenity/prison",
"amenity/public_building",
"amenity/recycling",
"amenity/scout_hut",
"amenity/social_facility",
"amenity/studio",
"amenity/student_accommodation",
"amenity/taxi",
"amenity/telephone_exchange",
"amenity/training",
"amenity/vehicle_inspection",
"amenity/waiting_room",
"amenity/yes",
"shop/disused",
"shop/no",
# Buildings (except church & university which are mapped)
"building/air_shaft",
"building/apartments",
@ -148,12 +169,14 @@ DROP_CATEGORIES = {
"emergency/yes",
"tourism/apartment",
"tourism/apartments",
"tourism/alpine_hut",
"tourism/camp_pitch",
"tourism/caravan_site",
"tourism/information",
"tourism/picnic_site",
"tourism/viewpoint",
"tourism/village_sign",
"tourism/wilderness_hut",
"tourism/yes",
# Public transport (from NaPTAN instead)
"public_transport/entrance",
@ -191,6 +214,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"🍺",
[
"amenity/pub",
"amenity/beer_garden",
"amenity/biergarten",
"amenity/social_club",
"amenity/club",
"leisure/social_club",
@ -293,7 +318,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"leisure/miniature_golf",
"leisure/horse_riding",
"leisure/fishing",
"leisure/ice_rink",
"leisure/paddling_pool",
"leisure/practice_pitch",
"leisure/shooting_ground",
"leisure/stadium",
"leisure/swimming_pool",
"leisure/swimming_area",
"leisure/water_park",
"leisure/bathing_place",
],
@ -307,9 +338,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"leisure/amusement_arcade",
"leisure/adult_gaming_centre",
"leisure/escape_game",
"leisure/maze",
"leisure/trampoline_park",
"leisure/sauna",
"leisure/tanning_salon",
"shop/amusements",
"tourism/theme_park",
"amenity/bicycle_rental",
"amenity/boat_rental",
@ -345,6 +378,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[
"shop/bakery",
"shop/pastry",
"craft/bakery",
"craft/confectionery",
],
),
@ -364,6 +398,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[
"shop/greengrocer",
"shop/farm",
"shop/market",
"amenity/marketplace",
],
),
@ -424,6 +459,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/appliance",
"shop/electrical",
"shop/hifi",
"shop/vacuum_cleaner",
"shop/video_games",
"shop/games",
],
@ -444,7 +480,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[
"shop/doityourself",
"shop/hardware",
"shop/builders_merchant",
"shop/paint",
"shop/plumbing",
],
),
(
@ -462,11 +500,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/curtain",
"shop/flooring",
"shop/fireplace",
"shop/garden_furniture",
"shop/groundskeeping",
"shop/household",
"shop/household_linen",
"shop/houseware",
"shop/homeware",
"shop/interior_decoration",
"shop/lighting",
"shop/kitchenware",
"shop/window_blind",
],
),
@ -493,8 +535,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"🏕️",
[
"shop/sports",
"shop/angling",
"shop/outdoor",
"shop/bicycle",
"shop/equestrian",
"shop/surf",
],
),
(
@ -532,9 +577,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/music",
"shop/musical_instrument",
"shop/antiques",
"shop/anime",
"shop/baby_goods",
"shop/fabric",
"shop/haberdashery",
"shop/hobby",
"shop/wool",
"shop/pottery",
],
@ -549,9 +596,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/bookmaker",
"shop/building_materials",
"shop/camera",
"shop/cannabis",
"shop/car",
"shop/caravan",
"shop/catalogue",
"shop/auction",
"shop/auction_house",
"shop/chandler",
"shop/collector",
"shop/copyshop",
"shop/country_store",
@ -560,6 +611,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/erotic",
"shop/esoteric",
"shop/fan",
"shop/fireworks",
"shop/fishing",
"shop/frame",
"shop/fuel",
@ -582,6 +634,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/scuba_diving",
"shop/security",
"shop/sewing",
"shop/ship_chandler",
"shop/signs",
"shop/storage_rental",
"shop/swimming_pool",
"shop/telecommunication",
@ -590,7 +644,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/tool_hire",
"shop/trade",
"shop/trophy",
"shop/truck",
"shop/vacant",
"shop/van",
"shop/video",
"shop/water_sports",
"shop/weapons",
@ -611,6 +667,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"shop/cosmetics",
"shop/massage",
"shop/perfumery",
"leisure/spa",
],
),
(
@ -757,6 +814,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[
"amenity/hospital",
"amenity/clinic",
"amenity/health_centre",
"healthcare/blood_donation",
"healthcare/hospital",
"healthcare/centre",
"healthcare/clinic",
@ -804,6 +863,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
[
"amenity/care_home",
"amenity/nursing_home",
"amenity/retirement_home",
"healthcare/hospice",
"healthcare/nursing_home",
"office/home_care",
],
),
@ -848,6 +910,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"",
[
"amenity/place_of_worship",
"amenity/monastery",
"building/church",
],
),
@ -873,6 +936,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"📸",
[
"tourism/attraction",
"tourism/aquarium",
"amenity/fountain",
"amenity/courthouse",
"tourism/chalet",
@ -892,6 +956,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"building/university",
"amenity/kindergarten",
"amenity/childcare",
"office/tutoring",
],
),
(
@ -904,6 +969,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"tourism/guest_house",
"tourism/motel",
"tourism/camp_site",
"leisure/resort",
"tourism/holiday_park",
"tourism/self_catering",
],
),
(
@ -928,14 +996,19 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"craft/window_construction",
"craft/agricultural_engines",
"craft/atelier",
"craft/beekeeper",
"craft/blacksmith",
"craft/bookbinder",
"craft/boatbuilder",
"craft/caterer",
"craft/carpet_layer",
"craft/clockmaker",
"craft/handicraft",
"craft/jeweller",
"craft/metal_construction",
"craft/photographer",
"craft/photographic_laboratory",
"craft/plasterer",
"craft/pottery",
"craft/printer",
"craft/sawmill",
@ -946,22 +1019,28 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"craft/upholsterer",
"craft/watchmaker",
"craft/yes",
"amenity/workshop",
"shop/glaziery",
"shop/windows",
# Professional offices & estate agents
"shop/estate_agent",
"office/accountant",
"office/architect",
"office/auctioneer",
"office/builder",
"office/construction",
"office/construction_company",
"office/engineer",
"office/estate_agent",
"office/financial",
"office/financial_advisor",
"office/financial_services",
"office/insurance",
"office/lawyer",
"office/mortgage",
"office/property_management",
"office/solicitor",
"office/solicitors",
"office/surveyor",
"office/tax_advisor",
],
@ -972,6 +1051,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"🏢",
[
"amenity/coworking_space",
"amenity/research_institute",
"office/administrative",
"office/advertising_agency",
"office/association",
"office/charity",
@ -997,12 +1078,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"office/notary",
"office/political_party",
"office/politician",
"office/publisher",
"office/quango",
"office/recruitment",
"office/religion",
"office/research",
"office/security",
"office/taxi",
"office/telecommunication",
"office/transport",
"office/union",
"office/university",
"office/vacant",
@ -1032,7 +1116,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
"Community Centre",
"🤝",
[
"amenity/church_hall",
"amenity/clubhouse",
"amenity/community_centre",
"amenity/community_hall",
"amenity/scout_hall",
"amenity/social_centre",
"amenity/townhall",
],

View file

@ -7,6 +7,8 @@ from scipy.spatial import cKDTree
from .haversine import haversine_km
EARTH_RADIUS_KM = 6371.0088
KM_PER_DEGREE_LAT = 111.32
DEFAULT_GRID_SIZE_DEGREES = 0.02
def _build_poi_grid(
@ -34,16 +36,29 @@ def _build_poi_grid(
def _get_nearby_indices(
pc_lat: float, pc_lon: float, poi_grid: dict, grid_size: float = 0.05
pc_lat: float,
pc_lon: float,
poi_grid: dict,
radius_km: float,
grid_size: float = DEFAULT_GRID_SIZE_DEGREES,
) -> np.ndarray | None:
"""Get POI indices from grid cells near the given coordinate."""
grid_lat = int(np.floor(pc_lat / grid_size))
grid_lng = int(np.floor(pc_lon / grid_size))
"""Get POI indices from all grid cells intersecting the radius bounding box."""
if not np.isfinite(pc_lat) or not np.isfinite(pc_lon):
return None
lat_delta = radius_km / KM_PER_DEGREE_LAT
cos_lat = abs(np.cos(np.radians(pc_lat)))
lng_delta = 180.0 if cos_lat < 1e-12 else radius_km / (KM_PER_DEGREE_LAT * cos_lat)
min_grid_lat = int(np.floor((pc_lat - lat_delta) / grid_size))
max_grid_lat = int(np.floor((pc_lat + lat_delta) / grid_size))
min_grid_lng = int(np.floor((pc_lon - lng_delta) / grid_size))
max_grid_lng = int(np.floor((pc_lon + lng_delta) / grid_size))
nearby_indices = []
for dlat in [-1, 0, 1]:
for dlng in [-1, 0, 1]:
cell_key = (grid_lat + dlat, grid_lng + dlng)
for grid_lat in range(min_grid_lat, max_grid_lat + 1):
for grid_lng in range(min_grid_lng, max_grid_lng + 1):
cell_key = (grid_lat, grid_lng)
if cell_key in poi_grid:
nearby_indices.append(poi_grid[cell_key])
@ -83,7 +98,7 @@ def count_pois_per_postcode(
n_pois = len(pois)
print(f" {n_postcodes:,} postcodes, {n_pois:,} POIs")
grid_size = 0.05
grid_size = DEFAULT_GRID_SIZE_DEGREES
print(" Building POI spatial grid...")
poi_lats, poi_lngs, poi_cats, poi_grid = _build_poi_grid(pois, grid_size)
print(f" POI grid has {len(poi_grid):,} occupied cells")
@ -120,7 +135,9 @@ def count_pois_per_postcode(
# Process batch
for i in range(start_idx, end_idx):
nearby = _get_nearby_indices(pc_lats[i], pc_lons[i], poi_grid, grid_size)
nearby = _get_nearby_indices(
pc_lats[i], pc_lons[i], poi_grid, radius_km, grid_size
)
if nearby is None:
continue

View file

@ -215,6 +215,14 @@ struct Cli {
#[arg(long, env = "STRIPE_REFERRAL_COUPON_ID")]
stripe_referral_coupon_id: String,
/// Bearer token required to scrape /metrics.
#[arg(long, env = "METRICS_BEARER_TOKEN")]
metrics_bearer_token: Option<String>,
/// Allow unauthenticated /metrics scraping when no METRICS_BEARER_TOKEN is set.
#[arg(long, env = "ALLOW_PUBLIC_METRICS", default_value_t = false)]
allow_public_metrics: bool,
/// Google OAuth client ID for PocketBase SSO
#[arg(long, env = "GOOGLE_OAUTH_CLIENT_ID")]
google_oauth_client_id: String,
@ -246,6 +254,8 @@ async fn main() -> anyhow::Result<()> {
info!("Prometheus metrics initialized");
let cli = Cli::parse();
let metrics_bearer_token = cli.metrics_bearer_token.clone();
let allow_public_metrics = cli.allow_public_metrics;
for (label, path) in [
("Properties", &cli.properties),
@ -510,7 +520,10 @@ async fn main() -> anyhow::Result<()> {
let public_url_tiles = initial_state.public_url.clone();
let api = Router::new()
.route("/api/features", get(routes::get_features))
.route(
"/api/features",
get(routes::get_features).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/hexagons",
get(routes::get_hexagons).layer(ConcurrencyLimitLayer::new(20)),
@ -519,30 +532,57 @@ async fn main() -> anyhow::Result<()> {
"/api/postcodes",
get(routes::get_postcodes).layer(ConcurrencyLimitLayer::new(20)),
)
.route("/api/postcode/{postcode}", get(routes::get_postcode_lookup))
.route("/api/nearest-postcode", get(routes::get_nearest_postcode))
.route(
"/api/postcode/{postcode}",
get(routes::get_postcode_lookup).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/nearest-postcode",
get(routes::get_nearest_postcode).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/pois",
get(routes::get_pois).layer(ConcurrencyLimitLayer::new(20)),
)
.route("/api/poi-categories", get(routes::get_poi_categories))
.route("/api/places", get(routes::get_places))
.route("/api/travel-modes", get(routes::get_travel_modes))
.route(
"/api/poi-categories",
get(routes::get_poi_categories).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/places",
get(routes::get_places).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/travel-modes",
get(routes::get_travel_modes).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/travel-destinations",
get(routes::get_travel_destinations),
get(routes::get_travel_destinations).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/journey",
get(routes::get_journey).layer(ConcurrencyLimitLayer::new(10)),
)
.route("/api/journey", get(routes::get_journey))
.route(
"/api/hexagon-properties",
get(routes::get_hexagon_properties),
get(routes::get_hexagon_properties).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/filter-counts",
get(routes::get_filter_counts).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/hexagon-stats",
get(routes::get_hexagon_stats).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/postcode-stats",
get(routes::get_postcode_stats).layer(ConcurrencyLimitLayer::new(5)),
)
.route("/api/filter-counts", get(routes::get_filter_counts))
.route("/api/hexagon-stats", get(routes::get_hexagon_stats))
.route("/api/postcode-stats", get(routes::get_postcode_stats))
.route(
"/api/postcode-properties",
get(routes::get_postcode_properties),
get(routes::get_postcode_properties).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/screenshot",
@ -552,13 +592,26 @@ async fn main() -> anyhow::Result<()> {
"/api/export",
get(routes::get_export).layer(ConcurrencyLimitLayer::new(3)),
)
.route("/api/me", get(routes::get_me))
.route("/api/shorten", post(routes::post_shorten))
.route(
"/api/me",
get(routes::get_me).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/shorten",
post(routes::post_shorten).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/share-links",
get(routes::get_share_links).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/ai-filters",
post(routes::post_ai_filters).layer(ConcurrencyLimitLayer::new(5)),
)
.route("/api/streetview", get(routes::get_streetview))
.route(
"/api/streetview",
get(routes::get_streetview).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/rightmove-search",
get(routes::get_rightmove_redirect).layer(ConcurrencyLimitLayer::new(10)),
@ -567,23 +620,44 @@ async fn main() -> anyhow::Result<()> {
"/api/newsletter",
patch(routes::patch_newsletter).layer(ConcurrencyLimitLayer::new(10)),
)
.route("/api/pricing", get(routes::get_pricing))
.route(
"/api/pricing",
get(routes::get_pricing).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/api/checkout",
post(routes::post_checkout).layer(ConcurrencyLimitLayer::new(10)),
)
.route("/api/stripe-webhook", post(routes::post_stripe_webhook))
.route(
"/api/stripe-webhook",
post(routes::post_stripe_webhook).layer(ConcurrencyLimitLayer::new(10)),
)
.route(
"/api/invites",
get(routes::get_invites).post(routes::post_invites),
get(routes::get_invites)
.post(routes::post_invites)
.layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/invite/{code}",
get(routes::get_invite).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/api/redeem-invite",
post(routes::post_redeem_invite).layer(ConcurrencyLimitLayer::new(5)),
)
.route(
"/s/{code}",
get(routes::get_short_url).layer(ConcurrencyLimitLayer::new(10)),
)
.route("/api/invite/{code}", get(routes::get_invite))
.route("/api/redeem-invite", post(routes::post_redeem_invite))
.route("/s/{code}", get(routes::get_short_url))
.route(
"/api/telemetry",
post(routes::post_telemetry).layer(ConcurrencyLimitLayer::new(20)),
)
.route(
"/pb/api/realtime",
any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(50)),
)
.route(
"/pb/{*rest}",
any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(10)),
@ -591,19 +665,28 @@ async fn main() -> anyhow::Result<()> {
// Tile routes use a different state type — kept as closures
.route(
"/api/tiles/{z}/{x}/{y}",
get(move |path| routes::get_tile(axum::extract::State(reader_tile.clone()), path)),
get(move |path| routes::get_tile(axum::extract::State(reader_tile.clone()), path))
.layer(ConcurrencyLimitLayer::new(30)),
)
.route(
"/api/tiles/style.json",
get(move |query| {
let pu = public_url_tiles.clone();
routes::get_style(axum::extract::State(reader_style.clone()), pu, query)
}),
})
.layer(ConcurrencyLimitLayer::new(20)),
)
.route("/health", get(|| async { "ok" }))
.route(
"/metrics",
get(move || metrics::metrics_handler(metrics_handle.clone())),
get(move |headers| {
metrics::metrics_handler(
metrics_handle.clone(),
metrics_bearer_token.clone(),
allow_public_metrics,
headers,
)
}),
)
.with_state(shared.clone());

View file

@ -8,7 +8,7 @@ use serde::Deserialize;
use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, MAX_PROPERTIES_LIMIT, POSTCODE_SEARCH_OFFSET};
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, POSTCODE_SEARCH_OFFSET};
use crate::licensing::{check_license_point, resolve_share_code};
use crate::parsing::{parse_filters_with_poi, row_passes_filters, row_passes_poi_filters};
use crate::state::SharedState;
@ -151,10 +151,7 @@ pub async fn get_postcode_properties(
});
let total = matching_rows.len();
let limit = params
.limit
.unwrap_or(DEFAULT_PROPERTIES_LIMIT)
.min(MAX_PROPERTIES_LIMIT);
let limit = params.limit.unwrap_or(DEFAULT_PROPERTIES_LIMIT);
let page_offset = params.offset.unwrap_or(0);
let truncated = total > page_offset + limit;

View file

@ -12,7 +12,6 @@ use tracing::info;
use crate::aggregation::{Aggregator, EnumDistConfig, PoiAggregator};
use crate::auth::OptionalUser;
use crate::consts::MAX_CELLS_PER_REQUEST;
use crate::data::travel_time::TravelData;
use crate::licensing::{check_license_bounds, resolve_share_code};
use crate::parsing::{
@ -354,73 +353,61 @@ pub async fn get_postcodes(
features.push(feature);
included_postcodes.insert(pc_idx);
if features.len() >= MAX_CELLS_PER_REQUEST {
break;
}
}
if features.len() < MAX_CELLS_PER_REQUEST {
for pc_idx in selectable_postcodes {
if included_postcodes.contains(&pc_idx) {
continue;
}
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
if !bounds_intersect(
pc_south as f64,
pc_west as f64,
pc_north as f64,
pc_east as f64,
south,
west,
north,
east,
) {
filtered_out += 1;
continue;
}
let geometry = postcode_data.geometry_geojson(pc_idx);
let centroid = postcode_data.centroids[pc_idx];
let mut props = Map::new();
props.insert(
"postcode".into(),
Value::String(postcode_data.postcodes[pc_idx].clone()),
);
props.insert("count".into(), Value::from(0));
props.insert(
"centroid".into(),
Value::Array(vec![
Value::from(centroid.1 as f64),
Value::from(centroid.0 as f64),
]),
);
let mut feature = Map::new();
feature.insert("type".into(), Value::String("Feature".into()));
feature.insert("geometry".into(), geometry);
feature.insert("properties".into(), Value::Object(props));
features.push(feature);
if features.len() >= MAX_CELLS_PER_REQUEST {
break;
}
for pc_idx in selectable_postcodes {
if included_postcodes.contains(&pc_idx) {
continue;
}
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
if !bounds_intersect(
pc_south as f64,
pc_west as f64,
pc_north as f64,
pc_east as f64,
south,
west,
north,
east,
) {
filtered_out += 1;
continue;
}
let geometry = postcode_data.geometry_geojson(pc_idx);
let centroid = postcode_data.centroids[pc_idx];
let mut props = Map::new();
props.insert(
"postcode".into(),
Value::String(postcode_data.postcodes[pc_idx].clone()),
);
props.insert("count".into(), Value::from(0));
props.insert(
"centroid".into(),
Value::Array(vec![
Value::from(centroid.1 as f64),
Value::from(centroid.0 as f64),
]),
);
let mut feature = Map::new();
feature.insert("type".into(), Value::String("Feature".into()));
feature.insert("geometry".into(), geometry);
feature.insert("properties".into(), Value::Object(props));
features.push(feature);
}
histogram!("postcodes_response_count").record(features.len() as f64);
let truncated = features.len() >= MAX_CELLS_PER_REQUEST;
let t_total = t0.elapsed();
info!(
postcodes_before_filter,
matching_postcodes,
postcodes_after_filter = features.len(),
filtered_out,
truncated,
bounds = format_args!("{:.6},{:.6},{:.6},{:.6}", south, west, north, east),
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),

View file

@ -3,7 +3,6 @@ import {
hex,
vfrac,
type Activity,
type AdScene,
type Storyboard,
type TravelTimeFilter,
type VideoConfig,
@ -629,30 +628,6 @@ const AD_DEFAULT_FILTERS: Record<string, [number, number] | string[]> = {
'Outstanding primary schools within 2km': [0, 10],
};
/**
* Stable Unsplash CDN photo URLs. Each one is a 720-wide JPEG fetched at
* record time. The CDN serves with permissive CORS, no auth needed, and
* the IDs are stable URLs (Unsplash does not rotate them). If any photo
* stops resolving, dom.ts hides the broken image and the rest of the
* scene still renders, so a 404 here degrades to text-only rather than
* breaking the ad. To swap a photo, search unsplash.com for the theme
* and paste the `photo-{id}` slug from the URL bar.
*/
const PHOTO = {
terracedRow: 'https://images.unsplash.com/photo-1769344694490-66fb22a8d8cf?w=720&q=80&auto=format&fit=crop',
brickStreet: 'https://images.unsplash.com/photo-1689867373120-355ce130d485?w=720&q=80&auto=format&fit=crop',
woodAccentHouses: 'https://images.unsplash.com/photo-1753198412280-b4a9729c1c51?w=720&q=80&auto=format&fit=crop',
colourfulRow: 'https://images.unsplash.com/photo-1718579019220-98697dc2fd72?w=720&q=80&auto=format&fit=crop',
busyTraffic: 'https://images.unsplash.com/photo-1645718171033-574c88494de2?w=720&q=80&auto=format&fit=crop',
cityTraffic: 'https://images.unsplash.com/photo-1714128949057-f7ac4cb71e6c?w=720&q=80&auto=format&fit=crop',
trafficLight: 'https://images.unsplash.com/photo-1680276553514-357f2edc46a1?w=720&q=80&auto=format&fit=crop',
leafySuburb: 'https://images.unsplash.com/photo-1663651884092-a2449ed3671a?w=720&q=80&auto=format&fit=crop',
suburbHomes: 'https://images.unsplash.com/photo-1768301346584-86e781872b82?w=720&q=80&auto=format&fit=crop',
trainPlatform: 'https://images.unsplash.com/photo-1684934899514-772e03714de5?w=720&q=80&auto=format&fit=crop',
trainClock: 'https://images.unsplash.com/photo-1657441629839-874d398b6e04?w=720&q=80&auto=format&fit=crop',
keysFrontDoor: 'https://images.unsplash.com/photo-1741156386380-0236c72eb6f9?w=720&q=80&auto=format&fit=crop',
};
const linger = (durationMs = 360): Activity[] => [{ kind: 'wait', durationMs }];
/**
@ -781,15 +756,6 @@ const ttDragAct = (toMin: number, durationMs = 1400): Activity => ({
toFraction: toMin / TT_SLIDER_MAX,
durationMs,
});
const showScene = (scene: AdScene): Activity => ({
kind: 'showAdScene',
scene,
durationMs: 0,
});
const hideScene = (durationMs = 320): Activity => ({
kind: 'hideAdScene',
durationMs,
});
const wait = (durationMs: number): Activity => ({ kind: 'wait', durationMs });
const mapZoomIn = (durationMs = 1400, steps = 5): Activity => ({
kind: 'mapZoom',
@ -860,17 +826,18 @@ const LONDON_VIEW = { lat: 51.4672, lon: -0.1276, zoom: 10.5 };
const AD_CONFIGS: DemoAdStoryboardConfig[] = [
// -------------------------------------------------------------------
// 01 — Search by sentence. Type the prompt on camera, narration runs
// simultaneously. Filters relevant: commute + crime + schools.
// simultaneously. Filters relevant: price + commute + crime + noise.
// -------------------------------------------------------------------
{
name: 'ad-01-london-prompt',
city: 'london',
promptText:
'Two bed in London, 35 min to centre, lower crime, lower noise',
'London flat under £600k, 35 min to centre, lower crime, lower noise',
filters: {
'Property type': ['Flats/Maisonettes'],
'Estimated current price': [0, 600000],
'Serious crime per 1k residents (avg/yr)': [0, 50],
'Road noise score (mean dB)': [0, 60],
'Noise (dB)': [0, 58],
},
travelTimeFilters: [
{ mode: 'transit', slug: 'london', label: 'London city centre', max: 35 },
@ -879,20 +846,20 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 8,
cues: [
{
text: 'Describe the London home you actually want.',
text: 'Stop searching listing by listing. Search by the area brief.',
during: [typeAct(
'Two bed in London, 35 min to centre, lower crime, lower noise',
'London flat under £600k, 35 min to centre, lower crime, lower noise',
2800
)],
tail: [wait(200)],
},
{
text: 'Hit search. The map answers in one second.',
text: 'Price, commute, crime and noise land on the map together.',
during: [submitAct(1100)],
tail: [wait(700)],
},
{
text: 'Every lit postcode fits all five rules at once.',
text: 'Every lit postcode is somewhere worth checking first.',
tail: [wait(600)],
},
],
@ -914,16 +881,16 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 5.5,
cues: [
{
text: 'Watch what one slider does to your shortlist.',
text: 'Your commute limit should change the map, not your patience.',
tail: [wait(200)],
},
{
text: 'Drag forty minutes down to fifteen.',
text: 'Drag forty minutes down to fifteen minutes.',
during: [ttDragAct(15, 1900)],
tail: [wait(700)],
},
{
text: 'Half the map just lost its place.',
text: 'The reachable postcodes disappear in front of you.',
tail: [wait(600)],
},
],
@ -946,17 +913,17 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 10,
cues: [
{
text: 'Type the brief. Map fills with matching areas.',
text: 'Type a family brief and watch matching areas appear.',
during: [typeAct('Family home in London, decent schools nearby', 2400), submitAct(900)],
tail: [wait(500)],
},
{
text: 'Zoom past the hexagons. Real postcodes break open.',
text: 'Zoom from area patterns into actual postcodes.',
during: [mapZoomIn(3000, 10)],
tail: [wait(400)],
},
{
text: 'Tap one. Sold prices, schools, crime, noise.',
text: 'Tap one for sold prices and street-level context.',
during: [
{ kind: 'cursorScale', scale: 1.3, durationMs: 200 },
clickHex(900),
@ -992,7 +959,7 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 6,
cues: [
{
text: 'Four hundred grand. London. Thirty minute commute.',
text: 'London under four hundred thousand, with a thirty minute commute.',
during: [typeAct(
'Flat in London under £400k, 30 min to centre, lower crime',
2800
@ -1000,61 +967,57 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
tail: [wait(400)],
},
{
text: 'Watch the filters stack and the map shrink.',
text: 'The active filters stack up as the map tightens.',
during: [scrollFilters(280, 900)],
tail: [wait(600)],
},
{
text: 'Every lit postcode hits all four rules.',
text: 'Now the cheap-looking areas have to pass the brief.',
tail: [wait(500)],
},
],
},
// -------------------------------------------------------------------
// 05 — Two streets apart. Photo split is the hook. Caption stays
// SHORT so it does not compete with the overlay's title text.
// 05 — Two streets apart. Product-led now: noise + crime filters are
// typed and submitted on screen instead of masking the product with
// generic street photos.
// -------------------------------------------------------------------
{
name: 'ad-05-two-streets-apart',
city: 'london',
promptText: 'Quieter London, lower road noise',
promptText: 'Quiet London streets, lower noise, lower serious crime',
filters: {
'Road noise score (mean dB)': [0, 58],
'Serious crime per 1k residents (avg/yr)': [0, 50],
'Noise (dB)': [0, 55],
'Serious crime per 1k residents (avg/yr)': [0, 45],
},
initialZoom: 10.6,
posterTimeS: 4,
cues: [
{
text: 'Two homes. Four hundred metres apart.',
during: [showScene({
mode: 'split',
accent: 'rose',
kicker: 'Two streets',
title: 'Same price tag.',
images: [PHOTO.terracedRow, PHOTO.busyTraffic],
left: { title: 'Street A', meta: 'Quiet', tone: 'good' },
right: { title: 'Street B', meta: 'Main road', tone: 'bad' },
transparent: false,
})],
text: 'Two streets can look identical in a listing photo.',
during: [typeAct(
'Quiet London streets, lower noise, lower serious crime',
2500
), submitAct(900)],
tail: [wait(400)],
},
{
text: 'Filter noise and serious crime before you book a viewing.',
during: [scrollFilters(220, 800)],
tail: [wait(500)],
},
{
text: 'Same price. Completely different lives.',
tail: [wait(500)],
},
{
text: 'The map knows the difference. The photos do not.',
during: [hideScene(360)],
tail: [wait(700)],
text: 'Now the quieter pockets are the ones left on screen.',
during: [mapZoomIn(1300, 4)],
tail: [wait(600)],
},
],
},
// -------------------------------------------------------------------
// 06 — Commute tax. Photo hook (train platform) opens; cue 1 hides
// the overlay and the travel-time slider drags from 60 → 20 min.
// 06 — Commute tax. Starts on the live commute layer and immediately
// proves the point with the travel-time slider.
// -------------------------------------------------------------------
{
name: 'ad-06-london-commute-tax',
@ -1068,38 +1031,30 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 4,
cues: [
{
text: 'Twenty minutes or sixty. Same asking price.',
during: [showScene({
mode: 'title',
accent: 'amber',
kicker: 'Commute tax',
image: PHOTO.trainClock,
title: 'Cheap, until you count the hours.',
})],
tail: [wait(400)],
text: 'A cheap home gets expensive when the commute is wrong.',
tail: [wait(300)],
},
{
text: 'Drag the slider. Watch the map shrink.',
during: [hideScene(320), ttDragAct(20, 1800)],
text: 'Drag sixty minutes down to twenty and watch the map shrink.',
during: [ttDragAct(20, 1900)],
tail: [wait(700)],
},
{
text: 'Time is the bill you pay every week.',
text: 'That weekly time bill is visible before the viewing.',
tail: [wait(600)],
},
],
},
// -------------------------------------------------------------------
// 07 — Quiet near London. Leafy-suburb photo opens; cue 1 hides it
// and the dashboard (already filtered for low noise) is revealed.
// 07 — Quiet near London. Uses the real prod Noise (dB) feature.
// -------------------------------------------------------------------
{
name: 'ad-07-quiet-near-london',
city: 'london',
promptText: 'Quieter London, lower road noise, good transit',
filters: {
'Road noise score (mean dB)': [0, 56],
'Noise (dB)': [0, 55],
'Estimated current price': [0, 700000],
},
travelTimeFilters: [
@ -1109,30 +1064,25 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
posterTimeS: 4,
cues: [
{
text: 'Quiet streets, near London. They do exist.',
during: [showScene({
mode: 'title',
accent: 'teal',
image: PHOTO.leafySuburb,
title: 'Yes, they exist.',
})],
text: 'Quiet near London is searchable, not just hopeful.',
during: [typeAct('Quieter London, lower road noise, good transit', 2500), submitAct(900)],
tail: [wait(400)],
},
{
text: 'You just have to filter for noise, not price.',
during: [hideScene(320), scrollFilters(220, 800)],
text: 'Filter for noise alongside price and travel time.',
during: [scrollFilters(220, 800)],
tail: [wait(500)],
},
{
text: 'The hidden pockets light up.',
text: 'The calmer pockets show up before you go anywhere.',
tail: [wait(500)],
},
],
},
// -------------------------------------------------------------------
// 08 — The postcode comes with the keys. Keys photo opens; map shows
// London filtered for family-friendly area.
// 08 — The postcode comes with the keys. Keeps the memorable premise,
// but shows the product doing the work instead of a keys stock photo.
// -------------------------------------------------------------------
{
name: 'ad-08-postcode-with-the-keys',
@ -1142,99 +1092,104 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
'Estimated current price': [0, 750000],
'Outstanding primary schools within 2km': [1, 10],
'Serious crime per 1k residents (avg/yr)': [0, 50],
'Noise (dB)': [0, 58],
},
travelTimeFilters: [
{ mode: 'transit', slug: 'london', label: 'London city centre', max: 45 },
],
initialZoom: 10.5,
posterTimeS: 3,
cues: [
{
text: 'You can renovate the kitchen.',
during: [showScene({
mode: 'title',
accent: 'lime',
image: PHOTO.keysFrontDoor,
title: 'You keep the postcode forever.',
})],
text: 'You can change the kitchen. You inherit the postcode.',
during: [typeAct(
'Family London, lower crime, good schools, lower noise',
2500
), submitAct(900)],
tail: [wait(400)],
},
{
text: 'You can not renovate the commute or the noise.',
text: 'So check commute, crime, schools and noise first.',
during: [scrollFilters(320, 900)],
tail: [wait(500)],
},
{
text: 'Pick the area first. The keys come second.',
during: [hideScene(320)],
during: [mapZoomIn(1200, 4)],
tail: [wait(600)],
},
],
},
// -------------------------------------------------------------------
// 09 — Waitrose distance. Niche filter that maps to social-class
// proxy. We type the brief, scroll the filter pane to surface the
// Waitrose-distance card explicitly.
// 09 — Amenities. Waitrose is the memorable example, but the copy
// frames it as practical amenity filtering rather than a throwaway gag.
// -------------------------------------------------------------------
{
name: 'ad-09-london-waitrose',
city: 'london',
promptText:
'London postcodes within walking distance of a Waitrose',
'London postcodes near Waitrose, tube and parks under £800k',
filters: {
'Distance to nearest Waitrose (km)': [0, 1],
'Distance to nearest tube station (km)': [0, 1.2],
'Distance to nearest park (km)': [0, 0.8],
'Estimated current price': [0, 800000],
},
initialZoom: 10.4,
posterTimeS: 7,
cues: [
{
text: 'How close is your nearest Waitrose. Yes, really.',
text: 'Amenities should be filters, not guesses from the photos.',
during: [typeAct(
'London postcodes within walking distance of a Waitrose',
'London postcodes near Waitrose, tube and parks under £800k',
2800
), submitAct(900)],
tail: [wait(400)],
},
{
text: 'The map highlights the lucky postcodes.',
during: [scrollFilters(180, 800)],
text: 'Waitrose, tube, parks and price can all count together.',
during: [scrollFilters(300, 900)],
tail: [wait(600)],
},
{
text: 'It is a real filter, not a meme.',
text: 'Now you know which postcodes actually match that lifestyle.',
tail: [wait(500)],
},
],
},
// -------------------------------------------------------------------
// 10 — Reform-voting councils. % Reform UK vote share as a filter.
// Politically tense — kept matter-of-fact, no spin in the copy.
// 10 — Local politics. Matter-of-fact and product-led; lower threshold
// keeps the map populated while still surfacing the Reform UK feature.
// -------------------------------------------------------------------
{
name: 'ad-10-reform-councils',
city: 'london',
city: 'leeds',
promptText:
'Areas where the council voted heavily for Reform UK',
'Areas with higher Reform UK vote share and lower prices',
filters: {
'% Reform UK': [25, 100],
'% Reform UK': [15, 100],
'Estimated current price': [0, 350000],
},
initialZoom: 9.5,
initialZoom: 10.5,
posterTimeS: 7,
cues: [
{
text: 'Want to know which way your future council voted.',
text: 'Local politics is part of the neighbourhood data too.',
during: [typeAct(
'Areas where the council voted heavily for Reform UK',
'Areas with higher Reform UK vote share and lower prices',
2600
)],
tail: [wait(300)],
},
{
text: 'Run the filter. See the map.',
text: 'Run the filter and see which areas stay in view.',
during: [submitAct(900), scrollFilters(180, 700)],
tail: [wait(500)],
},
{
text: 'Politics shapes the area too.',
text: 'No spin. Just another local signal before you buy.',
tail: [wait(500)],
},
],
@ -1247,76 +1202,67 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
name: 'ad-11-leeds-families',
city: 'leeds',
promptText:
'Three bed near Leeds, outstanding primary nearby, lower crime',
'Leeds family areas, good primary schools nearby, lower crime',
filters: {
'Estimated current price': [0, 380000],
'Outstanding primary schools within 2km': [2, 10],
'Good+ primary schools within 2km': [2, 10],
'Serious crime per 1k residents (avg/yr)': [0, 45],
},
initialZoom: 11.0,
posterTimeS: 6,
cues: [
{
text: 'Leeds, but only the school-run friendly bits.',
text: 'Find Leeds areas that work for the school run.',
during: [typeAct(
'Three bed near Leeds, outstanding primary nearby, lower crime',
'Leeds family areas, good primary schools nearby, lower crime',
2500
), submitAct(900)],
tail: [wait(300)],
},
{
text: 'Two outstanding primaries within walking distance.',
text: 'School quality and serious crime sit beside price.',
during: [scrollFilters(220, 800)],
tail: [wait(500)],
},
{
text: 'Every lit postcode is a real candidate.',
text: 'Every lit postcode is a better place to start.',
tail: [wait(500)],
},
],
},
// -------------------------------------------------------------------
// 12 — Pricing scarcity. Real prod numbers (verified via /api/pricing
// at render time): the £0.99 tier is sold out (50/50); the current
// £9.99 tier has 17 slots left before the next jump to £29.99. We
// surface those numbers in a structured rank scene over the live
// dashboard, since recording on the /pricing route would require a
// dashboard URL override and we want to ship this iteration.
// 12 — Pricing/value. Keeps the current £9.99 founder-price hook, but
// proves value through the product instead of a static scarcity card.
// -------------------------------------------------------------------
{
name: 'ad-12-pricing-scarcity',
city: 'london',
promptText: 'Quieter London, good schools, lower crime',
promptText: 'London under £700k, good schools, lower crime and lower noise',
filters: {
'Estimated current price': [0, 700000],
'Outstanding primary schools within 2km': [1, 10],
'Serious crime per 1k residents (avg/yr)': [0, 50],
'Noise (dB)': [0, 58],
},
initialZoom: 10.4,
posterTimeS: 3,
cues: [
{
text: 'Seventeen spots left at nine ninety nine.',
during: [showScene({
mode: 'rank',
accent: 'amber',
kicker: 'Founder pricing',
title: 'Cheap tier almost gone.',
items: [
{ label: '£0.99 / month', value: 'sold out', tone: 'bad' },
{ label: '£9.99 / month', value: '17 left', tone: 'warn' },
{ label: '£29.99 / month', value: 'next', tone: 'neutral' },
],
})],
text: 'Nine ninety nine beats one wasted viewing.',
during: [typeAct(
'London under £700k, good schools, lower crime and lower noise',
2700
), submitAct(900)],
tail: [wait(400)],
},
{
text: 'Then the price triples.',
text: 'Use the map before spending a Saturday in the wrong area.',
during: [scrollFilters(300, 900)],
tail: [wait(500)],
},
{
text: 'Get in before the next jump.',
during: [hideScene(360)],
text: 'The cheapest mistake is the one you skip.',
tail: [wait(600)],
},
],