Update data
This commit is contained in:
parent
a4103b0896
commit
273d7a83ee
15 changed files with 716 additions and 316 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -17,3 +17,7 @@ frontend/public/assets/*
|
|||
frontend/public/assets/.done
|
||||
server-rs/logs
|
||||
video/auth.*
|
||||
*.jpg
|
||||
*.jpeg
|
||||
*.mp4
|
||||
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ export function useSavedProperties(userId: string | null) {
|
|||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : 'Failed to save property';
|
||||
setError(msg);
|
||||
throw err;
|
||||
}
|
||||
},
|
||||
[userId, fetchProperties]
|
||||
|
|
|
|||
|
|
@ -23,6 +23,11 @@ html.dark {
|
|||
color-scheme: dark;
|
||||
}
|
||||
|
||||
button:not(:disabled),
|
||||
[role='button']:not([aria-disabled='true']) {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/* Smooth theme transitions (scoped to avoid map performance issues) */
|
||||
body,
|
||||
div,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,11 @@ import pyarrow as pa
|
|||
import pyarrow.csv as pa_csv
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
from ..utils import fuzzy_join_on_postcode
|
||||
from ..utils import (
|
||||
fuzzy_join_on_postcode,
|
||||
normalize_address_key,
|
||||
normalize_postcode_key,
|
||||
)
|
||||
|
||||
|
||||
pl.Config.set_tbl_cols(-1)
|
||||
|
|
@ -193,12 +197,15 @@ def main():
|
|||
|
||||
|
||||
def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Path):
|
||||
epc_base = _scan_epc_certificates(epc_path, temp_dir)
|
||||
epc_base = _scan_epc_certificates(epc_path, temp_dir).with_columns(
|
||||
normalize_address_key(pl.col("epc_address")).alias("_epc_match_address"),
|
||||
normalize_postcode_key(pl.col("epc_postcode")).alias("_epc_match_postcode"),
|
||||
)
|
||||
|
||||
# Dedup fork: keep latest certificate per property (existing logic)
|
||||
epc = (
|
||||
epc_base.sort("inspection_date", descending=True)
|
||||
.group_by("epc_address", "epc_postcode")
|
||||
.group_by("_epc_match_address", "_epc_match_postcode")
|
||||
.first()
|
||||
.drop("tenure")
|
||||
)
|
||||
|
|
@ -216,15 +223,15 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
.with_columns(
|
||||
pl.col("number_habitable_rooms")
|
||||
.shift(1)
|
||||
.over("epc_address", "epc_postcode")
|
||||
.over("_epc_match_address", "_epc_match_postcode")
|
||||
.alias("_prev_rooms"),
|
||||
pl.col("total_floor_area")
|
||||
.shift(1)
|
||||
.over("epc_address", "epc_postcode")
|
||||
.over("_epc_match_address", "_epc_match_postcode")
|
||||
.alias("_prev_area"),
|
||||
pl.col("_rating_rank")
|
||||
.shift(1)
|
||||
.over("epc_address", "epc_postcode")
|
||||
.over("_epc_match_address", "_epc_match_postcode")
|
||||
.alias("_prev_rating_rank"),
|
||||
)
|
||||
.with_columns(
|
||||
|
|
@ -257,7 +264,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
.cast(pl.Int32)
|
||||
.alias("_event_year"),
|
||||
)
|
||||
.group_by("epc_address", "epc_postcode")
|
||||
.group_by("_epc_match_address", "_epc_match_postcode")
|
||||
.agg(
|
||||
pl.struct(
|
||||
pl.col("_event_year").alias("year"),
|
||||
|
|
@ -276,7 +283,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
# Social tenure fork: flag properties that were ever social housing
|
||||
social_tenure = (
|
||||
epc_base.filter(pl.col("tenure").str.to_lowercase().str.contains("social"))
|
||||
.select("epc_address", "epc_postcode")
|
||||
.select("_epc_match_address", "_epc_match_postcode")
|
||||
.unique()
|
||||
.with_columns(pl.lit("Yes").alias("was_council_house"))
|
||||
.collect()
|
||||
|
|
@ -287,12 +294,12 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
epc = (
|
||||
epc.join(
|
||||
events.lazy(),
|
||||
on=["epc_address", "epc_postcode"],
|
||||
on=["_epc_match_address", "_epc_match_postcode"],
|
||||
how="left",
|
||||
)
|
||||
.join(
|
||||
social_tenure.lazy(),
|
||||
on=["epc_address", "epc_postcode"],
|
||||
on=["_epc_match_address", "_epc_match_postcode"],
|
||||
how="left",
|
||||
)
|
||||
.with_columns(
|
||||
|
|
@ -339,9 +346,23 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
ignore_nulls=True,
|
||||
).alias("pp_address"),
|
||||
)
|
||||
.with_columns(
|
||||
normalize_address_key(pl.col("pp_address")).alias("_pp_match_address"),
|
||||
normalize_postcode_key(pl.col("postcode")).alias("_pp_match_postcode"),
|
||||
)
|
||||
.filter(pl.col("_pp_match_postcode").is_not_null())
|
||||
.with_columns(
|
||||
pl.coalesce("_pp_match_address", "pp_address").alias("_pp_group_address"),
|
||||
pl.col("_pp_match_postcode").alias("_pp_group_postcode"),
|
||||
)
|
||||
.filter(pl.col("pp_address").is_not_null())
|
||||
.sort("date_of_transfer")
|
||||
.group_by("pp_address", "postcode", maintain_order=True)
|
||||
.group_by("_pp_group_address", "_pp_group_postcode", maintain_order=True)
|
||||
.agg(
|
||||
pl.col("pp_address").last(),
|
||||
pl.col("postcode").last(),
|
||||
pl.col("_pp_match_address").last(),
|
||||
pl.col("_pp_match_postcode").last(),
|
||||
pl.struct(
|
||||
pl.col("date_of_transfer").dt.year().alias("year"),
|
||||
pl.col("date_of_transfer").dt.month().cast(pl.UInt8).alias("month"),
|
||||
|
|
@ -354,7 +375,7 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
pl.col("date_of_transfer").first().alias("first_transfer_date"),
|
||||
pl.col("old_new").first(),
|
||||
)
|
||||
).filter(pl.col("pp_address").is_not_null())
|
||||
)
|
||||
|
||||
print("Price paid dataset")
|
||||
print(price_paid.head().collect())
|
||||
|
|
@ -405,7 +426,19 @@ def _run(epc_path: Path, price_paid_path: Path, output_path: Path, temp_dir: Pat
|
|||
.then(pl.lit(1, dtype=pl.UInt8))
|
||||
.otherwise(pl.lit(None, dtype=pl.UInt8))
|
||||
.alias("is_construction_date_approximate"),
|
||||
).drop("old_new", "first_transfer_date")
|
||||
).drop(
|
||||
[
|
||||
"old_new",
|
||||
"first_transfer_date",
|
||||
"_pp_match_address",
|
||||
"_pp_match_postcode",
|
||||
"_pp_group_address",
|
||||
"_pp_group_postcode",
|
||||
"_epc_match_address",
|
||||
"_epc_match_postcode",
|
||||
],
|
||||
strict=False,
|
||||
)
|
||||
|
||||
joined = joined.rename({col: col.lower() for col in joined.columns})
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ _AREA_COLUMNS = [
|
|||
"Postcode",
|
||||
"lat",
|
||||
"lon",
|
||||
# Runtime provenance for deciding whether missing coordinates are skippable.
|
||||
"ctry25cd",
|
||||
# Deprivation
|
||||
"Income Score",
|
||||
"Employment Score",
|
||||
|
|
@ -86,6 +88,15 @@ _AREA_COLUMNS = [
|
|||
_DYNAMIC_POI_DISTANCE_RE = re.compile(r"^Distance to nearest amenity \(.+\) \(km\)$")
|
||||
_DYNAMIC_POI_COUNT_RE = re.compile(r"^Number of amenities \(.+\) within (2|5)km$")
|
||||
TREE_DENSITY_FEATURE = "Street tree density percentile"
|
||||
_POSTCODE_TREE_DENSITY_PERCENTILE_RE = re.compile(
|
||||
r"^Tree canopy density percentile within \d+m$"
|
||||
)
|
||||
_RENT_SOURCE_UNAVAILABLE_LADS = {
|
||||
# ONS PIPR does not publish LAD-level private-rent estimates for these
|
||||
# small authorities. Keep rent null there, but fail on any other LAD miss.
|
||||
"E06000053": "Isles of Scilly",
|
||||
"E09000001": "City of London",
|
||||
}
|
||||
|
||||
|
||||
def _is_dynamic_poi_metric_column(column: str) -> bool:
|
||||
|
|
@ -112,6 +123,107 @@ def _less_deprived_percentile_expr(column: str) -> pl.Expr:
|
|||
)
|
||||
|
||||
|
||||
def _tree_density_by_postcode(tree_density_postcodes_path: Path) -> pl.LazyFrame:
    """Load postcode-level tree density as a two-column lazy frame.

    The parquet file must contain a ``postcode`` column. The density values are
    taken from the column named by ``TREE_DENSITY_FEATURE`` when it exists;
    otherwise from the single column matching
    ``_POSTCODE_TREE_DENSITY_PERCENTILE_RE``.

    Returns:
        A LazyFrame with ``postcode`` and ``TREE_DENSITY_FEATURE`` (cast to
        Float32), with null postcodes dropped and one row kept per postcode.

    Raises:
        ValueError: if ``postcode`` is missing, or if the canonical density
            column is absent and the number of fallback candidates is not
            exactly one.
    """
    frame = pl.scan_parquet(tree_density_postcodes_path)
    available = set(frame.collect_schema().names())
    if "postcode" not in available:
        raise ValueError(
            f"{tree_density_postcodes_path} is missing required column: postcode"
        )

    if TREE_DENSITY_FEATURE in available:
        source_column = TREE_DENSITY_FEATURE
    else:
        # Fall back to a radius-specific percentile column, but only when the
        # choice is unambiguous.
        matches = sorted(
            name
            for name in available
            if _POSTCODE_TREE_DENSITY_PERCENTILE_RE.match(name)
        )
        if len(matches) != 1:
            raise ValueError(
                f'{tree_density_postcodes_path} must contain column "{TREE_DENSITY_FEATURE}" '
                'or exactly one "Tree canopy density percentile within {radius}m" column; '
                f"found {len(matches)} postcode percentile columns"
            )
        source_column = matches[0]

    density = pl.col(source_column).cast(pl.Float32).alias(TREE_DENSITY_FEATURE)
    selected = frame.select(pl.col("postcode"), density)
    return selected.drop_nulls(["postcode"]).unique(["postcode"])
|
||||
|
||||
|
||||
def _validate_lad_source_coverage(
    iod_path: Path, ethnicity_path: Path, rental_prices_path: Path
) -> None:
    """Check that every 2024 LAD in the IoD file is covered by auxiliary sources.

    The set of LADs is read from the IoD parquet. Ethnicity data must cover all
    of them. Rental data must cover all of them except the codes listed in
    ``_RENT_SOURCE_UNAVAILABLE_LADS``; gaps limited to those codes are only
    reported via ``print``.

    Raises:
        ValueError: if the ethnicity data misses any LAD, or the rental data
            misses a LAD that is not in ``_RENT_SOURCE_UNAVAILABLE_LADS``.
    """
    code_col = "Local Authority District code (2024)"
    name_col = "Local Authority District name (2024)"
    iod_lads = pl.read_parquet(iod_path, columns=[code_col, name_col])
    iod_lads = iod_lads.rename({code_col: "lad", name_col: "lad_name"}).unique(["lad"])

    def _uncovered(path: Path, code_column: str) -> pl.DataFrame:
        # IoD LADs that have no matching code in the given source file.
        covered = pl.read_parquet(path, columns=[code_column]).rename(
            {code_column: "lad"}
        )
        return iod_lads.join(covered, on="lad", how="anti").sort("lad")

    missing_ethnicity = _uncovered(ethnicity_path, "Geography_code")
    if missing_ethnicity.height > 0:
        raise ValueError(
            "Ethnicity data is missing 2024 LAD coverage: "
            f"{missing_ethnicity.to_dicts()}"
        )

    missing_rent = _uncovered(rental_prices_path, "area_code")
    known_gaps = list(_RENT_SOURCE_UNAVAILABLE_LADS)
    unexpected_missing_rent = missing_rent.filter(~pl.col("lad").is_in(known_gaps))
    if unexpected_missing_rent.height > 0:
        raise ValueError(
            "Rental data is missing 2024 LAD coverage: "
            f"{unexpected_missing_rent.to_dicts()}"
        )

    # Anything still missing here is one of the known source-unavailable LADs.
    if missing_rent.height > 0:
        print(
            "PIPR has no LAD-level rent estimates for source-unavailable LADs; "
            f"rent will remain null there: {missing_rent.to_dicts()}"
        )
|
||||
|
||||
|
||||
def _validate_property_postcodes(df: pl.DataFrame) -> None:
    """Fail if any row has a null or blank ``Postcode``.

    Raises:
        ValueError: with the offending row count and up to ten sample rows
            (restricted to the identifying columns that are present).
    """
    postcode = pl.col("Postcode")
    is_blank = postcode.cast(pl.Utf8).str.strip_chars() == ""
    invalid = df.filter(postcode.is_null() | is_blank)

    if invalid.height > 0:
        # Only show columns that exist, so the error itself cannot fail.
        wanted = ("Postcode", "Address per Property Register", "Last known price")
        sample_cols = [name for name in wanted if name in invalid.columns]
        sample = invalid.select(sample_cols).head(10).to_dicts()
        raise ValueError(
            "Property rows missing a postcode after merge: "
            f"{invalid.height} rows. Sample: {sample}"
        )
|
||||
|
||||
|
||||
def _build(
|
||||
epc_pp_path: Path,
|
||||
arcgis_path: Path,
|
||||
|
|
@ -126,12 +238,14 @@ def _build(
|
|||
lsoa_population_path: Path,
|
||||
median_age_path: Path,
|
||||
election_results_path: Path,
|
||||
tree_density_addresses_path: Path | None = None,
|
||||
tree_density_postcodes_path: Path | None = None,
|
||||
) -> tuple[pl.DataFrame, pl.DataFrame]:
|
||||
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
|
||||
|
||||
Returns (postcode_df, properties_df).
|
||||
"""
|
||||
_validate_lad_source_coverage(iod_path, ethnicity_path, rental_prices_path)
|
||||
|
||||
wide = pl.scan_parquet(epc_pp_path).filter(
|
||||
pl.col("total_floor_area").is_null()
|
||||
| (pl.col("total_floor_area") > MIN_FLOOR_AREA_M2)
|
||||
|
|
@ -152,9 +266,15 @@ def _build(
|
|||
.drop("new_postcode")
|
||||
)
|
||||
|
||||
arcgis_raw = pl.scan_parquet(arcgis_path)
|
||||
postcode_country = arcgis_raw.select(
|
||||
pl.col("pcds").alias("postcode"),
|
||||
pl.col("ctry25cd"),
|
||||
).unique(["postcode"])
|
||||
wide = wide.join(postcode_country, on="postcode", how="left")
|
||||
|
||||
arcgis = (
|
||||
pl.scan_parquet(arcgis_path)
|
||||
.filter(pl.col("ctry25cd") == "E92000001") # England only
|
||||
arcgis_raw.filter(pl.col("ctry25cd") == "E92000001") # England only
|
||||
.filter(pl.col("doterm").is_null()) # Active postcodes only
|
||||
# NSPL Feb 2026 renamed geographic code columns to {field}{year}cd.
|
||||
# Alias them back to the short canonical names used across the
|
||||
|
|
@ -191,7 +311,9 @@ def _build(
|
|||
.cast(pl.UInt8)
|
||||
.alias("_bedrooms"),
|
||||
)
|
||||
rental = pl.scan_parquet(rental_prices_path)
|
||||
rental = pl.scan_parquet(rental_prices_path).select(
|
||||
"area_code", "bedrooms", "mean_monthly_rent"
|
||||
)
|
||||
wide = wide.join(
|
||||
rental,
|
||||
left_on=["Local Authority District code (2024)", "_bedrooms"],
|
||||
|
|
@ -260,17 +382,9 @@ def _build(
|
|||
school_proximity = pl.scan_parquet(school_proximity_path)
|
||||
wide = wide.join(school_proximity, on="postcode", how="left")
|
||||
|
||||
if tree_density_addresses_path is not None:
|
||||
tree_density = (
|
||||
pl.scan_parquet(tree_density_addresses_path)
|
||||
.select(
|
||||
pl.col("postcode"),
|
||||
pl.col("pp_address"),
|
||||
pl.col(TREE_DENSITY_FEATURE).cast(pl.Float32),
|
||||
)
|
||||
.unique(["postcode", "pp_address"])
|
||||
)
|
||||
wide = wide.join(tree_density, on=["postcode", "pp_address"], how="left")
|
||||
if tree_density_postcodes_path is not None:
|
||||
tree_density = _tree_density_by_postcode(tree_density_postcodes_path)
|
||||
wide = wide.join(tree_density, on="postcode", how="left")
|
||||
|
||||
# Broadband: derive max available download speed tier per postcode from
|
||||
# Ofcom availability percentages. Tiers: Gigabit ≥1000, UFBB ≥300,
|
||||
|
|
@ -415,6 +529,7 @@ def _build(
|
|||
|
||||
print("Collecting with streaming engine...")
|
||||
df = wide.collect(engine="streaming")
|
||||
_validate_property_postcodes(df)
|
||||
|
||||
# Split into postcode-level and property-level dataframes
|
||||
area_cols = [
|
||||
|
|
@ -508,10 +623,10 @@ def main():
|
|||
help="2024 General Election results by constituency parquet file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tree-density-addresses",
|
||||
"--tree-density-postcodes",
|
||||
type=Path,
|
||||
required=False,
|
||||
help="Address-level tree density parquet from pipeline.transform.tree_density",
|
||||
help="Postcode-level tree density parquet from pipeline.transform.tree_density",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-postcodes",
|
||||
|
|
@ -541,7 +656,7 @@ def main():
|
|||
lsoa_population_path=args.lsoa_population,
|
||||
median_age_path=args.median_age,
|
||||
election_results_path=args.election_results,
|
||||
tree_density_addresses_path=args.tree_density_addresses,
|
||||
tree_density_postcodes_path=args.tree_density_postcodes,
|
||||
)
|
||||
|
||||
print(f"\nPostcode columns: {postcode_df.columns}")
|
||||
|
|
|
|||
|
|
@ -227,7 +227,18 @@ def main():
|
|||
fa = test["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
|
||||
|
||||
print("\nComputing kNN estimates...")
|
||||
knn_psm = knn_median_psm(trees, lat, lon, tg)
|
||||
last_sale_dates = (
|
||||
test["input_date"].dt.epoch("d").fill_null(-1).to_numpy().astype(np.int64)
|
||||
)
|
||||
knn_psm = knn_median_psm(
|
||||
trees,
|
||||
lat,
|
||||
lon,
|
||||
tg,
|
||||
postcodes=test["Postcode"].fill_null("").to_numpy(),
|
||||
last_prices=test["input_price"].cast(pl.Float64).to_numpy(),
|
||||
last_sale_dates=last_sale_dates,
|
||||
)
|
||||
|
||||
# Temporal adjustment: pool PSM is at ref, adjust to actual
|
||||
log_idx_actual = test["log_index_actual"].to_numpy().astype(np.float64)
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ for lat/lon needed by kNN, then drops those columns before writing.
|
|||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
|
||||
from pipeline.transform.price_estimation.knn import (
|
||||
|
|
@ -28,6 +29,45 @@ from pipeline.transform.price_estimation.utils import (
|
|||
type_group_expr,
|
||||
)
|
||||
|
||||
MAX_KNN_TO_INDEX_RATIO = 2.0
|
||||
MIN_KNN_TO_INDEX_RATIO = 0.5
|
||||
MAX_ESTIMATE_TO_LAST_PRICE_RATIO = 6.0
|
||||
|
||||
|
||||
def guarded_blend_estimates(
    index_est: np.ndarray,
    knn_est: np.ndarray,
    last_prices: np.ndarray,
    weight: float = KNN_BLEND_WEIGHT,
) -> np.ndarray:
    """Blend only stable kNN estimates and cap final uplift from last sale price.

    Args:
        index_est: Index-based estimates; non-finite or non-positive entries
            are treated as "no index estimate".
        knn_est: kNN-based estimates; non-finite or non-positive entries are
            treated as "no kNN estimate".
        last_prices: Last sale prices used to cap the result; non-finite or
            non-positive entries mean "no cap".
        weight: Weight given to the kNN estimate in the blend.

    Returns:
        A float64 array, element-wise:
        - NaN where there is no usable index estimate;
        - the weighted blend where the kNN estimate is usable and lies within
          [MIN_KNN_TO_INDEX_RATIO, MAX_KNN_TO_INDEX_RATIO] times the index
          estimate;
        - the index estimate otherwise;
        and, where a cap exists, no larger than
        ``last_price * MAX_ESTIMATE_TO_LAST_PRICE_RATIO``.
    """
    # np.asarray (rather than .astype) also accepts lists and other array-likes.
    index_est = np.asarray(index_est, dtype=np.float64)
    knn_est = np.asarray(knn_est, dtype=np.float64)
    last_prices = np.asarray(last_prices, dtype=np.float64)

    has_index = np.isfinite(index_est) & (index_est > 0)
    has_knn = np.isfinite(knn_est) & (knn_est > 0)
    # A kNN estimate is only trusted when it is within a bounded ratio of the
    # index estimate, which in turn requires a usable index estimate.
    stable_knn = (
        has_index
        & has_knn
        & (knn_est >= index_est * MIN_KNN_TO_INDEX_RATIO)
        & (knn_est <= index_est * MAX_KNN_TO_INDEX_RATIO)
    )

    index_only = np.where(has_index, index_est, np.nan)
    blended = np.where(
        stable_knn,
        (1 - weight) * index_est + weight * knn_est,
        index_only,
    )

    cap = np.where(
        np.isfinite(last_prices) & (last_prices > 0),
        last_prices * MAX_ESTIMATE_TO_LAST_PRICE_RATIO,
        np.nan,
    )
    # Apply the cap only where both sides are finite so NaNs pass through as-is.
    can_cap = np.isfinite(cap) & np.isfinite(blended)
    return np.where(can_cap, np.minimum(blended, cap), blended)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
|
|
@ -130,36 +170,54 @@ def main():
|
|||
lon = df["lon"].cast(pl.Float64).to_numpy()
|
||||
tg = df["_type_group"].fill_null("").to_numpy()
|
||||
fa = df["Total floor area (sqm)"].cast(pl.Float64).fill_null(0.0).to_numpy()
|
||||
last_prices = (
|
||||
df["Last known price"].cast(pl.Float64).fill_null(float("nan")).to_numpy()
|
||||
)
|
||||
last_sale_dates = (
|
||||
df["Date of last transaction"]
|
||||
.dt.epoch("d")
|
||||
.fill_null(-1)
|
||||
.to_numpy()
|
||||
.astype(np.int64)
|
||||
)
|
||||
|
||||
knn_psm = knn_median_psm(trees, lat, lon, tg)
|
||||
knn_psm = knn_median_psm(
|
||||
trees,
|
||||
lat,
|
||||
lon,
|
||||
tg,
|
||||
postcodes=df["Postcode"].fill_null("").to_numpy(),
|
||||
last_prices=last_prices,
|
||||
last_sale_dates=last_sale_dates,
|
||||
)
|
||||
knn_est = knn_psm * fa # No temporal adj: ref == current
|
||||
|
||||
df = df.with_columns(
|
||||
pl.Series("_knn_est", knn_est, dtype=pl.Float64),
|
||||
)
|
||||
|
||||
# Blend: where kNN available, use weighted average; else keep index
|
||||
# Blend only when kNN is close to the index estimate; otherwise keep index.
|
||||
index_est = (
|
||||
df["Estimated current price"]
|
||||
.cast(pl.Float64)
|
||||
.fill_null(float("nan"))
|
||||
.to_numpy()
|
||||
)
|
||||
blended = guarded_blend_estimates(index_est, knn_est, last_prices)
|
||||
df = df.with_columns(
|
||||
pl.when(
|
||||
pl.col("Estimated current price").is_not_null()
|
||||
& pl.col("_knn_est").is_not_null()
|
||||
& pl.col("_knn_est").is_finite()
|
||||
& (pl.col("_knn_est") > 0)
|
||||
)
|
||||
.then(
|
||||
(1 - KNN_BLEND_WEIGHT) * pl.col("Estimated current price")
|
||||
+ KNN_BLEND_WEIGHT * pl.col("_knn_est")
|
||||
)
|
||||
.when(pl.col("Estimated current price").is_not_null())
|
||||
.then(pl.col("Estimated current price"))
|
||||
.otherwise(pl.lit(None))
|
||||
.alias("Estimated current price"),
|
||||
pl.Series("_index_est", index_est, dtype=pl.Float64),
|
||||
pl.Series("Estimated current price", blended, dtype=pl.Float64),
|
||||
).with_columns(
|
||||
pl.col("Estimated current price").fill_nan(None),
|
||||
)
|
||||
|
||||
n_blended = df.filter(
|
||||
pl.col("_knn_est").is_not_null()
|
||||
& pl.col("_knn_est").is_finite()
|
||||
& (pl.col("_knn_est") > 0)
|
||||
& (pl.col("_index_est").is_not_null())
|
||||
& (pl.col("_knn_est") >= pl.col("_index_est") * MIN_KNN_TO_INDEX_RATIO)
|
||||
& (pl.col("_knn_est") <= pl.col("_index_est") * MAX_KNN_TO_INDEX_RATIO)
|
||||
& pl.col("Estimated current price").is_not_null()
|
||||
).height
|
||||
print(f" kNN blended: {n_blended:,} of {n_estimated:,} estimates")
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@ from pipeline.transform.price_estimation.utils import (
|
|||
KNN_K = 20
|
||||
KNN_MIN_NEIGHBORS = 5
|
||||
KNN_BLEND_WEIGHT = 0.35
|
||||
MIN_COMPARABLE_FLOOR_AREA_SQM = 15.0
|
||||
MAX_COMPARABLE_FLOOR_AREA_SQM = 1_000.0
|
||||
MIN_COMPARABLE_PSM = 500.0
|
||||
MAX_COMPARABLE_PSM = 50_000.0
|
||||
|
||||
|
||||
def _scale_coords(lat: np.ndarray, lon: np.ndarray) -> np.ndarray:
|
||||
|
|
@ -33,13 +37,14 @@ def build_knn_pool(
|
|||
index: pl.DataFrame,
|
||||
ref_frac_year: float,
|
||||
max_sale_year: int | None = None,
|
||||
) -> dict[str, tuple[KDTree, np.ndarray]]:
|
||||
) -> dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]]:
|
||||
"""Build per-type_group KD-trees of index-adjusted price-per-sqm.
|
||||
|
||||
Adjusts all pool properties' sale prices to ref_frac_year using the index,
|
||||
then builds a KD-tree per type_group for nearest-neighbor queries.
|
||||
|
||||
Returns dict mapping type_group -> (KDTree over scaled lat/lon, adjusted_psm array).
|
||||
Returns dict mapping type_group to KDTree, adjusted PSM, and sale identity
|
||||
arrays used to keep the target sale out of its own comparable set.
|
||||
"""
|
||||
print("Building kNN pool...")
|
||||
lf = pl.scan_parquet(source) if isinstance(source, Path) else source
|
||||
|
|
@ -55,7 +60,8 @@ def build_knn_pool(
|
|||
pl.col("lat").is_not_null(),
|
||||
pl.col("lon").is_not_null(),
|
||||
pl.col("Total floor area (sqm)").is_not_null(),
|
||||
pl.col("Total floor area (sqm)") > 0,
|
||||
pl.col("Total floor area (sqm)") >= MIN_COMPARABLE_FLOOR_AREA_SQM,
|
||||
pl.col("Total floor area (sqm)") <= MAX_COMPARABLE_FLOOR_AREA_SQM,
|
||||
pl.col("Last known price").is_not_null(),
|
||||
pl.col("Last known price") > 0,
|
||||
pl.col("Postcode").is_not_null(),
|
||||
|
|
@ -97,12 +103,13 @@ def build_knn_pool(
|
|||
).filter(
|
||||
pl.col("_adj_psm").is_not_null(),
|
||||
pl.col("_adj_psm").is_finite(),
|
||||
pl.col("_adj_psm") > 0,
|
||||
pl.col("_adj_psm") >= MIN_COMPARABLE_PSM,
|
||||
pl.col("_adj_psm") <= MAX_COMPARABLE_PSM,
|
||||
)
|
||||
print(f" {len(pool):,} after index adjustment")
|
||||
|
||||
# Build per-type KD-trees
|
||||
trees: dict[str, tuple[KDTree, np.ndarray]] = {}
|
||||
trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]] = {}
|
||||
for tg in TYPE_GROUPS:
|
||||
sub = pool.filter(pl.col("type_group") == tg)
|
||||
n = len(sub)
|
||||
|
|
@ -111,19 +118,49 @@ def build_knn_pool(
|
|||
lat = sub["lat"].to_numpy().astype(np.float64)
|
||||
lon = sub["lon"].to_numpy().astype(np.float64)
|
||||
psm = sub["_adj_psm"].to_numpy().astype(np.float64)
|
||||
postcodes = sub["Postcode"].fill_null("").to_numpy()
|
||||
prices = sub["Last known price"].to_numpy().astype(np.float64)
|
||||
sale_dates = (
|
||||
sub["Date of last transaction"]
|
||||
.dt.epoch("d")
|
||||
.fill_null(-1)
|
||||
.to_numpy()
|
||||
.astype(np.int64)
|
||||
)
|
||||
tree = KDTree(_scale_coords(lat, lon))
|
||||
trees[tg] = (tree, psm)
|
||||
trees[tg] = (tree, psm, postcodes, prices, sale_dates)
|
||||
print(f" {tg}: {n:,}")
|
||||
|
||||
return trees
|
||||
|
||||
|
||||
def _sale_identity_matches(
|
||||
pool_postcodes: np.ndarray,
|
||||
pool_prices: np.ndarray,
|
||||
pool_sale_dates: np.ndarray,
|
||||
target_postcode: str,
|
||||
target_price: float,
|
||||
target_sale_date: int,
|
||||
) -> np.ndarray:
|
||||
if not target_postcode or not np.isfinite(target_price) or target_sale_date < 0:
|
||||
return np.zeros(len(pool_postcodes), dtype=bool)
|
||||
return (
|
||||
(pool_postcodes == target_postcode)
|
||||
& np.isfinite(pool_prices)
|
||||
& np.isclose(pool_prices, target_price, rtol=0.0, atol=0.5)
|
||||
& (pool_sale_dates == target_sale_date)
|
||||
)
|
||||
|
||||
|
||||
def knn_median_psm(
|
||||
trees: dict[str, tuple[KDTree, np.ndarray]],
|
||||
trees: dict[str, tuple[KDTree, np.ndarray, np.ndarray, np.ndarray, np.ndarray]],
|
||||
lat: np.ndarray,
|
||||
lon: np.ndarray,
|
||||
type_groups: np.ndarray,
|
||||
k: int = KNN_K,
|
||||
postcodes: np.ndarray | None = None,
|
||||
last_prices: np.ndarray | None = None,
|
||||
last_sale_dates: np.ndarray | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Return median adjusted-PSM of k nearest neighbours for each target.
|
||||
|
||||
|
|
@ -133,21 +170,41 @@ def knn_median_psm(
|
|||
n = len(lat)
|
||||
result = np.full(n, np.nan)
|
||||
|
||||
for tg, (tree, psm) in trees.items():
|
||||
for tg, (tree, psm, pool_postcodes, pool_prices, pool_sale_dates) in trees.items():
|
||||
mask = (type_groups == tg) & np.isfinite(lat) & np.isfinite(lon)
|
||||
idx = np.where(mask)[0]
|
||||
if len(idx) == 0:
|
||||
continue
|
||||
|
||||
actual_k = min(k, len(psm))
|
||||
if actual_k < KNN_MIN_NEIGHBORS:
|
||||
query_k = min(max(k * 2, k + KNN_MIN_NEIGHBORS), len(psm))
|
||||
if query_k < KNN_MIN_NEIGHBORS:
|
||||
continue
|
||||
|
||||
coords = _scale_coords(lat[idx], lon[idx])
|
||||
_, nn_idx = tree.query(coords, k=actual_k)
|
||||
_, nn_idx = tree.query(coords, k=query_k)
|
||||
if nn_idx.ndim == 1:
|
||||
nn_idx = nn_idx.reshape(-1, 1)
|
||||
|
||||
result[idx] = np.nanmedian(psm[nn_idx], axis=1)
|
||||
medians = np.full(len(idx), np.nan)
|
||||
for row_num, target_idx in enumerate(idx):
|
||||
candidates = nn_idx[row_num]
|
||||
if (
|
||||
postcodes is not None
|
||||
and last_prices is not None
|
||||
and last_sale_dates is not None
|
||||
):
|
||||
same_sale = _sale_identity_matches(
|
||||
pool_postcodes[candidates],
|
||||
pool_prices[candidates],
|
||||
pool_sale_dates[candidates],
|
||||
str(postcodes[target_idx] or ""),
|
||||
float(last_prices[target_idx]),
|
||||
int(last_sale_dates[target_idx]),
|
||||
)
|
||||
candidates = candidates[~same_sale]
|
||||
if len(candidates) >= KNN_MIN_NEIGHBORS:
|
||||
medians[row_num] = np.nanmedian(psm[candidates[:k]])
|
||||
|
||||
result[idx] = medians
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ TERRACE_TYPES = [
|
|||
"Terraced",
|
||||
]
|
||||
FLAT_TYPES = ["Flats/Maisonettes"]
|
||||
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats", "Bungalow"]
|
||||
TYPE_GROUPS = ["Detached", "Semi-Detached", "Terraced", "Flats"]
|
||||
SHRINKAGE_K = 50
|
||||
|
||||
|
||||
|
|
@ -30,8 +30,6 @@ def type_group_expr():
|
|||
.then(pl.lit("Terraced"))
|
||||
.when(pl.col("Property type").is_in(FLAT_TYPES))
|
||||
.then(pl.lit("Flats"))
|
||||
.when(pl.col("Property type") == "Bungalow")
|
||||
.then(pl.lit("Bungalow"))
|
||||
.when(pl.col("Property type").is_in(["Detached", "Semi-Detached"]))
|
||||
.then(pl.col("Property type"))
|
||||
.otherwise(pl.lit(None))
|
||||
|
|
@ -61,7 +59,7 @@ def hierarchy_keys(sector: str) -> tuple[str, str]:
|
|||
return district, area
|
||||
|
||||
|
||||
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats", "Bungalow"]
|
||||
NON_REF_TYPES = ["Terraced", "Semi-Detached", "Flats"]
|
||||
|
||||
|
||||
def build_hedonic_features(df: pl.DataFrame) -> np.ndarray:
|
||||
|
|
|
|||
|
|
@ -15,12 +15,21 @@ DROP_CATEGORIES = {
|
|||
"amenity/bicycle_parking",
|
||||
"amenity/binoculars",
|
||||
"amenity/boot_scraper",
|
||||
"amenity/bus_garage",
|
||||
"amenity/check_in",
|
||||
"amenity/clock",
|
||||
"amenity/clothes_dryer",
|
||||
"amenity/coast_guard",
|
||||
"amenity/coffin_rest",
|
||||
"amenity/compressed_air",
|
||||
"amenity/court_yard",
|
||||
"amenity/donation_box",
|
||||
"amenity/dressing_room",
|
||||
"amenity/drinking_water",
|
||||
"emergency/water_tank",
|
||||
"leisure/bleachers",
|
||||
"leisure/schoolyard",
|
||||
"public_transport/pay_scale_area",
|
||||
"shop/taxi",
|
||||
"amenity/feeding_place",
|
||||
"amenity/fixme",
|
||||
|
|
@ -31,6 +40,7 @@ DROP_CATEGORIES = {
|
|||
"amenity/lounge",
|
||||
"tourism/preserved_railway",
|
||||
"amenity/lounger",
|
||||
"leisure/sport",
|
||||
"amenity/motorcycle_parking",
|
||||
"amenity/mounting_block",
|
||||
"amenity/notice_board",
|
||||
|
|
@ -71,8 +81,12 @@ DROP_CATEGORIES = {
|
|||
"amenity/boat_storage",
|
||||
"amenity/bureau_de_change",
|
||||
"amenity/bus_station",
|
||||
"amenity/beachhut",
|
||||
"amenity/canteen",
|
||||
"amenity/conference_centre",
|
||||
"amenity/crematorium",
|
||||
"amenity/disused",
|
||||
"amenity/driver_training",
|
||||
"amenity/driving_school",
|
||||
"amenity/escooter_rental",
|
||||
"amenity/ferry_terminal",
|
||||
|
|
@ -82,14 +96,21 @@ DROP_CATEGORIES = {
|
|||
"amenity/kick-scooter_rental",
|
||||
"amenity/money_transfer",
|
||||
"amenity/post_depot",
|
||||
"amenity/prison",
|
||||
"amenity/public_building",
|
||||
"amenity/recycling",
|
||||
"amenity/scout_hut",
|
||||
"amenity/social_facility",
|
||||
"amenity/studio",
|
||||
"amenity/student_accommodation",
|
||||
"amenity/taxi",
|
||||
"amenity/telephone_exchange",
|
||||
"amenity/training",
|
||||
"amenity/vehicle_inspection",
|
||||
"amenity/waiting_room",
|
||||
"amenity/yes",
|
||||
"shop/disused",
|
||||
"shop/no",
|
||||
# Buildings (except church & university which are mapped)
|
||||
"building/air_shaft",
|
||||
"building/apartments",
|
||||
|
|
@ -148,12 +169,14 @@ DROP_CATEGORIES = {
|
|||
"emergency/yes",
|
||||
"tourism/apartment",
|
||||
"tourism/apartments",
|
||||
"tourism/alpine_hut",
|
||||
"tourism/camp_pitch",
|
||||
"tourism/caravan_site",
|
||||
"tourism/information",
|
||||
"tourism/picnic_site",
|
||||
"tourism/viewpoint",
|
||||
"tourism/village_sign",
|
||||
"tourism/wilderness_hut",
|
||||
"tourism/yes",
|
||||
# Public transport (from NaPTAN instead)
|
||||
"public_transport/entrance",
|
||||
|
|
@ -191,6 +214,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"🍺",
|
||||
[
|
||||
"amenity/pub",
|
||||
"amenity/beer_garden",
|
||||
"amenity/biergarten",
|
||||
"amenity/social_club",
|
||||
"amenity/club",
|
||||
"leisure/social_club",
|
||||
|
|
@ -293,7 +318,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"leisure/miniature_golf",
|
||||
"leisure/horse_riding",
|
||||
"leisure/fishing",
|
||||
"leisure/ice_rink",
|
||||
"leisure/paddling_pool",
|
||||
"leisure/practice_pitch",
|
||||
"leisure/shooting_ground",
|
||||
"leisure/stadium",
|
||||
"leisure/swimming_pool",
|
||||
"leisure/swimming_area",
|
||||
"leisure/water_park",
|
||||
"leisure/bathing_place",
|
||||
],
|
||||
|
|
@ -307,9 +338,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"leisure/amusement_arcade",
|
||||
"leisure/adult_gaming_centre",
|
||||
"leisure/escape_game",
|
||||
"leisure/maze",
|
||||
"leisure/trampoline_park",
|
||||
"leisure/sauna",
|
||||
"leisure/tanning_salon",
|
||||
"shop/amusements",
|
||||
"tourism/theme_park",
|
||||
"amenity/bicycle_rental",
|
||||
"amenity/boat_rental",
|
||||
|
|
@ -345,6 +378,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"shop/bakery",
|
||||
"shop/pastry",
|
||||
"craft/bakery",
|
||||
"craft/confectionery",
|
||||
],
|
||||
),
|
||||
|
|
@ -364,6 +398,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"shop/greengrocer",
|
||||
"shop/farm",
|
||||
"shop/market",
|
||||
"amenity/marketplace",
|
||||
],
|
||||
),
|
||||
|
|
@ -424,6 +459,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/appliance",
|
||||
"shop/electrical",
|
||||
"shop/hifi",
|
||||
"shop/vacuum_cleaner",
|
||||
"shop/video_games",
|
||||
"shop/games",
|
||||
],
|
||||
|
|
@ -444,7 +480,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"shop/doityourself",
|
||||
"shop/hardware",
|
||||
"shop/builders_merchant",
|
||||
"shop/paint",
|
||||
"shop/plumbing",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -462,11 +500,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/curtain",
|
||||
"shop/flooring",
|
||||
"shop/fireplace",
|
||||
"shop/garden_furniture",
|
||||
"shop/groundskeeping",
|
||||
"shop/household",
|
||||
"shop/household_linen",
|
||||
"shop/houseware",
|
||||
"shop/homeware",
|
||||
"shop/interior_decoration",
|
||||
"shop/lighting",
|
||||
"shop/kitchenware",
|
||||
"shop/window_blind",
|
||||
],
|
||||
),
|
||||
|
|
@ -493,8 +535,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"🏕️",
|
||||
[
|
||||
"shop/sports",
|
||||
"shop/angling",
|
||||
"shop/outdoor",
|
||||
"shop/bicycle",
|
||||
"shop/equestrian",
|
||||
"shop/surf",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -532,9 +577,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/music",
|
||||
"shop/musical_instrument",
|
||||
"shop/antiques",
|
||||
"shop/anime",
|
||||
"shop/baby_goods",
|
||||
"shop/fabric",
|
||||
"shop/haberdashery",
|
||||
"shop/hobby",
|
||||
"shop/wool",
|
||||
"shop/pottery",
|
||||
],
|
||||
|
|
@ -549,9 +596,13 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/bookmaker",
|
||||
"shop/building_materials",
|
||||
"shop/camera",
|
||||
"shop/cannabis",
|
||||
"shop/car",
|
||||
"shop/caravan",
|
||||
"shop/catalogue",
|
||||
"shop/auction",
|
||||
"shop/auction_house",
|
||||
"shop/chandler",
|
||||
"shop/collector",
|
||||
"shop/copyshop",
|
||||
"shop/country_store",
|
||||
|
|
@ -560,6 +611,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/erotic",
|
||||
"shop/esoteric",
|
||||
"shop/fan",
|
||||
"shop/fireworks",
|
||||
"shop/fishing",
|
||||
"shop/frame",
|
||||
"shop/fuel",
|
||||
|
|
@ -582,6 +634,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/scuba_diving",
|
||||
"shop/security",
|
||||
"shop/sewing",
|
||||
"shop/ship_chandler",
|
||||
"shop/signs",
|
||||
"shop/storage_rental",
|
||||
"shop/swimming_pool",
|
||||
"shop/telecommunication",
|
||||
|
|
@ -590,7 +644,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/tool_hire",
|
||||
"shop/trade",
|
||||
"shop/trophy",
|
||||
"shop/truck",
|
||||
"shop/vacant",
|
||||
"shop/van",
|
||||
"shop/video",
|
||||
"shop/water_sports",
|
||||
"shop/weapons",
|
||||
|
|
@ -611,6 +667,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"shop/cosmetics",
|
||||
"shop/massage",
|
||||
"shop/perfumery",
|
||||
"leisure/spa",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -757,6 +814,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"amenity/hospital",
|
||||
"amenity/clinic",
|
||||
"amenity/health_centre",
|
||||
"healthcare/blood_donation",
|
||||
"healthcare/hospital",
|
||||
"healthcare/centre",
|
||||
"healthcare/clinic",
|
||||
|
|
@ -804,6 +863,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
[
|
||||
"amenity/care_home",
|
||||
"amenity/nursing_home",
|
||||
"amenity/retirement_home",
|
||||
"healthcare/hospice",
|
||||
"healthcare/nursing_home",
|
||||
"office/home_care",
|
||||
],
|
||||
),
|
||||
|
|
@ -848,6 +910,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"⛪",
|
||||
[
|
||||
"amenity/place_of_worship",
|
||||
"amenity/monastery",
|
||||
"building/church",
|
||||
],
|
||||
),
|
||||
|
|
@ -873,6 +936,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"📸",
|
||||
[
|
||||
"tourism/attraction",
|
||||
"tourism/aquarium",
|
||||
"amenity/fountain",
|
||||
"amenity/courthouse",
|
||||
"tourism/chalet",
|
||||
|
|
@ -892,6 +956,7 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"building/university",
|
||||
"amenity/kindergarten",
|
||||
"amenity/childcare",
|
||||
"office/tutoring",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -904,6 +969,9 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"tourism/guest_house",
|
||||
"tourism/motel",
|
||||
"tourism/camp_site",
|
||||
"leisure/resort",
|
||||
"tourism/holiday_park",
|
||||
"tourism/self_catering",
|
||||
],
|
||||
),
|
||||
(
|
||||
|
|
@ -928,14 +996,19 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"craft/window_construction",
|
||||
"craft/agricultural_engines",
|
||||
"craft/atelier",
|
||||
"craft/beekeeper",
|
||||
"craft/blacksmith",
|
||||
"craft/bookbinder",
|
||||
"craft/boatbuilder",
|
||||
"craft/caterer",
|
||||
"craft/carpet_layer",
|
||||
"craft/clockmaker",
|
||||
"craft/handicraft",
|
||||
"craft/jeweller",
|
||||
"craft/metal_construction",
|
||||
"craft/photographer",
|
||||
"craft/photographic_laboratory",
|
||||
"craft/plasterer",
|
||||
"craft/pottery",
|
||||
"craft/printer",
|
||||
"craft/sawmill",
|
||||
|
|
@ -946,22 +1019,28 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"craft/upholsterer",
|
||||
"craft/watchmaker",
|
||||
"craft/yes",
|
||||
"amenity/workshop",
|
||||
"shop/glaziery",
|
||||
"shop/windows",
|
||||
# Professional offices & estate agents
|
||||
"shop/estate_agent",
|
||||
"office/accountant",
|
||||
"office/architect",
|
||||
"office/auctioneer",
|
||||
"office/builder",
|
||||
"office/construction",
|
||||
"office/construction_company",
|
||||
"office/engineer",
|
||||
"office/estate_agent",
|
||||
"office/financial",
|
||||
"office/financial_advisor",
|
||||
"office/financial_services",
|
||||
"office/insurance",
|
||||
"office/lawyer",
|
||||
"office/mortgage",
|
||||
"office/property_management",
|
||||
"office/solicitor",
|
||||
"office/solicitors",
|
||||
"office/surveyor",
|
||||
"office/tax_advisor",
|
||||
],
|
||||
|
|
@ -972,6 +1051,8 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"🏢",
|
||||
[
|
||||
"amenity/coworking_space",
|
||||
"amenity/research_institute",
|
||||
"office/administrative",
|
||||
"office/advertising_agency",
|
||||
"office/association",
|
||||
"office/charity",
|
||||
|
|
@ -997,12 +1078,15 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"office/notary",
|
||||
"office/political_party",
|
||||
"office/politician",
|
||||
"office/publisher",
|
||||
"office/quango",
|
||||
"office/recruitment",
|
||||
"office/religion",
|
||||
"office/research",
|
||||
"office/security",
|
||||
"office/taxi",
|
||||
"office/telecommunication",
|
||||
"office/transport",
|
||||
"office/union",
|
||||
"office/university",
|
||||
"office/vacant",
|
||||
|
|
@ -1032,7 +1116,11 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"Community Centre",
|
||||
"🤝",
|
||||
[
|
||||
"amenity/church_hall",
|
||||
"amenity/clubhouse",
|
||||
"amenity/community_centre",
|
||||
"amenity/community_hall",
|
||||
"amenity/scout_hall",
|
||||
"amenity/social_centre",
|
||||
"amenity/townhall",
|
||||
],
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ from scipy.spatial import cKDTree
|
|||
from .haversine import haversine_km
|
||||
|
||||
EARTH_RADIUS_KM = 6371.0088
|
||||
KM_PER_DEGREE_LAT = 111.32
|
||||
DEFAULT_GRID_SIZE_DEGREES = 0.02
|
||||
|
||||
|
||||
def _build_poi_grid(
|
||||
|
|
@ -34,16 +36,29 @@ def _build_poi_grid(
|
|||
|
||||
|
||||
def _get_nearby_indices(
|
||||
pc_lat: float, pc_lon: float, poi_grid: dict, grid_size: float = 0.05
|
||||
pc_lat: float,
|
||||
pc_lon: float,
|
||||
poi_grid: dict,
|
||||
radius_km: float,
|
||||
grid_size: float = DEFAULT_GRID_SIZE_DEGREES,
|
||||
) -> np.ndarray | None:
|
||||
"""Get POI indices from grid cells near the given coordinate."""
|
||||
grid_lat = int(np.floor(pc_lat / grid_size))
|
||||
grid_lng = int(np.floor(pc_lon / grid_size))
|
||||
"""Get POI indices from all grid cells intersecting the radius bounding box."""
|
||||
if not np.isfinite(pc_lat) or not np.isfinite(pc_lon):
|
||||
return None
|
||||
|
||||
lat_delta = radius_km / KM_PER_DEGREE_LAT
|
||||
cos_lat = abs(np.cos(np.radians(pc_lat)))
|
||||
lng_delta = 180.0 if cos_lat < 1e-12 else radius_km / (KM_PER_DEGREE_LAT * cos_lat)
|
||||
|
||||
min_grid_lat = int(np.floor((pc_lat - lat_delta) / grid_size))
|
||||
max_grid_lat = int(np.floor((pc_lat + lat_delta) / grid_size))
|
||||
min_grid_lng = int(np.floor((pc_lon - lng_delta) / grid_size))
|
||||
max_grid_lng = int(np.floor((pc_lon + lng_delta) / grid_size))
|
||||
|
||||
nearby_indices = []
|
||||
for dlat in [-1, 0, 1]:
|
||||
for dlng in [-1, 0, 1]:
|
||||
cell_key = (grid_lat + dlat, grid_lng + dlng)
|
||||
for grid_lat in range(min_grid_lat, max_grid_lat + 1):
|
||||
for grid_lng in range(min_grid_lng, max_grid_lng + 1):
|
||||
cell_key = (grid_lat, grid_lng)
|
||||
if cell_key in poi_grid:
|
||||
nearby_indices.append(poi_grid[cell_key])
|
||||
|
||||
|
|
@ -83,7 +98,7 @@ def count_pois_per_postcode(
|
|||
n_pois = len(pois)
|
||||
print(f" {n_postcodes:,} postcodes, {n_pois:,} POIs")
|
||||
|
||||
grid_size = 0.05
|
||||
grid_size = DEFAULT_GRID_SIZE_DEGREES
|
||||
print(" Building POI spatial grid...")
|
||||
poi_lats, poi_lngs, poi_cats, poi_grid = _build_poi_grid(pois, grid_size)
|
||||
print(f" POI grid has {len(poi_grid):,} occupied cells")
|
||||
|
|
@ -120,7 +135,9 @@ def count_pois_per_postcode(
|
|||
|
||||
# Process batch
|
||||
for i in range(start_idx, end_idx):
|
||||
nearby = _get_nearby_indices(pc_lats[i], pc_lons[i], poi_grid, grid_size)
|
||||
nearby = _get_nearby_indices(
|
||||
pc_lats[i], pc_lons[i], poi_grid, radius_km, grid_size
|
||||
)
|
||||
if nearby is None:
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -215,6 +215,14 @@ struct Cli {
|
|||
#[arg(long, env = "STRIPE_REFERRAL_COUPON_ID")]
|
||||
stripe_referral_coupon_id: String,
|
||||
|
||||
/// Bearer token required to scrape /metrics.
|
||||
#[arg(long, env = "METRICS_BEARER_TOKEN")]
|
||||
metrics_bearer_token: Option<String>,
|
||||
|
||||
/// Allow unauthenticated /metrics scraping when no METRICS_BEARER_TOKEN is set.
|
||||
#[arg(long, env = "ALLOW_PUBLIC_METRICS", default_value_t = false)]
|
||||
allow_public_metrics: bool,
|
||||
|
||||
/// Google OAuth client ID for PocketBase SSO
|
||||
#[arg(long, env = "GOOGLE_OAUTH_CLIENT_ID")]
|
||||
google_oauth_client_id: String,
|
||||
|
|
@ -246,6 +254,8 @@ async fn main() -> anyhow::Result<()> {
|
|||
info!("Prometheus metrics initialized");
|
||||
|
||||
let cli = Cli::parse();
|
||||
let metrics_bearer_token = cli.metrics_bearer_token.clone();
|
||||
let allow_public_metrics = cli.allow_public_metrics;
|
||||
|
||||
for (label, path) in [
|
||||
("Properties", &cli.properties),
|
||||
|
|
@ -510,7 +520,10 @@ async fn main() -> anyhow::Result<()> {
|
|||
let public_url_tiles = initial_state.public_url.clone();
|
||||
|
||||
let api = Router::new()
|
||||
.route("/api/features", get(routes::get_features))
|
||||
.route(
|
||||
"/api/features",
|
||||
get(routes::get_features).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/hexagons",
|
||||
get(routes::get_hexagons).layer(ConcurrencyLimitLayer::new(20)),
|
||||
|
|
@ -519,30 +532,57 @@ async fn main() -> anyhow::Result<()> {
|
|||
"/api/postcodes",
|
||||
get(routes::get_postcodes).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route("/api/postcode/{postcode}", get(routes::get_postcode_lookup))
|
||||
.route("/api/nearest-postcode", get(routes::get_nearest_postcode))
|
||||
.route(
|
||||
"/api/postcode/{postcode}",
|
||||
get(routes::get_postcode_lookup).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/nearest-postcode",
|
||||
get(routes::get_nearest_postcode).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route(
|
||||
"/api/pois",
|
||||
get(routes::get_pois).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route("/api/poi-categories", get(routes::get_poi_categories))
|
||||
.route("/api/places", get(routes::get_places))
|
||||
.route("/api/travel-modes", get(routes::get_travel_modes))
|
||||
.route(
|
||||
"/api/poi-categories",
|
||||
get(routes::get_poi_categories).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/places",
|
||||
get(routes::get_places).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route(
|
||||
"/api/travel-modes",
|
||||
get(routes::get_travel_modes).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/travel-destinations",
|
||||
get(routes::get_travel_destinations),
|
||||
get(routes::get_travel_destinations).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route(
|
||||
"/api/journey",
|
||||
get(routes::get_journey).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route("/api/journey", get(routes::get_journey))
|
||||
.route(
|
||||
"/api/hexagon-properties",
|
||||
get(routes::get_hexagon_properties),
|
||||
get(routes::get_hexagon_properties).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route(
|
||||
"/api/filter-counts",
|
||||
get(routes::get_filter_counts).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/api/hexagon-stats",
|
||||
get(routes::get_hexagon_stats).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/api/postcode-stats",
|
||||
get(routes::get_postcode_stats).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route("/api/filter-counts", get(routes::get_filter_counts))
|
||||
.route("/api/hexagon-stats", get(routes::get_hexagon_stats))
|
||||
.route("/api/postcode-stats", get(routes::get_postcode_stats))
|
||||
.route(
|
||||
"/api/postcode-properties",
|
||||
get(routes::get_postcode_properties),
|
||||
get(routes::get_postcode_properties).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route(
|
||||
"/api/screenshot",
|
||||
|
|
@ -552,13 +592,26 @@ async fn main() -> anyhow::Result<()> {
|
|||
"/api/export",
|
||||
get(routes::get_export).layer(ConcurrencyLimitLayer::new(3)),
|
||||
)
|
||||
.route("/api/me", get(routes::get_me))
|
||||
.route("/api/shorten", post(routes::post_shorten))
|
||||
.route(
|
||||
"/api/me",
|
||||
get(routes::get_me).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/shorten",
|
||||
post(routes::post_shorten).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/api/share-links",
|
||||
get(routes::get_share_links).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route(
|
||||
"/api/ai-filters",
|
||||
post(routes::post_ai_filters).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route("/api/streetview", get(routes::get_streetview))
|
||||
.route(
|
||||
"/api/streetview",
|
||||
get(routes::get_streetview).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/api/rightmove-search",
|
||||
get(routes::get_rightmove_redirect).layer(ConcurrencyLimitLayer::new(10)),
|
||||
|
|
@ -567,23 +620,44 @@ async fn main() -> anyhow::Result<()> {
|
|||
"/api/newsletter",
|
||||
patch(routes::patch_newsletter).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route("/api/pricing", get(routes::get_pricing))
|
||||
.route(
|
||||
"/api/pricing",
|
||||
get(routes::get_pricing).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/api/checkout",
|
||||
post(routes::post_checkout).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route("/api/stripe-webhook", post(routes::post_stripe_webhook))
|
||||
.route(
|
||||
"/api/stripe-webhook",
|
||||
post(routes::post_stripe_webhook).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route(
|
||||
"/api/invites",
|
||||
get(routes::get_invites).post(routes::post_invites),
|
||||
get(routes::get_invites)
|
||||
.post(routes::post_invites)
|
||||
.layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/api/invite/{code}",
|
||||
get(routes::get_invite).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/api/redeem-invite",
|
||||
post(routes::post_redeem_invite).layer(ConcurrencyLimitLayer::new(5)),
|
||||
)
|
||||
.route(
|
||||
"/s/{code}",
|
||||
get(routes::get_short_url).layer(ConcurrencyLimitLayer::new(10)),
|
||||
)
|
||||
.route("/api/invite/{code}", get(routes::get_invite))
|
||||
.route("/api/redeem-invite", post(routes::post_redeem_invite))
|
||||
.route("/s/{code}", get(routes::get_short_url))
|
||||
.route(
|
||||
"/api/telemetry",
|
||||
post(routes::post_telemetry).layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route(
|
||||
"/pb/api/realtime",
|
||||
any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(50)),
|
||||
)
|
||||
.route(
|
||||
"/pb/{*rest}",
|
||||
any(routes::proxy_to_pocketbase).layer(ConcurrencyLimitLayer::new(10)),
|
||||
|
|
@ -591,19 +665,28 @@ async fn main() -> anyhow::Result<()> {
|
|||
// Tile routes use a different state type — kept as closures
|
||||
.route(
|
||||
"/api/tiles/{z}/{x}/{y}",
|
||||
get(move |path| routes::get_tile(axum::extract::State(reader_tile.clone()), path)),
|
||||
get(move |path| routes::get_tile(axum::extract::State(reader_tile.clone()), path))
|
||||
.layer(ConcurrencyLimitLayer::new(30)),
|
||||
)
|
||||
.route(
|
||||
"/api/tiles/style.json",
|
||||
get(move |query| {
|
||||
let pu = public_url_tiles.clone();
|
||||
routes::get_style(axum::extract::State(reader_style.clone()), pu, query)
|
||||
}),
|
||||
})
|
||||
.layer(ConcurrencyLimitLayer::new(20)),
|
||||
)
|
||||
.route("/health", get(|| async { "ok" }))
|
||||
.route(
|
||||
"/metrics",
|
||||
get(move || metrics::metrics_handler(metrics_handle.clone())),
|
||||
get(move |headers| {
|
||||
metrics::metrics_handler(
|
||||
metrics_handle.clone(),
|
||||
metrics_bearer_token.clone(),
|
||||
allow_public_metrics,
|
||||
headers,
|
||||
)
|
||||
}),
|
||||
)
|
||||
.with_state(shared.clone());
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use serde::Deserialize;
|
|||
use tracing::{info, warn};
|
||||
|
||||
use crate::auth::OptionalUser;
|
||||
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, MAX_PROPERTIES_LIMIT, POSTCODE_SEARCH_OFFSET};
|
||||
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, POSTCODE_SEARCH_OFFSET};
|
||||
use crate::licensing::{check_license_point, resolve_share_code};
|
||||
use crate::parsing::{parse_filters_with_poi, row_passes_filters, row_passes_poi_filters};
|
||||
use crate::state::SharedState;
|
||||
|
|
@ -151,10 +151,7 @@ pub async fn get_postcode_properties(
|
|||
});
|
||||
|
||||
let total = matching_rows.len();
|
||||
let limit = params
|
||||
.limit
|
||||
.unwrap_or(DEFAULT_PROPERTIES_LIMIT)
|
||||
.min(MAX_PROPERTIES_LIMIT);
|
||||
let limit = params.limit.unwrap_or(DEFAULT_PROPERTIES_LIMIT);
|
||||
let page_offset = params.offset.unwrap_or(0);
|
||||
let truncated = total > page_offset + limit;
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ use tracing::info;
|
|||
|
||||
use crate::aggregation::{Aggregator, EnumDistConfig, PoiAggregator};
|
||||
use crate::auth::OptionalUser;
|
||||
use crate::consts::MAX_CELLS_PER_REQUEST;
|
||||
use crate::data::travel_time::TravelData;
|
||||
use crate::licensing::{check_license_bounds, resolve_share_code};
|
||||
use crate::parsing::{
|
||||
|
|
@ -354,73 +353,61 @@ pub async fn get_postcodes(
|
|||
|
||||
features.push(feature);
|
||||
included_postcodes.insert(pc_idx);
|
||||
|
||||
if features.len() >= MAX_CELLS_PER_REQUEST {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if features.len() < MAX_CELLS_PER_REQUEST {
|
||||
for pc_idx in selectable_postcodes {
|
||||
if included_postcodes.contains(&pc_idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
|
||||
|
||||
if !bounds_intersect(
|
||||
pc_south as f64,
|
||||
pc_west as f64,
|
||||
pc_north as f64,
|
||||
pc_east as f64,
|
||||
south,
|
||||
west,
|
||||
north,
|
||||
east,
|
||||
) {
|
||||
filtered_out += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let geometry = postcode_data.geometry_geojson(pc_idx);
|
||||
let centroid = postcode_data.centroids[pc_idx];
|
||||
let mut props = Map::new();
|
||||
props.insert(
|
||||
"postcode".into(),
|
||||
Value::String(postcode_data.postcodes[pc_idx].clone()),
|
||||
);
|
||||
props.insert("count".into(), Value::from(0));
|
||||
props.insert(
|
||||
"centroid".into(),
|
||||
Value::Array(vec![
|
||||
Value::from(centroid.1 as f64),
|
||||
Value::from(centroid.0 as f64),
|
||||
]),
|
||||
);
|
||||
|
||||
let mut feature = Map::new();
|
||||
feature.insert("type".into(), Value::String("Feature".into()));
|
||||
feature.insert("geometry".into(), geometry);
|
||||
feature.insert("properties".into(), Value::Object(props));
|
||||
|
||||
features.push(feature);
|
||||
|
||||
if features.len() >= MAX_CELLS_PER_REQUEST {
|
||||
break;
|
||||
}
|
||||
for pc_idx in selectable_postcodes {
|
||||
if included_postcodes.contains(&pc_idx) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
|
||||
|
||||
if !bounds_intersect(
|
||||
pc_south as f64,
|
||||
pc_west as f64,
|
||||
pc_north as f64,
|
||||
pc_east as f64,
|
||||
south,
|
||||
west,
|
||||
north,
|
||||
east,
|
||||
) {
|
||||
filtered_out += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let geometry = postcode_data.geometry_geojson(pc_idx);
|
||||
let centroid = postcode_data.centroids[pc_idx];
|
||||
let mut props = Map::new();
|
||||
props.insert(
|
||||
"postcode".into(),
|
||||
Value::String(postcode_data.postcodes[pc_idx].clone()),
|
||||
);
|
||||
props.insert("count".into(), Value::from(0));
|
||||
props.insert(
|
||||
"centroid".into(),
|
||||
Value::Array(vec![
|
||||
Value::from(centroid.1 as f64),
|
||||
Value::from(centroid.0 as f64),
|
||||
]),
|
||||
);
|
||||
|
||||
let mut feature = Map::new();
|
||||
feature.insert("type".into(), Value::String("Feature".into()));
|
||||
feature.insert("geometry".into(), geometry);
|
||||
feature.insert("properties".into(), Value::Object(props));
|
||||
|
||||
features.push(feature);
|
||||
}
|
||||
|
||||
histogram!("postcodes_response_count").record(features.len() as f64);
|
||||
|
||||
let truncated = features.len() >= MAX_CELLS_PER_REQUEST;
|
||||
let t_total = t0.elapsed();
|
||||
info!(
|
||||
postcodes_before_filter,
|
||||
matching_postcodes,
|
||||
postcodes_after_filter = features.len(),
|
||||
filtered_out,
|
||||
truncated,
|
||||
bounds = format_args!("{:.6},{:.6},{:.6},{:.6}", south, west, north, east),
|
||||
filters = num_filters,
|
||||
filters_raw = filters_str.as_deref().unwrap_or("-"),
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import {
|
|||
hex,
|
||||
vfrac,
|
||||
type Activity,
|
||||
type AdScene,
|
||||
type Storyboard,
|
||||
type TravelTimeFilter,
|
||||
type VideoConfig,
|
||||
|
|
@ -629,30 +628,6 @@ const AD_DEFAULT_FILTERS: Record<string, [number, number] | string[]> = {
|
|||
'Outstanding primary schools within 2km': [0, 10],
|
||||
};
|
||||
|
||||
/**
|
||||
* Stable Unsplash CDN photo URLs. Each one is a 720-wide JPEG fetched at
|
||||
* record time. The CDN serves with permissive CORS, no auth needed, and
|
||||
* the IDs are stable URLs (Unsplash does not rotate them). If any photo
|
||||
* stops resolving, dom.ts hides the broken image and the rest of the
|
||||
* scene still renders, so a 404 here degrades to text-only rather than
|
||||
* breaking the ad. To swap a photo, search unsplash.com for the theme
|
||||
* and paste the `photo-{id}` slug from the URL bar.
|
||||
*/
|
||||
const PHOTO = {
|
||||
terracedRow: 'https://images.unsplash.com/photo-1769344694490-66fb22a8d8cf?w=720&q=80&auto=format&fit=crop',
|
||||
brickStreet: 'https://images.unsplash.com/photo-1689867373120-355ce130d485?w=720&q=80&auto=format&fit=crop',
|
||||
woodAccentHouses: 'https://images.unsplash.com/photo-1753198412280-b4a9729c1c51?w=720&q=80&auto=format&fit=crop',
|
||||
colourfulRow: 'https://images.unsplash.com/photo-1718579019220-98697dc2fd72?w=720&q=80&auto=format&fit=crop',
|
||||
busyTraffic: 'https://images.unsplash.com/photo-1645718171033-574c88494de2?w=720&q=80&auto=format&fit=crop',
|
||||
cityTraffic: 'https://images.unsplash.com/photo-1714128949057-f7ac4cb71e6c?w=720&q=80&auto=format&fit=crop',
|
||||
trafficLight: 'https://images.unsplash.com/photo-1680276553514-357f2edc46a1?w=720&q=80&auto=format&fit=crop',
|
||||
leafySuburb: 'https://images.unsplash.com/photo-1663651884092-a2449ed3671a?w=720&q=80&auto=format&fit=crop',
|
||||
suburbHomes: 'https://images.unsplash.com/photo-1768301346584-86e781872b82?w=720&q=80&auto=format&fit=crop',
|
||||
trainPlatform: 'https://images.unsplash.com/photo-1684934899514-772e03714de5?w=720&q=80&auto=format&fit=crop',
|
||||
trainClock: 'https://images.unsplash.com/photo-1657441629839-874d398b6e04?w=720&q=80&auto=format&fit=crop',
|
||||
keysFrontDoor: 'https://images.unsplash.com/photo-1741156386380-0236c72eb6f9?w=720&q=80&auto=format&fit=crop',
|
||||
};
|
||||
|
||||
const linger = (durationMs = 360): Activity[] => [{ kind: 'wait', durationMs }];
|
||||
|
||||
/**
|
||||
|
|
@ -781,15 +756,6 @@ const ttDragAct = (toMin: number, durationMs = 1400): Activity => ({
|
|||
toFraction: toMin / TT_SLIDER_MAX,
|
||||
durationMs,
|
||||
});
|
||||
const showScene = (scene: AdScene): Activity => ({
|
||||
kind: 'showAdScene',
|
||||
scene,
|
||||
durationMs: 0,
|
||||
});
|
||||
const hideScene = (durationMs = 320): Activity => ({
|
||||
kind: 'hideAdScene',
|
||||
durationMs,
|
||||
});
|
||||
const wait = (durationMs: number): Activity => ({ kind: 'wait', durationMs });
|
||||
const mapZoomIn = (durationMs = 1400, steps = 5): Activity => ({
|
||||
kind: 'mapZoom',
|
||||
|
|
@ -860,17 +826,18 @@ const LONDON_VIEW = { lat: 51.4672, lon: -0.1276, zoom: 10.5 };
|
|||
const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
||||
// -------------------------------------------------------------------
|
||||
// 01 — Search by sentence. Type the prompt on camera, narration runs
|
||||
// simultaneously. Filters relevant: commute + crime + schools.
|
||||
// simultaneously. Filters relevant: price + commute + crime + noise.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-01-london-prompt',
|
||||
city: 'london',
|
||||
promptText:
|
||||
'Two bed in London, 35 min to centre, lower crime, lower noise',
|
||||
'London flat under £600k, 35 min to centre, lower crime, lower noise',
|
||||
filters: {
|
||||
'Property type': ['Flats/Maisonettes'],
|
||||
'Estimated current price': [0, 600000],
|
||||
'Serious crime per 1k residents (avg/yr)': [0, 50],
|
||||
'Road noise score (mean dB)': [0, 60],
|
||||
'Noise (dB)': [0, 58],
|
||||
},
|
||||
travelTimeFilters: [
|
||||
{ mode: 'transit', slug: 'london', label: 'London city centre', max: 35 },
|
||||
|
|
@ -879,20 +846,20 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
posterTimeS: 8,
|
||||
cues: [
|
||||
{
|
||||
text: 'Describe the London home you actually want.',
|
||||
text: 'Stop searching listing by listing. Search by the area brief.',
|
||||
during: [typeAct(
|
||||
'Two bed in London, 35 min to centre, lower crime, lower noise',
|
||||
'London flat under £600k, 35 min to centre, lower crime, lower noise',
|
||||
2800
|
||||
)],
|
||||
tail: [wait(200)],
|
||||
},
|
||||
{
|
||||
text: 'Hit search. The map answers in one second.',
|
||||
text: 'Price, commute, crime and noise land on the map together.',
|
||||
during: [submitAct(1100)],
|
||||
tail: [wait(700)],
|
||||
},
|
||||
{
|
||||
text: 'Every lit postcode fits all five rules at once.',
|
||||
text: 'Every lit postcode is somewhere worth checking first.',
|
||||
tail: [wait(600)],
|
||||
},
|
||||
],
|
||||
|
|
@ -914,16 +881,16 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
posterTimeS: 5.5,
|
||||
cues: [
|
||||
{
|
||||
text: 'Watch what one slider does to your shortlist.',
|
||||
text: 'Your commute limit should change the map, not your patience.',
|
||||
tail: [wait(200)],
|
||||
},
|
||||
{
|
||||
text: 'Drag forty minutes down to fifteen.',
|
||||
text: 'Drag forty minutes down to fifteen minutes.',
|
||||
during: [ttDragAct(15, 1900)],
|
||||
tail: [wait(700)],
|
||||
},
|
||||
{
|
||||
text: 'Half the map just lost its place.',
|
||||
text: 'The reachable postcodes disappear in front of you.',
|
||||
tail: [wait(600)],
|
||||
},
|
||||
],
|
||||
|
|
@ -946,17 +913,17 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
posterTimeS: 10,
|
||||
cues: [
|
||||
{
|
||||
text: 'Type the brief. Map fills with matching areas.',
|
||||
text: 'Type a family brief and watch matching areas appear.',
|
||||
during: [typeAct('Family home in London, decent schools nearby', 2400), submitAct(900)],
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'Zoom past the hexagons. Real postcodes break open.',
|
||||
text: 'Zoom from area patterns into actual postcodes.',
|
||||
during: [mapZoomIn(3000, 10)],
|
||||
tail: [wait(400)],
|
||||
},
|
||||
{
|
||||
text: 'Tap one. Sold prices, schools, crime, noise.',
|
||||
text: 'Tap one for sold prices and street-level context.',
|
||||
during: [
|
||||
{ kind: 'cursorScale', scale: 1.3, durationMs: 200 },
|
||||
clickHex(900),
|
||||
|
|
@ -992,7 +959,7 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
posterTimeS: 6,
|
||||
cues: [
|
||||
{
|
||||
text: 'Four hundred grand. London. Thirty minute commute.',
|
||||
text: 'London under four hundred thousand, with a thirty minute commute.',
|
||||
during: [typeAct(
|
||||
'Flat in London under £400k, 30 min to centre, lower crime',
|
||||
2800
|
||||
|
|
@ -1000,61 +967,57 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
tail: [wait(400)],
|
||||
},
|
||||
{
|
||||
text: 'Watch the filters stack and the map shrink.',
|
||||
text: 'The active filters stack up as the map tightens.',
|
||||
during: [scrollFilters(280, 900)],
|
||||
tail: [wait(600)],
|
||||
},
|
||||
{
|
||||
text: 'Every lit postcode hits all four rules.',
|
||||
text: 'Now the cheap-looking areas have to pass the brief.',
|
||||
tail: [wait(500)],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// 05 — Two streets apart. Photo split is the hook. Caption stays
|
||||
// SHORT so it does not compete with the overlay's title text.
|
||||
// 05 — Two streets apart. Product-led now: noise + crime filters are
|
||||
// typed and submitted on screen instead of masking the product with
|
||||
// generic street photos.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-05-two-streets-apart',
|
||||
city: 'london',
|
||||
promptText: 'Quieter London, lower road noise',
|
||||
promptText: 'Quiet London streets, lower noise, lower serious crime',
|
||||
filters: {
|
||||
'Road noise score (mean dB)': [0, 58],
|
||||
'Serious crime per 1k residents (avg/yr)': [0, 50],
|
||||
'Noise (dB)': [0, 55],
|
||||
'Serious crime per 1k residents (avg/yr)': [0, 45],
|
||||
},
|
||||
initialZoom: 10.6,
|
||||
posterTimeS: 4,
|
||||
cues: [
|
||||
{
|
||||
text: 'Two homes. Four hundred metres apart.',
|
||||
during: [showScene({
|
||||
mode: 'split',
|
||||
accent: 'rose',
|
||||
kicker: 'Two streets',
|
||||
title: 'Same price tag.',
|
||||
images: [PHOTO.terracedRow, PHOTO.busyTraffic],
|
||||
left: { title: 'Street A', meta: 'Quiet', tone: 'good' },
|
||||
right: { title: 'Street B', meta: 'Main road', tone: 'bad' },
|
||||
transparent: false,
|
||||
})],
|
||||
text: 'Two streets can look identical in a listing photo.',
|
||||
during: [typeAct(
|
||||
'Quiet London streets, lower noise, lower serious crime',
|
||||
2500
|
||||
), submitAct(900)],
|
||||
tail: [wait(400)],
|
||||
},
|
||||
{
|
||||
text: 'Filter noise and serious crime before you book a viewing.',
|
||||
during: [scrollFilters(220, 800)],
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'Same price. Completely different lives.',
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'The map knows the difference. The photos do not.',
|
||||
during: [hideScene(360)],
|
||||
tail: [wait(700)],
|
||||
text: 'Now the quieter pockets are the ones left on screen.',
|
||||
during: [mapZoomIn(1300, 4)],
|
||||
tail: [wait(600)],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// 06 — Commute tax. Photo hook (train platform) opens; cue 1 hides
|
||||
// the overlay and the travel-time slider drags from 60 → 20 min.
|
||||
// 06 — Commute tax. Starts on the live commute layer and immediately
|
||||
// proves the point with the travel-time slider.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-06-london-commute-tax',
|
||||
|
|
@ -1068,38 +1031,30 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
posterTimeS: 4,
|
||||
cues: [
|
||||
{
|
||||
text: 'Twenty minutes or sixty. Same asking price.',
|
||||
during: [showScene({
|
||||
mode: 'title',
|
||||
accent: 'amber',
|
||||
kicker: 'Commute tax',
|
||||
image: PHOTO.trainClock,
|
||||
title: 'Cheap, until you count the hours.',
|
||||
})],
|
||||
tail: [wait(400)],
|
||||
text: 'A cheap home gets expensive when the commute is wrong.',
|
||||
tail: [wait(300)],
|
||||
},
|
||||
{
|
||||
text: 'Drag the slider. Watch the map shrink.',
|
||||
during: [hideScene(320), ttDragAct(20, 1800)],
|
||||
text: 'Drag sixty minutes down to twenty and watch the map shrink.',
|
||||
during: [ttDragAct(20, 1900)],
|
||||
tail: [wait(700)],
|
||||
},
|
||||
{
|
||||
text: 'Time is the bill you pay every week.',
|
||||
text: 'That weekly time bill is visible before the viewing.',
|
||||
tail: [wait(600)],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// 07 — Quiet near London. Leafy-suburb photo opens; cue 1 hides it
|
||||
// and the dashboard (already filtered for low noise) is revealed.
|
||||
// 07 — Quiet near London. Uses the real prod Noise (dB) feature.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-07-quiet-near-london',
|
||||
city: 'london',
|
||||
promptText: 'Quieter London, lower road noise, good transit',
|
||||
filters: {
|
||||
'Road noise score (mean dB)': [0, 56],
|
||||
'Noise (dB)': [0, 55],
|
||||
'Estimated current price': [0, 700000],
|
||||
},
|
||||
travelTimeFilters: [
|
||||
|
|
@ -1109,30 +1064,25 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
posterTimeS: 4,
|
||||
cues: [
|
||||
{
|
||||
text: 'Quiet streets, near London. They do exist.',
|
||||
during: [showScene({
|
||||
mode: 'title',
|
||||
accent: 'teal',
|
||||
image: PHOTO.leafySuburb,
|
||||
title: 'Yes, they exist.',
|
||||
})],
|
||||
text: 'Quiet near London is searchable, not just hopeful.',
|
||||
during: [typeAct('Quieter London, lower road noise, good transit', 2500), submitAct(900)],
|
||||
tail: [wait(400)],
|
||||
},
|
||||
{
|
||||
text: 'You just have to filter for noise, not price.',
|
||||
during: [hideScene(320), scrollFilters(220, 800)],
|
||||
text: 'Filter for noise alongside price and travel time.',
|
||||
during: [scrollFilters(220, 800)],
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'The hidden pockets light up.',
|
||||
text: 'The calmer pockets show up before you go anywhere.',
|
||||
tail: [wait(500)],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// 08 — The postcode comes with the keys. Keys photo opens; map shows
|
||||
// London filtered for family-friendly area.
|
||||
// 08 — The postcode comes with the keys. Keeps the memorable premise,
|
||||
// but shows the product doing the work instead of a keys stock photo.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-08-postcode-with-the-keys',
|
||||
|
|
@ -1142,99 +1092,104 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
'Estimated current price': [0, 750000],
|
||||
'Outstanding primary schools within 2km': [1, 10],
|
||||
'Serious crime per 1k residents (avg/yr)': [0, 50],
|
||||
'Noise (dB)': [0, 58],
|
||||
},
|
||||
travelTimeFilters: [
|
||||
{ mode: 'transit', slug: 'london', label: 'London city centre', max: 45 },
|
||||
],
|
||||
initialZoom: 10.5,
|
||||
posterTimeS: 3,
|
||||
cues: [
|
||||
{
|
||||
text: 'You can renovate the kitchen.',
|
||||
during: [showScene({
|
||||
mode: 'title',
|
||||
accent: 'lime',
|
||||
image: PHOTO.keysFrontDoor,
|
||||
title: 'You keep the postcode forever.',
|
||||
})],
|
||||
text: 'You can change the kitchen. You inherit the postcode.',
|
||||
during: [typeAct(
|
||||
'Family London, lower crime, good schools, lower noise',
|
||||
2500
|
||||
), submitAct(900)],
|
||||
tail: [wait(400)],
|
||||
},
|
||||
{
|
||||
text: 'You can not renovate the commute or the noise.',
|
||||
text: 'So check commute, crime, schools and noise first.',
|
||||
during: [scrollFilters(320, 900)],
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'Pick the area first. The keys come second.',
|
||||
during: [hideScene(320)],
|
||||
during: [mapZoomIn(1200, 4)],
|
||||
tail: [wait(600)],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// 09 — Waitrose distance. Niche filter that maps to social-class
|
||||
// proxy. We type the brief, scroll the filter pane to surface the
|
||||
// Waitrose-distance card explicitly.
|
||||
// 09 — Amenities. Waitrose is the memorable example, but the copy
|
||||
// frames it as practical amenity filtering rather than a throwaway gag.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-09-london-waitrose',
|
||||
city: 'london',
|
||||
promptText:
|
||||
'London postcodes within walking distance of a Waitrose',
|
||||
'London postcodes near Waitrose, tube and parks under £800k',
|
||||
filters: {
|
||||
'Distance to nearest Waitrose (km)': [0, 1],
|
||||
'Distance to nearest tube station (km)': [0, 1.2],
|
||||
'Distance to nearest park (km)': [0, 0.8],
|
||||
'Estimated current price': [0, 800000],
|
||||
},
|
||||
initialZoom: 10.4,
|
||||
posterTimeS: 7,
|
||||
cues: [
|
||||
{
|
||||
text: 'How close is your nearest Waitrose. Yes, really.',
|
||||
text: 'Amenities should be filters, not guesses from the photos.',
|
||||
during: [typeAct(
|
||||
'London postcodes within walking distance of a Waitrose',
|
||||
'London postcodes near Waitrose, tube and parks under £800k',
|
||||
2800
|
||||
), submitAct(900)],
|
||||
tail: [wait(400)],
|
||||
},
|
||||
{
|
||||
text: 'The map highlights the lucky postcodes.',
|
||||
during: [scrollFilters(180, 800)],
|
||||
text: 'Waitrose, tube, parks and price can all count together.',
|
||||
during: [scrollFilters(300, 900)],
|
||||
tail: [wait(600)],
|
||||
},
|
||||
{
|
||||
text: 'It is a real filter, not a meme.',
|
||||
text: 'Now you know which postcodes actually match that lifestyle.',
|
||||
tail: [wait(500)],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// 10 — Reform-voting councils. % Reform UK vote share as a filter.
|
||||
// Politically tense — kept matter-of-fact, no spin in the copy.
|
||||
// 10 — Local politics. Matter-of-fact and product-led; lower threshold
|
||||
// keeps the map populated while still surfacing the Reform UK feature.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-10-reform-councils',
|
||||
city: 'london',
|
||||
city: 'leeds',
|
||||
promptText:
|
||||
'Areas where the council voted heavily for Reform UK',
|
||||
'Areas with higher Reform UK vote share and lower prices',
|
||||
filters: {
|
||||
'% Reform UK': [25, 100],
|
||||
'% Reform UK': [15, 100],
|
||||
'Estimated current price': [0, 350000],
|
||||
},
|
||||
initialZoom: 9.5,
|
||||
initialZoom: 10.5,
|
||||
posterTimeS: 7,
|
||||
cues: [
|
||||
{
|
||||
text: 'Want to know which way your future council voted.',
|
||||
text: 'Local politics is part of the neighbourhood data too.',
|
||||
during: [typeAct(
|
||||
'Areas where the council voted heavily for Reform UK',
|
||||
'Areas with higher Reform UK vote share and lower prices',
|
||||
2600
|
||||
)],
|
||||
tail: [wait(300)],
|
||||
},
|
||||
{
|
||||
text: 'Run the filter. See the map.',
|
||||
text: 'Run the filter and see which areas stay in view.',
|
||||
during: [submitAct(900), scrollFilters(180, 700)],
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'Politics shapes the area too.',
|
||||
text: 'No spin. Just another local signal before you buy.',
|
||||
tail: [wait(500)],
|
||||
},
|
||||
],
|
||||
|
|
@ -1247,76 +1202,67 @@ const AD_CONFIGS: DemoAdStoryboardConfig[] = [
|
|||
name: 'ad-11-leeds-families',
|
||||
city: 'leeds',
|
||||
promptText:
|
||||
'Three bed near Leeds, outstanding primary nearby, lower crime',
|
||||
'Leeds family areas, good primary schools nearby, lower crime',
|
||||
filters: {
|
||||
'Estimated current price': [0, 380000],
|
||||
'Outstanding primary schools within 2km': [2, 10],
|
||||
'Good+ primary schools within 2km': [2, 10],
|
||||
'Serious crime per 1k residents (avg/yr)': [0, 45],
|
||||
},
|
||||
initialZoom: 11.0,
|
||||
posterTimeS: 6,
|
||||
cues: [
|
||||
{
|
||||
text: 'Leeds, but only the school-run friendly bits.',
|
||||
text: 'Find Leeds areas that work for the school run.',
|
||||
during: [typeAct(
|
||||
'Three bed near Leeds, outstanding primary nearby, lower crime',
|
||||
'Leeds family areas, good primary schools nearby, lower crime',
|
||||
2500
|
||||
), submitAct(900)],
|
||||
tail: [wait(300)],
|
||||
},
|
||||
{
|
||||
text: 'Two outstanding primaries within walking distance.',
|
||||
text: 'School quality and serious crime sit beside price.',
|
||||
during: [scrollFilters(220, 800)],
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'Every lit postcode is a real candidate.',
|
||||
text: 'Every lit postcode is a better place to start.',
|
||||
tail: [wait(500)],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// 12 — Pricing scarcity. Real prod numbers (verified via /api/pricing
|
||||
// at render time): the £0.99 tier is sold out (50/50); the current
|
||||
// £9.99 tier has 17 slots left before the next jump to £29.99. We
|
||||
// surface those numbers in a structured rank scene over the live
|
||||
// dashboard, since recording on the /pricing route would require a
|
||||
// dashboard URL override and we want to ship this iteration.
|
||||
// 12 — Pricing/value. Keeps the current £9.99 founder-price hook, but
|
||||
// proves value through the product instead of a static scarcity card.
|
||||
// -------------------------------------------------------------------
|
||||
{
|
||||
name: 'ad-12-pricing-scarcity',
|
||||
city: 'london',
|
||||
promptText: 'Quieter London, good schools, lower crime',
|
||||
promptText: 'London under £700k, good schools, lower crime and lower noise',
|
||||
filters: {
|
||||
'Estimated current price': [0, 700000],
|
||||
'Outstanding primary schools within 2km': [1, 10],
|
||||
'Serious crime per 1k residents (avg/yr)': [0, 50],
|
||||
'Noise (dB)': [0, 58],
|
||||
},
|
||||
initialZoom: 10.4,
|
||||
posterTimeS: 3,
|
||||
cues: [
|
||||
{
|
||||
text: 'Seventeen spots left at nine ninety nine.',
|
||||
during: [showScene({
|
||||
mode: 'rank',
|
||||
accent: 'amber',
|
||||
kicker: 'Founder pricing',
|
||||
title: 'Cheap tier almost gone.',
|
||||
items: [
|
||||
{ label: '£0.99 / month', value: 'sold out', tone: 'bad' },
|
||||
{ label: '£9.99 / month', value: '17 left', tone: 'warn' },
|
||||
{ label: '£29.99 / month', value: 'next', tone: 'neutral' },
|
||||
],
|
||||
})],
|
||||
text: 'Nine ninety nine beats one wasted viewing.',
|
||||
during: [typeAct(
|
||||
'London under £700k, good schools, lower crime and lower noise',
|
||||
2700
|
||||
), submitAct(900)],
|
||||
tail: [wait(400)],
|
||||
},
|
||||
{
|
||||
text: 'Then the price triples.',
|
||||
text: 'Use the map before spending a Saturday in the wrong area.',
|
||||
during: [scrollFilters(300, 900)],
|
||||
tail: [wait(500)],
|
||||
},
|
||||
{
|
||||
text: 'Get in before the next jump.',
|
||||
during: [hideScene(360)],
|
||||
text: 'The cheapest mistake is the one you skip.',
|
||||
tail: [wait(600)],
|
||||
},
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue